2013-10-16 22:57:40 +08:00
# encoding: utf-8
2014-01-28 07:56:09 +08:00
from __future__ import unicode_literals
2013-10-16 22:57:40 +08:00
import re
2014-01-28 07:56:09 +08:00
import itertools
2013-10-16 22:57:40 +08:00
from . common import InfoExtractor
2014-12-13 19:24:42 +08:00
from . . compat import (
2013-10-16 22:57:40 +08:00
compat_str ,
2014-12-13 19:24:42 +08:00
)
from . . utils import (
2013-10-16 22:57:40 +08:00
ExtractorError ,
2014-12-13 19:24:42 +08:00
unified_strdate ,
2013-10-16 22:57:40 +08:00
)
class RutubeIE(InfoExtractor):
    """Extractor for a single Rutube video page.

    Handles URLs of the form ``rutube.ru/video/<32-character id>``.
    """

    IE_NAME = 'rutube'
    IE_DESC = 'Rutube videos'
    _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'

    _TEST = {
        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
        'info_dict': {
            'id': '3eac3b4561676c17df9132a9a1e62e3e',
            'ext': 'mp4',
            'title': 'Раненный кенгуру забежал в аптеку',
            'description': 'http://www.ntdtv.ru',
            'duration': 80,
            'uploader': 'NTDRussian',
            'uploader_id': '29790',
            'upload_date': '20131016',
        },
        'params': {
            # It requires ffmpeg (m3u8 download)
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Two JSON documents are downloaded: the video description
        (``/api/video/<id>/``) and the playback options
        (``/api/play/options/<id>/``).  The m3u8 manifest URL is read from
        the ``video_balancer`` entry of the options document and expanded
        into formats with ``_extract_m3u8_formats``.

        Only ``id`` and ``title`` are treated as mandatory; every other
        metadata field is optional and yields ``None`` when the API omits it
        (``age_limit`` falls back to 0).

        Raises:
            ExtractorError: if no m3u8 manifest URL is available.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://rutube.ru/api/video/%s/?format=json' % video_id,
            video_id, 'Downloading video JSON')

        # Some videos don't have the author field
        author = video.get('author') or {}

        options = self._download_json(
            'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
            video_id, 'Downloading options JSON')

        # A missing or null 'video_balancer' must end in the ExtractorError
        # below rather than in a bare KeyError/AttributeError.
        m3u8_url = (options.get('video_balancer') or {}).get('m3u8')
        if not m3u8_url:
            raise ExtractorError('Couldn\'t find m3u8 manifest url')

        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')

        # Optional fields: look them up defensively so that one absent
        # value does not abort the whole extraction.
        author_id = author.get('id')
        created_ts = video.get('created_ts')

        return {
            'id': video['id'],
            'title': video['title'],
            'description': video.get('description'),
            'duration': video.get('duration'),
            'view_count': video.get('hits'),
            'formats': formats,
            'thumbnail': video.get('thumbnail_url'),
            'uploader': author.get('name'),
            'uploader_id': compat_str(author_id) if author_id is not None else None,
            'upload_date': unified_strdate(created_ts) if created_ts else None,
            'age_limit': 18 if video.get('is_adult') else 0,
        }
2014-01-28 07:56:09 +08:00
2015-01-04 10:15:27 +08:00
class RutubeEmbedIE(InfoExtractor):
    """Extractor for Rutube embed pages (``rutube.ru/video/embed/<number>``).

    The embed page is only used to discover the canonical video URL; the
    actual extraction is delegated to the 'Rutube' extractor.
    """

    IE_NAME = 'rutube:embed'
    IE_DESC = 'Rutube embedded videos'
    # Raw string: '\.' is not a valid escape sequence in a regular literal.
    _VALID_URL = r'https?://rutube\.ru/video/embed/(?P<id>[0-9]+)'

    _TEST = {
        'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
        'info_dict': {
            'id': 'a10e53b86e8f349080f718582ce4c661',
            'ext': 'mp4',
            'upload_date': '20131223',
            'uploader_id': '297833',
            'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
            'uploader': 'subziro89 ILya',
            'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
        },
        'params': {
            'skip_download': 'Requires ffmpeg',
        },
    }

    def _real_extract(self, url):
        """Resolve the embed page at *url* to its canonical video URL.

        Downloads the embed page, reads the ``href`` of its
        ``<link rel="canonical">`` element and returns a URL result that
        hands that address to the 'Rutube' extractor.
        """
        embed_id = self._match_id(url)
        webpage = self._download_webpage(url, embed_id)

        canonical_url = self._html_search_regex(
            r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
            'Canonical URL')

        return self.url_result(canonical_url, 'Rutube')
2014-01-28 07:56:09 +08:00
class RutubeChannelIE(InfoExtractor):
    """Extractor for Rutube channel (tag) listings.

    Subclasses reuse ``_extract_videos`` and only override the URL pattern
    and the ``_PAGE_TEMPLATE`` that is paged through.
    """

    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
    # Accept both schemes, like the single-video and embed extractors do.
    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://rutube.ru/tags/video/1800/',
        'info_dict': {
            'id': '1800',
        },
        'playlist_mincount': 68,
    }]

    # Filled with (channel id, page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'

    def _extract_videos(self, channel_id, channel_title=None):
        """Collect every video of a listing into a playlist result.

        Pages are requested through ``_PAGE_TEMPLATE`` starting at page 1.
        Paging stops at the first page whose ``results`` list is empty or
        whose ``has_next`` flag is false.

        Args:
            channel_id: id substituted into ``_PAGE_TEMPLATE``; also used as
                the playlist id.
            channel_title: optional playlist title.

        Returns:
            The value of ``self.playlist_result`` for the collected entries,
            each entry being a URL result delegated to the 'Rutube'
            extractor.
        """
        entries = []
        for pagenum in itertools.count(1):
            page = self._download_json(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)

            results = page['results']
            if not results:
                break

            entries.extend(
                self.url_result(result['video_url'], 'Rutube')
                for result in results)

            if not page['has_next']:
                break

        return self.playlist_result(entries, channel_id, channel_title)

    def _real_extract(self, url):
        """Return the playlist of all videos listed for the id in *url*."""
        # Same id lookup helper the other extractors in this file use.
        channel_id = self._match_id(url)
        return self._extract_videos(channel_id)
class RutubeMovieIE(RutubeChannelIE):
    """Extractor for Rutube movie/TV listings (``rutube.ru/metainfo/tv/<id>``).

    Paging is inherited from ``RutubeChannelIE``; this class additionally
    looks up the movie name and uses it as the playlist title.
    """

    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    # Accept both schemes, like the single-video and embed extractors do.
    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
    _TESTS = []

    # Filled with the movie id.
    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
    # Filled with (movie id, page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'

    def _real_extract(self, url):
        """Return the playlist of all videos belonging to the movie in *url*.

        The movie JSON is downloaded through ``_MOVIE_TEMPLATE`` and its
        ``name`` field becomes the playlist title.
        """
        movie_id = self._match_id(url)

        movie = self._download_json(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')

        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)
class RutubePersonIE(RutubeChannelIE):
    """Extractor for a person's video listing (``rutube.ru/video/person/<id>``).

    Only the URL pattern and the paged API template differ from
    ``RutubeChannelIE``; extraction itself is inherited unchanged.
    """

    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    # Accept both schemes, like the single-video and embed extractors do.
    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://rutube.ru/video/person/313878/',
        'info_dict': {
            'id': '313878',
        },
        'playlist_mincount': 37,
    }]

    # Filled with (person id, page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'