2014-03-21 07:53:18 +08:00
from __future__ import unicode_literals
2013-11-24 14:30:05 +08:00
import re
2015-05-02 00:08:38 +08:00
from . . compat import (
compat_urlparse ,
compat_urllib_request ,
)
2013-11-24 14:30:05 +08:00
from . . utils import (
2013-11-24 22:28:33 +08:00
ExtractorError ,
2013-11-25 13:06:18 +08:00
unescapeHTML ,
2013-11-24 14:30:05 +08:00
unified_strdate ,
2014-03-21 07:59:51 +08:00
US_RATINGS ,
2015-05-02 00:32:46 +08:00
clean_html ,
2013-11-24 14:30:05 +08:00
)
2015-02-19 03:37:16 +08:00
from . common import InfoExtractor
2013-11-24 14:30:05 +08:00
2015-02-19 03:37:16 +08:00
class VikiIE(InfoExtractor):
    """Information extractor for single video pages on viki.com.

    Metadata is scraped from the public video page; the media URL, the
    upload date and the subtitle tracks come from the ``player5_fragment``
    page, which is requested with a mobile user agent.
    """

    IE_NAME = 'viki'

    # iPad2 user agent, sent when requesting the player fragment.
    # NOTE(review): presumably needed so the fragment exposes a plain
    # <source> element -- confirm against the live site.
    _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TESTS = [{
        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
        'info_dict': {
            'id': '1023585v',
            'ext': 'mp4',
            'title': 'Heirs Episode 14',
            'uploader': 'SBS',
            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            'upload_date': '20131121',
            'age_limit': 13,
        },
        'skip': 'Blocked in the US',
    }, {
        'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
        'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
        'info_dict': {
            'id': '1067139v',
            'ext': 'mp4',
            'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
            'upload_date': '20150430',
            'title': '\'The Avengers: Age of Ultron\' Press Conference',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the Viki video at *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``description``, ``thumbnail``, ``age_limit``, ``uploader``,
        ``subtitles`` and ``upload_date``.  ``uploader``, ``age_limit`` and
        ``upload_date`` are None when the corresponding markup is absent.

        Raises ExtractorError when the player fragment carries a
        ``video-error`` block; the error is flagged ``expected=True`` when
        the message reports that the video is not available in the
        viewer's region.
        """
        video_id = self._match_id(url)

        # Public video page: Open Graph metadata plus the info table.
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader_m = re.search(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
        uploader = uploader_m.group(1).strip() if uploader_m else None

        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            'rating information', default='').strip()
        # Unknown or missing ratings map to None.
        age_limit = US_RATINGS.get(rating_str)

        # Player fragment: fetched with the mobile user agent.
        req = compat_urllib_request.Request(
            'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
        req.add_header('User-Agent', self._USER_AGENT)
        info_webpage = self._download_webpage(
            req, video_id, note='Downloading info page')

        err_msg = self._html_search_regex(
            r'<div[^>]+class="video-error[^>]+>(.+)</div>',
            info_webpage, 'error message', default=None)
        if err_msg:
            err_msg = clean_html(err_msg)
            if 'not available in your region' in err_msg:
                raise ExtractorError(
                    'Video %s is blocked from your location.' % video_id,
                    expected=True)
            raise ExtractorError('Viki said: ' + err_msg)

        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')

        # The upload date is optional metadata: default=None lets a fragment
        # without "created_at" fall through to the None branch below instead
        # of aborting the whole extraction.
        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, 'upload date',
            default=None)
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        # NOTE(review): extract_subtitles is inherited; presumably it
        # forwards to _get_subtitles when subtitles are requested -- confirm.
        video_subtitles = self.extract_subtitles(video_id, info_webpage)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_subtitles(self, video_id, info_webpage):
        """Collect the WebVTT tracks referenced by the player fragment.

        Returns a dict mapping a language code (the lowercase file stem of
        each ``.vtt`` URL) to a one-element list holding the absolute track
        URL and the ``vtt`` extension.  ``<track>`` sources that do not end
        in ``/<lang>.vtt`` are ignored; a later track for the same language
        replaces an earlier one.
        """
        subtitles = {}
        for track_m in re.finditer(r'<track src="([^"]+)"', info_webpage):
            # The attribute value is HTML-escaped inside the page markup.
            track_url = unescapeHTML(track_m.group(1))
            lang_m = re.search(r'/(?P<lang>[a-z]+)\.vtt', track_url)
            if not lang_m:
                continue
            subtitles[lang_m.group('lang')] = [{
                # Track sources may be relative; resolve against the site.
                'url': compat_urlparse.urljoin(
                    'http://www.viki.com', track_url),
                'ext': 'vtt',
            }]
        return subtitles