2014-01-05 10:18:45 +08:00
from __future__ import unicode_literals
2013-06-24 02:44:48 +08:00
import re
from . common import InfoExtractor
2014-12-09 00:17:15 +08:00
from . . compat import (
2013-06-24 02:44:48 +08:00
compat_urllib_request ,
2014-05-15 23:20:40 +08:00
compat_urlparse ,
2014-12-09 00:17:15 +08:00
)
from . . utils import (
2014-05-15 23:20:40 +08:00
clean_html ,
2014-12-09 00:17:15 +08:00
int_or_none ,
parse_iso8601 ,
unescapeHTML ,
2015-07-18 03:45:36 +08:00
xpath_text ,
xpath_with_ns ,
2013-06-24 02:44:48 +08:00
)
2015-02-18 04:56:25 +08:00
class BlipTVIE(InfoExtractor):
    """Extractor for a single blip.tv video.

    ``_VALID_URL`` accepts two URL families:

    * URLs that end in a numeric id (``.../<slug>-<id>`` or
      ``/rss/flash/<id>``), captured as the ``id`` group;
    * embed style URLs (``/play/<lookup_id>`` or ``api.swf#<lookup_id>``),
      captured as the ``lookup_id`` group, which must first be resolved to
      a URL of the first family.
    """

    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'

    _TESTS = [
        {
            'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
            'md5': '80baf1ec5c3d2019037c1c707d676b9f',
            'info_dict': {
                'id': '5779306',
                'ext': 'm4v',
                'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
                'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
                'timestamp': 1323138843,
                'upload_date': '20111206',
                'uploader': 'cbr',
                'uploader_id': '679425',
                'duration': 81,
            }
        },
        {
            # https://github.com/rg3/youtube-dl/pull/2274
            'note': 'Video with subtitles',
            'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
            'md5': '309f9d25b820b086ca163ffac8031806',
            'info_dict': {
                'id': '6586561',
                'ext': 'mp4',
                'title': 'Red vs. Blue Season 11 Episode 1',
                'description': 'One-Zero-One',
                'timestamp': 1371261608,
                'upload_date': '20130615',
                'uploader': 'redvsblue',
                'uploader_id': '792887',
                'duration': 279,
            }
        },
        {
            # https://bugzilla.redhat.com/show_bug.cgi?id=967465
            'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
            'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
            'info_dict': {
                'id': '6573122',
                'ext': 'mov',
                'upload_date': '20130520',
                'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
                'title': 'Red vs. Blue Season 11 Trailer',
                'timestamp': 1369029609,
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            }
        },
        {
            'url': 'http://blip.tv/play/gbk766dkj4Yn',
            'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
            'info_dict': {
                'id': '1749452',
                'ext': 'mp4',
                'upload_date': '20090208',
                'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
                'title': 'Nostalgia Critic: Transformers',
                'timestamp': 1234068723,
                'uploader': 'NostalgiaCritic',
                'uploader_id': '246467',
            }
        },
        {
            # https://github.com/rg3/youtube-dl/pull/4404
            'note': 'Audio only',
            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
            'md5': '76c0a56f24e769ceaab21fbb6416a351',
            'info_dict': {
                'id': '7103299',
                'ext': 'flv',
                'title': 'Weekly Manga Recap: Kingdom',
                'description': 'And then Shin breaks the enemy line, and he\'s all like HWAH! And then he slices a guy and it\'s all like FWASHING! And... it\'s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
                'timestamp': 1417660321,
                'upload_date': '20141204',
                'uploader': 'The Rollo T',
                'uploader_id': '407429',
                'duration': 7251,
                'vcodec': 'none',
            }
        },
        {
            # missing duration
            'url': 'http://blip.tv/rss/flash/6700880',
            'info_dict': {
                'id': '6684191',
                'ext': 'm4v',
                'title': 'Cowboy Bebop: Gateway Shuffle Review',
                'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
                'timestamp': 1386639757,
                'upload_date': '20131210',
                'uploader': 'sfdebris',
                'uploader_id': '706520',
            }
        }
    ]

    @staticmethod
    def _extract_url(webpage):
        """Find a blip.tv video reference embedded in *webpage*.

        Two shapes are recognised, in this order:

        1. a ``<meta>`` tag pointing at an ``api.blip.tv/.../redirect/.../<id>``
           URL, which is rewritten to ``http://blip.tv/a/a-<id>``;
        2. an ``<iframe>``/``<embed>``/``<object>`` tag containing a
           ``/play/...`` or ``api.swf#...`` URL, which is returned verbatim.

        Returns the URL as a string, or ``None`` when neither shape is found.
        """
        mobj = re.search(
            r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)',
            webpage)
        if mobj:
            return 'http://blip.tv/a/a-' + mobj.group(1)
        mobj = re.search(
            r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)',
            webpage)
        if mobj:
            return mobj.group(1)
        return None

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        The metadata comes from the ``/rss/flash/<id>`` feed; every
        ``media:content`` entry of the item's ``media:group`` becomes either
        a subtitle track (``text/srt`` type or ``.srt`` URL) or a format
        (``video/*`` type).  Entries of any other type are ignored.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``timestamp``, ``uploader``, ``uploader_id``, ``duration``,
        ``thumbnail``, ``categories``, ``formats`` and ``subtitles``.
        """
        mobj = re.match(self._VALID_URL, url)
        lookup_id = mobj.group('lookup_id')

        # See https://github.com/rg3/youtube-dl/issues/857 and
        # https://github.com/rg3/youtube-dl/issues/4197
        if lookup_id:
            # The play URL redirects; the final URL carries the real video
            # URL in its "file" query parameter.
            urlh = self._request_webpage(
                'http://blip.tv/play/%s' % lookup_id, lookup_id,
                'Resolving lookup id')
            redirect = compat_urlparse.urlparse(urlh.geturl())
            qs = compat_urlparse.parse_qs(redirect.query)
            mobj = re.match(self._VALID_URL, qs['file'][0])

        video_id = mobj.group('id')

        rss = self._download_xml(
            'http://blip.tv/rss/flash/%s' % video_id, video_id,
            'Downloading video RSS')

        namespaces = {
            'blip': 'http://blip.tv/dtd/blip/1.0',
            'media': 'http://search.yahoo.com/mrss/',
            'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
        }

        def _x(p):
            # Expand the "prefix:tag" shorthand with the feed's namespaces.
            return xpath_with_ns(p, namespaces)

        item = rss.find('channel/item')

        # The id in the feed takes precedence over the one from the URL
        # (the test cases above show that the two differ).
        video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
        title = xpath_text(item, 'title', 'title', fatal=True)
        description = clean_html(
            xpath_text(item, _x('blip:puredescription'), 'description'))
        timestamp = parse_iso8601(
            xpath_text(item, _x('blip:datestamp'), 'timestamp'))
        uploader = xpath_text(item, _x('blip:user'), 'uploader')
        uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
        duration = int_or_none(
            xpath_text(item, _x('blip:runtime'), 'duration'))
        media_thumbnail = item.find(_x('media:thumbnail'))
        thumbnail = (
            media_thumbnail.get('url') if media_thumbnail is not None
            else xpath_text(item, 'image', 'thumbnail'))
        # findall() only yields real elements, so no None filtering is needed.
        categories = [category.text for category in item.findall('category')]

        # Maps the language name found in a subtitle role to a language code;
        # unknown names are used as-is.
        LANGS = {
            'english': 'en',
        }

        formats = []
        subtitles_urls = {}

        media_group = item.find(_x('media:group'))
        # An item without a media:group simply contributes no formats
        # instead of failing with an AttributeError on None.
        media_contents = (
            media_group.findall(_x('media:content'))
            if media_group is not None else [])

        for media_content in media_contents:
            media_url = media_content.get('url')
            if not media_url:
                continue
            role = media_content.get(_x('blip:role'))
            # A missing "type" attribute must not crash the checks below.
            media_type = media_content.get('type') or ''

            if media_type == 'text/srt' or media_url.endswith('.srt'):
                lang = role.rpartition('-')[-1].strip().lower()
                langcode = LANGS.get(lang, lang)
                subtitles_urls[langcode] = media_url
            elif media_type.startswith('video/'):
                # Only video entries need the extra round trip that turns
                # the feed URL into the real media URL.
                msg = self._download_webpage(
                    media_url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                    video_id, 'Resolving URL for %s' % role)
                real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]

                formats.append({
                    'url': real_url,
                    'format_id': role,
                    'format_note': media_type,
                    'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
                    'acodec': media_content.get(_x('blip:acodec')),
                    'filesize': int_or_none(media_content.get('filesize')),
                    'width': int_or_none(media_content.get('width')),
                    'height': int_or_none(media_content.get('height')),
                })

        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        # NOTE(review): extract_subtitles is a base class helper; presumably
        # it dispatches to _get_subtitles below when subtitles are wanted.
        subtitles = self.extract_subtitles(video_id, subtitles_urls)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_subtitles(self, video_id, subtitles_urls):
        """Download every subtitle track listed in *subtitles_urls*.

        *subtitles_urls* maps a language code to a subtitle URL.  Returns a
        dict mapping each language code to a one-element list holding a
        dict with the keys ``ext`` and ``data`` (the downloaded text).
        """
        subtitles = {}
        for lang, url in subtitles_urls.items():
            # For some weird reason, blip.tv serves a video instead of subtitles
            # when we request with a common UA
            req = compat_urllib_request.Request(url)
            req.add_header('User-Agent', 'youtube-dl')
            subtitles[lang] = [{
                # The extension is 'srt' but it's actually an 'ass' file
                'ext': 'ass',
                'data': self._download_webpage(req, None, note=False),
            }]
        return subtitles
2013-06-24 02:44:48 +08:00
class BlipTVUserIE(InfoExtractor):
    """Playlist extractor for all videos of a blip.tv user.

    ``_VALID_URL`` accepts either a ``blip.tv/<username>`` URL or the
    ``bliptvuser:<username>`` pseudo-scheme; the user name is group 1.
    """

    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    # Number of entries in a full page of the episode list.
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'

    _TEST = {
        'url': 'http://blip.tv/actone',
        'info_dict': {
            'id': 'actone',
            'title': 'Act One: The Series',
        },
        'playlist_count': 5,
    }

    def _real_extract(self, url):
        """Return a playlist result with one URL entry per video of the user.

        The user page supplies the numeric users id and the playlist title;
        the episode list is then fetched page by page until a page holds
        fewer than ``_PAGE_SIZE`` entries.
        """
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group(1)

        # "bliptvuser:<name>" is not a fetchable URL; use the user page.
        if url.startswith('bliptvuser:'):
            url = 'http://blip.tv/%s' % username

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, 'Downloading user page')
        # _search_regex reports a missing id as an extraction error rather
        # than failing with an AttributeError on a None match.
        users_id = self._search_regex(
            r'data-users-id="([^"]+)"', page, 'users id')
        page_base = page_base % users_id
        title = self._og_search_title(page)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.
        video_ids = []
        pagenum = 1

        while True:
            page_url = page_base + "&page=" + str(pagenum)
            page = self._download_webpage(
                page_url, username,
                'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers.  Unescape before the membership
            # test so that the comparison is made on the stored form.
            ids_in_page = []
            for mobj in re.finditer(r'href="/([^"]+)"', page):
                video_path = unescapeHTML(mobj.group(1))
                if video_path not in ids_in_page:
                    ids_in_page.append(video_path)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.
            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        url_entries = [
            self.url_result('http://blip.tv/%s' % video_id, 'BlipTV')
            for video_id in video_ids]
        return self.playlist_result(
            url_entries, playlist_title=title, playlist_id=username)