2014-01-05 10:18:45 +08:00
from __future__ import unicode_literals
2013-06-24 02:44:48 +08:00
import re
from . common import InfoExtractor
2014-12-09 00:17:15 +08:00
from . . compat import (
compat_str ,
2013-06-24 02:44:48 +08:00
compat_urllib_request ,
2014-05-15 23:20:40 +08:00
compat_urlparse ,
2014-12-09 00:17:15 +08:00
)
from . . utils import (
2014-05-15 23:20:40 +08:00
clean_html ,
2014-12-09 00:17:15 +08:00
int_or_none ,
parse_iso8601 ,
unescapeHTML ,
2013-06-24 02:44:48 +08:00
)
2015-02-18 04:56:25 +08:00
class BlipTVIE(InfoExtractor):
    """Extractor for single blip.tv videos.

    Two URL shapes are accepted by ``_VALID_URL``:

    * URLs ending in a numeric id (``.../some-title-<id>`` or
      ``rss/flash/<id>``), captured as the ``id`` group;
    * player URLs (``play/<lookup_id>`` or ``api.swf#<lookup_id>``),
      captured as ``lookup_id``; these are resolved to a numeric id with
      an extra request before the RSS feed is fetched.
    """

    _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'

    _TESTS = [
        {
            'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
            'md5': 'c6934ad0b6acf2bd920720ec888eb812',
            'info_dict': {
                'id': '5779306',
                'ext': 'mov',
                'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
                'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
                'timestamp': 1323138843,
                'upload_date': '20111206',
                'uploader': 'cbr',
                'uploader_id': '679425',
                'duration': 81,
            }
        },
        {
            # https://github.com/rg3/youtube-dl/pull/2274
            'note': 'Video with subtitles',
            'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
            'md5': '309f9d25b820b086ca163ffac8031806',
            'info_dict': {
                'id': '6586561',
                'ext': 'mp4',
                'title': 'Red vs. Blue Season 11 Episode 1',
                'description': 'One-Zero-One',
                'timestamp': 1371261608,
                'upload_date': '20130615',
                'uploader': 'redvsblue',
                'uploader_id': '792887',
                'duration': 279,
            }
        },
        {
            # https://bugzilla.redhat.com/show_bug.cgi?id=967465
            'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
            'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
            'info_dict': {
                'id': '6573122',
                'ext': 'mov',
                'upload_date': '20130520',
                'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
                'title': 'Red vs. Blue Season 11 Trailer',
                'timestamp': 1369029609,
                'uploader': 'redvsblue',
                'uploader_id': '792887',
            }
        },
        {
            'url': 'http://blip.tv/play/gbk766dkj4Yn',
            'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
            'info_dict': {
                'id': '1749452',
                'ext': 'mp4',
                'upload_date': '20090208',
                'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
                'title': 'Nostalgia Critic: Transformers',
                'timestamp': 1234068723,
                'uploader': 'NostalgiaCritic',
                'uploader_id': '246467',
            }
        },
        {
            # https://github.com/rg3/youtube-dl/pull/4404
            'note': 'Audio only',
            'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
            'md5': '76c0a56f24e769ceaab21fbb6416a351',
            'info_dict': {
                'id': '7103299',
                'ext': 'flv',
                'title': 'Weekly Manga Recap: Kingdom',
                'description': "And then Shin breaks the enemy line, and he's all like HWAH! And then he slices a guy and it's all like FWASHING! And... it's really hard to describe the best parts of this series without breaking down into sound effects, okay?",
                'timestamp': 1417660321,
                'upload_date': '20141204',
                'uploader': 'The Rollo T',
                'uploader_id': '407429',
                'duration': 7251,
                'vcodec': 'none',
            }
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for one blip.tv video.

        The video's RSS document (``http://blip.tv/rss/flash/<id>``) supplies
        the metadata and the list of media entries.  Entries recognised as
        SRT subtitles are collected for ``extract_subtitles``; entries whose
        type starts with ``video/`` become formats after their real URL has
        been resolved with one extra request each.

        Returns a dict with ``id``, ``title``, ``description``,
        ``timestamp``, ``uploader``, ``uploader_id``, ``duration``,
        ``thumbnail``, ``categories``, ``formats`` and ``subtitles``.
        Optional metadata missing from the feed is reported as ``None``
        instead of aborting the extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        lookup_id = mobj.group('lookup_id')

        # See https://github.com/rg3/youtube-dl/issues/857 and
        # https://github.com/rg3/youtube-dl/issues/4197
        if lookup_id:
            # The final URL of the player request carries the canonical
            # video URL in its ``file`` query parameter.
            urlh = self._request_webpage(
                'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
            resolved_url = compat_urlparse.urlparse(urlh.geturl())
            qs = compat_urlparse.parse_qs(resolved_url.query)
            mobj = re.match(self._VALID_URL, qs['file'][0])

        video_id = mobj.group('id')

        rss = self._download_xml(
            'http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')

        # Helpers producing namespace-qualified ElementTree tag names.
        def blip(s):
            return '{http://blip.tv/dtd/blip/1.0}%s' % s

        def media(s):
            return '{http://search.yahoo.com/mrss/}%s' % s

        def itunes(s):
            return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s

        item = rss.find('channel/item')

        def item_text(tag):
            # Text of the first matching child of the RSS item, or None when
            # the element is absent (Element.find returns None in that case).
            node = item.find(tag)
            return None if node is None else node.text

        # Prefer the id reported by the feed; keep the URL id as a fallback.
        video_id = item_text(blip('item_id')) or video_id
        # The title is mandatory, so a missing element is allowed to fail.
        title = item.find('./title').text

        raw_description = item_text(blip('puredescription'))
        # compat_str(None) would produce the literal string 'None'.
        description = (
            clean_html(compat_str(raw_description))
            if raw_description is not None else None)

        datestamp = item_text(blip('datestamp'))
        timestamp = parse_iso8601(datestamp) if datestamp is not None else None

        uploader = item_text(blip('user'))
        uploader_id = item_text(blip('userid'))
        # An empty runtime is treated like a missing one.
        duration = int_or_none(item_text(blip('runtime')) or None)

        media_thumbnail = item.find(media('thumbnail'))
        if media_thumbnail is not None:
            thumbnail = media_thumbnail.get('url')
        else:
            thumbnail = item_text(itunes('image'))

        categories = [
            category.text for category in item.findall('category')
            if category.text]

        formats = []
        subtitles_urls = {}

        # Maps the language name found in the role to a language code;
        # unknown names are used unchanged.
        LANGS = {
            'english': 'en',
        }

        media_group = item.find(media('group'))
        media_contents = (
            media_group.findall(media('content'))
            if media_group is not None else [])
        for media_content in media_contents:
            media_url = media_content.get('url')
            if not media_url:
                continue
            role = media_content.get(blip('role'))
            media_type = media_content.get('type')

            if media_type == 'text/srt' or media_url.endswith('.srt'):
                # The language is the part of the role after the last '-'.
                lang = role.rpartition('-')[-1].strip().lower()
                langcode = LANGS.get(lang, lang)
                subtitles_urls[langcode] = media_url
            elif media_type and media_type.startswith('video/'):
                # Only video entries need the real URL, so the resolving
                # request is made here rather than for every entry.
                msg = self._download_webpage(
                    media_url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
                    video_id, 'Resolving URL for %s' % role)
                real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]

                formats.append({
                    'url': real_url,
                    'format_id': role,
                    'format_note': media_type,
                    'vcodec': media_content.get(blip('vcodec')) or 'none',
                    'acodec': media_content.get(blip('acodec')),
                    'filesize': int_or_none(media_content.get('filesize')),
                    'width': int_or_none(media_content.get('width')),
                    'height': int_or_none(media_content.get('height')),
                })
        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        subtitles = self.extract_subtitles(video_id, subtitles_urls)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'thumbnail': thumbnail,
            'categories': categories,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_subtitles(self, video_id, subtitles_urls):
        """Download every subtitle track listed in ``subtitles_urls``.

        ``subtitles_urls`` maps a language code to the subtitle URL.  The
        result maps each language code to a one-element list holding a dict
        with the downloaded text (``data``) and its format (``ext``).
        """
        subtitles = {}
        for lang, url in subtitles_urls.items():
            # For some weird reason, blip.tv serves a video instead of subtitles
            # when we request with a common UA
            req = compat_urllib_request.Request(url)
            req.add_header('User-Agent', 'youtube-dl')
            subtitles[lang] = [{
                # The extension is 'srt' but it's actually an 'ass' file
                'ext': 'ass',
                'data': self._download_webpage(req, None, note=False),
            }]
        return subtitles
2013-06-24 02:44:48 +08:00
class BlipTVUserIE(InfoExtractor):
    """Extractor turning a blip.tv user page into a playlist of videos.

    Accepts ``http(s)://[sub.]blip.tv/<user>`` URLs as well as the
    ``bliptvuser:<user>`` pseudo-URL (see ``_VALID_URL``).
    """

    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
    # Number of entries on a "full" result page of the episode-list endpoint.
    _PAGE_SIZE = 12
    IE_NAME = 'blip.tv:user'
    _TEST = {
        'url': 'http://blip.tv/actone',
        'info_dict': {
            'id': 'actone',
            'title': 'Act One: The Series',
        },
        'playlist_count': 5,
    }

    def _real_extract(self, url):
        """Return a playlist result listing all videos of a blip.tv user.

        The user page yields the numeric user id (``data-users-id``) and the
        playlist title; the episode-list endpoint is then queried page by
        page until a page holds fewer than ``_PAGE_SIZE`` ids.  Every id
        becomes a ``url_result`` handled by the ``BlipTV`` extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        username = mobj.group(1)

        if url.startswith('bliptvuser:'):
            # The pseudo-URL is not downloadable; use the user page instead.
            url = 'http://blip.tv/%s' % username

        page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'

        page = self._download_webpage(url, username, 'Downloading user page')
        # _search_regex reports a missing id as an extractor error instead of
        # an AttributeError on a None match object.
        users_id = self._search_regex(
            r'data-users-id="([^"]+)"', page, 'user id')
        page_base = page_base % users_id
        title = self._og_search_title(page)

        # Download video ids using BlipTV Ajax calls. Result size per
        # query is limited (currently to 12 videos) so we need to query
        # page by page until there are no video ids - it means we got
        # all of them.
        video_ids = []
        pagenum = 1

        while True:
            page_url = page_base + '&page=' + str(pagenum)
            page = self._download_webpage(
                page_url, username, 'Downloading video ids from page %d' % pagenum)

            # Extract video identifiers, de-duplicating on the unescaped
            # value that is actually stored.
            ids_in_page = []
            for mobj in re.finditer(r'href="/([^"]+)"', page):
                video_id = unescapeHTML(mobj.group(1))
                if video_id not in ids_in_page:
                    ids_in_page.append(video_id)

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            # again.
            if len(ids_in_page) < self._PAGE_SIZE:
                break

            pagenum += 1

        urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
        url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
        return self.playlist_result(
            url_entries, playlist_title=title, playlist_id=username)