2014-01-29 04:57:38 +08:00
from __future__ import unicode_literals
2013-06-24 04:23:19 +08:00
import re
from . common import InfoExtractor
2014-02-19 08:27:12 +08:00
from . . utils import int_or_none
2013-06-24 04:23:19 +08:00
class LiveLeakIE(InfoExtractor):
    """Extractor for LiveLeak ``view`` pages (``/view?i=...`` or ``/view?t=...``).

    A page either carries one or more HTML5 media entries, which are returned
    as a playlist, or embeds a Prochan/YouTube player, in which case
    extraction is delegated to the embedded URL.
    """

    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'http://www.liveleak.com/view?i=757_1364311680',
        'md5': '0813c2430bea7a46bf13acf3406992f4',
        'info_dict': {
            'id': '757_1364311680',
            'ext': 'mp4',
            'description': 'extremely bad day for this guy..!',
            'uploader': 'ljfriel2',
            'title': 'Most unlucky car accident',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
        'info_dict': {
            'id': 'f93_1390833151',
            'ext': 'mp4',
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
            'uploader': 'ARD_Stinkt',
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Prochan embed
        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
        'md5': '42c6d97d54f1db107958760788c5f48f',
        'info_dict': {
            'id': '4f7_1392687779',
            'ext': 'mp4',
            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
            'uploader': 'CapObveus',
            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
            'age_limit': 18,
        },
        'skip': 'Video is dead',
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
        # Multiple resolutions
        'url': 'http://www.liveleak.com/view?i=801_1409392012',
        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
        'info_dict': {
            'id': '801_1409392012',
            'ext': 'mp4',
            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
            'uploader': 'bony333',
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
        'url': 'http://m.liveleak.com/view?i=763_1473349649',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '763_1473349649',
            'ext': 'mp4',
            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
            'uploader': 'Ziz',
            'upload_date': '20160908',
            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.liveleak.com/view?i=677_1439397581',
        'info_dict': {
            'id': '677_1439397581',
            'title': 'Fuel Depot in China Explosion caught on video',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all LiveLeak ``ll_embed`` iframes in *webpage*.

        Only iframes whose query string carries an ``i=``, ``f=`` or ``t=``
        parameter are matched. The result is a (possibly empty) list of
        strings as produced by ``re.findall``.
        """
        return re.findall(
            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
            webpage)

    def _real_extract(self, url):
        """Extract the video(s) found on a LiveLeak view page.

        Returns a playlist result with one entry per HTML5 media element on
        the page, or a ``url_transparent`` result pointing at the embedded
        Prochan/YouTube player when the page has no HTML5 media.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Page-level metadata shared by every entry of the page.
        video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
        video_description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
        # The age gate text is optional; age_limit stays None when absent.
        age_limit = int_or_none(self._search_regex(
            r'you confirm that you are ([0-9]+) years and over.',
            webpage, 'age limit', default=None))
        video_thumbnail = self._og_search_thumbnail(webpage)

        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # Maybe an embed?
            embed_url = self._search_regex(
                r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
                webpage, 'embed URL')
            return {
                '_type': 'url_transparent',
                'url': embed_url,
                'id': video_id,
                'title': video_title,
                'description': video_description,
                'uploader': video_uploader,
                'age_limit': age_limit,
            }

        for idx, info_dict in enumerate(entries):
            formats = []
            for a_format in info_dict['formats']:
                # Fall back to the "<height>p.mp4" label in the URL when the
                # parsed entry carries no height.
                if not a_format.get('height'):
                    a_format['height'] = int_or_none(self._search_regex(
                        r'([0-9]+)p\.mp4', a_format['url'], 'height label',
                        default=None))
                formats.append(a_format)

                # Removing '.*.mp4' gives the raw video, which is essentially
                # the same video without the LiveLeak logo at the top (see
                # https://github.com/ytdl-org/youtube-dl/pull/4768)
                orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
                if a_format['url'] != orig_url:
                    format_id = a_format.get('format_id')
                    formats.append({
                        'format_id': 'original' + ('-' + format_id if format_id else ''),
                        'url': orig_url,
                        'preference': 1,
                    })
            self._sort_formats(formats)
            info_dict['formats'] = formats

            # Don't append entry ID for one-video pages to keep backward compatibility
            if len(entries) > 1:
                info_dict['id'] = '%s_%s' % (video_id, idx + 1)
            else:
                info_dict['id'] = video_id

            info_dict.update({
                'title': video_title,
                'description': video_description,
                'uploader': video_uploader,
                'age_limit': age_limit,
                'thumbnail': video_thumbnail,
            })

        return self.playlist_result(entries, video_id, video_title)
2017-08-19 22:27:53 +08:00
class LiveLeakEmbedIE(InfoExtractor):
    """Extractor for LiveLeak ``ll_embed`` player URLs.

    Resolves an embed URL to the corresponding LiveLeak view page URL and
    hands it over to ``LiveLeakIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'

    # See generic.py for actual test cases
    _TESTS = [{
        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
        'only_matching': True,
    }, {
        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map an embed URL to a LiveLeak view URL and delegate to ``LiveLeakIE``.

        For ``f=`` embeds the embed page is downloaded and the view URL is
        read from its ``logourl:`` / ``window.open(`` reference; for ``i=``
        and ``t=`` embeds the view URL is built directly from the query
        parameter name and value.
        """
        kind, video_id = re.match(self._VALID_URL, url).groups()

        if kind == 'f':
            webpage = self._download_webpage(url, video_id)
            # The view URL appears as a quoted string; (?P=q1) requires the
            # closing quote to be the same character as the opening one.
            liveleak_url = self._search_regex(
                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
                webpage, 'LiveLeak URL', group='url')
        else:
            liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)

        return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())