2014-01-29 04:57:38 +08:00
from __future__ import unicode_literals
2014-01-30 11:52:50 +08:00
import json
2013-06-24 04:23:19 +08:00
import re
from . common import InfoExtractor
class LiveLeakIE(InfoExtractor):
    """Extractor for LiveLeak video pages of the form ``/view?...i=<video_id>``."""

    # The scheme is optional; both http:// and https:// links are accepted.
    # The video id is the value of the ``i`` query parameter.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?liveleak\.com/view\?(?:.*?)i=(?P<video_id>[\w_]+)(?:.*)'
    _TESTS = [{
        'url': 'http://www.liveleak.com/view?i=757_1364311680',
        'file': '757_1364311680.mp4',
        'md5': '0813c2430bea7a46bf13acf3406992f4',
        'info_dict': {
            'description': 'extremely bad day for this guy..!',
            'uploader': 'ljfriel2',
            'title': 'Most unlucky car accident'
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
        'file': 'f93_1390833151.mp4',
        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
        'info_dict': {
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
            'uploader': 'ARD_Stinkt',
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``uploader`` and ``formats``; each entry of ``formats`` carries a
        ``url`` and a ``format_note`` (the source's ``label``, or None).
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')
        webpage = self._download_webpage(url, video_id)

        # Prefer the "sources: [...]" list embedded in the page; when it is
        # absent (default=None), fall back to a lone `file: "..."` entry and
        # wrap it so that both cases are parsed the same way below.
        sources_raw = self._search_regex(
            r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None)
        if sources_raw is None:
            # The space before %s is required: the key-quoting regex below
            # only matches keys that are preceded by whitespace.
            sources_raw = '[{ %s}]' % (
                self._search_regex(r'(file: ".*?"),', webpage, 'video URL'))

        # The embedded data is a JavaScript literal with unquoted keys
        # (e.g. `file: "..."`); quote the keys so json.loads accepts it.
        sources_json = re.sub(r'\s([a-z]+):\s', r'"\1": ', sources_raw)
        sources = json.loads(sources_json)

        formats = [{
            'format_note': s.get('label'),
            'url': s['file'],
        } for s in sources]
        # NOTE(review): presumably orders the formats by the framework's
        # quality preference -- defined on InfoExtractor, not visible here.
        self._sort_formats(formats)

        # Drop the site prefix from the og:title value.
        video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
        video_description = self._og_search_description(webpage)
        # fatal=False is passed, so a missing uploader is presumably
        # tolerated rather than aborting the extraction -- confirm in helper.
        video_uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)

        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader,
            'formats': formats,
        }