# 2014-01-27 12:47:30 +08:00
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . utils import (
parse_duration ,
unified_strdate ,
)
class HuffPostIE(InfoExtractor):
    """Extractor for live.huffingtonpost.com segment pages and embed players.

    The segment id is taken from the URL and used to query the JSON segment
    API on embed.live.huffingtonpost.com; metadata, thumbnails and formats
    are all built from that single JSON document.
    """

    IE_DESC = 'Huffington Post'
    # Verbose regex: matches both the regular segment page
    # (/r/segment/<slug>/<id>) and the embed player
    # (/HPLEmbedPlayer/?segmentId=<id>); the hexadecimal id is captured
    # in the named group "id".
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more.',
            'duration': 1549,
            'upload_date': '20140124',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the segment referenced by ``url``.

        ``url`` must match ``_VALID_URL``.  The returned dict carries the
        keys ``id``, ``title``, ``description``, ``formats``, ``duration``,
        ``upload_date`` and ``thumbnails``.  Only ``title`` is treated as
        mandatory in the API response (a missing title raises ``KeyError``);
        every other field degrades to ``None`` or an empty list when the
        API omits it.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        description = data.get('description')

        # Optional metadata: only hand a value to the parsing helpers when
        # the API actually supplied one, so an absent field does not abort
        # the whole extraction.
        running_time = data.get('running_time')
        duration = (
            parse_duration(running_time) if running_time is not None else None)
        starts_at = (data.get('schedule') or {}).get('starts_at')
        upload_date = unified_strdate(starts_at) if starts_at else None

        # Keep only images whose file name embeds a "<width>x<height>" token
        # right before the extension; that token becomes the resolution.
        thumbnails = []
        for image_url in (data.get('images') or {}).values():
            if not image_url:
                continue
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        # One format per entry of sources.live; the mapping key is used as
        # the format name, and keys starting with "audio/" are flagged as
        # having no video codec.
        live_sources = (data.get('sources') or {}).get('live') or {}
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': source_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, source_url in live_sources.items()]

        fid = data.get('fivemin_id')
        if fid:
            # The 5min path is derived from the numeric id: the "category"
            # is id // 100 + 1, and its last three digits form the first
            # path component.  %-formatting keeps this working whether the
            # API delivers the id as a string or as a number.
            fcat = str(int(fid) // 100 + 1)
            furl = 'http://avideos.5min.com/2/%s/%s/%s.mp4' % (
                fcat[-3:], fcat, fid)
            formats.append({
                'format': 'fivemin',
                'url': furl,
                'preference': 1,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }