# coding: utf-8
from __future__ import unicode_literals

import functools
import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unified_strdate,
    OnDemandPagedList,
    xpath_text,
)
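

# Module-level helper: downloads the XML description of a single video and
# turns it into an info dict.  It takes the calling extractor instance as
# `ie`, so it is not tied to one extractor class.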
def extract_from_xml_url(ie, video_id, xml_url):
    doc = ie._download_xml(
        xml_url, video_id,
        note='Downloading video info',
        errnote='Failed to download video info')

    title = doc.find('.//information/title').text
    description = xpath_text(doc, './/information/detail', 'description')
    duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
    uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
    uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
    upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
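
    # Convert one <formitaet> node into a format dict.  Its 'basetype'
    # attribute packs vcodec, acodec, container, protocol, index and index
    # protocol into a single underscore-separated string; entries whose URL
    # points at www.metafilegenerator are flagged as unavailable and
    # filtered out further below.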
    def xml_to_format(fnode):
        video_url = fnode.find('url').text
        is_available = 'http://www.metafilegenerator' not in video_url

        format_id = fnode.attrib['basetype']
        format_m = re.match(r'''(?x)
            (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
            (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
        ''', format_id)

        ext = format_m.group('container')
        proto = format_m.group('proto').lower()

        quality = xpath_text(fnode, './quality', 'quality')
        abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
        vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)

        width = int_or_none(xpath_text(fnode, './width', 'width'))
        height = int_or_none(xpath_text(fnode, './height', 'height'))
        filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))

        format_note = ''
        if not format_note:
            format_note = None

        return {
            'format_id': format_id + '-' + quality,
            'url': video_url,
            'ext': ext,
            'acodec': format_m.group('acodec'),
            'vcodec': format_m.group('vcodec'),
            'abr': abr,
            'vbr': vbr,
            'width': width,
            'height': height,
            'filesize': filesize,
            'format_note': format_note,
            'protocol': proto,
            '_available': is_available,
        }
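
    # Thumbnails are listed as <teaserimage> nodes; an optional 'key'
    # attribute gives the size as "WIDTHxHEIGHT".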
    def xml_to_thumbnails(fnode):
        thumbnails = []
        for node in fnode:
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            if 'key' in node.attrib:
                m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
                if m:
                    thumbnail['width'] = int(m.group(1))
                    thumbnail['height'] = int(m.group(2))
            thumbnails.append(thumbnail)
        return thumbnails

    thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
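
    # Parse all <formitaet> nodes and keep only the formats flagged as
    # available in xml_to_format.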
    format_nodes = doc.findall('.//formitaeten/formitaet')
    formats = list(filter(
        lambda f: f['_available'],
        map(xml_to_format, format_nodes)))
    ie._sort_formats(formats)

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'thumbnails': thumbnails,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'upload_date': upload_date,
        'formats': formats,
    }
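

# Single ZDFmediathek videos; the _VALID_URL also covers the internal
# zdf:<id> / zdf:video:<id> forms (the latter are emitted by ZDFChannelIE
# below).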
class ZDFIE(InfoExtractor):
    _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P<id>[0-9]+)(?:/[^/?]+)?(?:\?.*)?'

    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt',
        'info_dict': {
            'id': '2037704',
            'ext': 'webm',
            'title': 'ZDFspezial - Ende des Machtpokers',
            'description': 'Union und SPD haben sich auf einen Koalitionsvertrag geeinigt. Aber was bedeutet das für die Bürger? Sehen Sie hierzu das ZDFspezial "Ende des Machtpokers - Große Koalition für Deutschland".',
            'duration': 1022,
            'uploader': 'spezial',
            'uploader_id': '225948',
            'upload_date': '20131127',
        },
        'skip': 'Videos on ZDF.de are depublicised in short order',
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
        return extract_from_xml_url(self, video_id, xml_url)
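

# Channel/topic overview pages.  Teasers are fetched lazily, _PAGE_SIZE at a
# time, via OnDemandPagedList, and each entry is re-dispatched as a
# zdf:video:<id> or zdf:topic:<id> URL.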
class ZDFChannelIE(InfoExtractor):
    _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic',
        'info_dict': {
            'id': '1586442',
        },
        'playlist_count': 3,
    }

    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, page):
        offset = page * self._PAGE_SIZE
        xml_url = (
            'http://www.zdf.de/ZDFmediathek/xmlservice/web/aktuellste?ak=web&offset=%d&maxLength=%d&id=%s'
            % (offset, self._PAGE_SIZE, channel_id))
        doc = self._download_xml(
            xml_url, channel_id,
            note='Downloading channel info',
            errnote='Failed to download channel info')

        title = doc.find('.//information/title').text
        description = doc.find('.//information/detail').text
        for asset in doc.findall('.//teasers/teaser'):
            a_type = asset.find('./type').text
            a_id = asset.find('./details/assetId').text
            if a_type not in ('video', 'topic'):
                continue
            yield {
                '_type': 'url',
                'playlist_title': title,
                'playlist_description': description,
                'url': 'zdf:%s:%s' % (a_type, a_id),
            }

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, channel_id), self._PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': channel_id,
            'entries': entries,
        }