2014-01-28 01:40:10 +08:00
# coding: utf-8
from __future__ import unicode_literals
2013-06-24 02:24:07 +08:00
import re
from . common import InfoExtractor
from . . utils import (
2014-01-28 01:40:10 +08:00
determine_ext ,
2013-06-24 02:24:07 +08:00
ExtractorError ,
)
2014-01-28 01:40:10 +08:00
2013-06-24 02:24:07 +08:00
class ARDIE(InfoExtractor):
    """Extractor for video pages on ardmediathek.de and mediathek.daserste.de.

    The page HTML is scraped for the title, the ``dcterms.abstract`` meta
    description, the Open Graph thumbnail and every
    ``mediaCollection.addMediaStream(...)`` call; each such call becomes one
    entry of the returned ``formats`` list.
    """

    # The last path component is captured as ``video_id``; it is only used
    # when the URL carries no numeric ``documentId`` query parameter.
    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'

    _TEST = {
        'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
        'file': '19288786.mp4',
        'md5': '515bf47ce209fb3f5a61b7aad364634c',
        'info_dict': {
            'title': 'Edward Snowden im Interview - Held oder Verräter?',
            'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
            'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
        },
        'skip': 'Blocked outside of Germany',
    }

    def _real_extract(self, url):
        """Scrape the page at *url* and return its info dictionary.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``formats`` and ``thumbnail``.

        Raises ExtractorError when the page lists no media streams but
        contains the ``"fsk"`` marker.
        """
        # Determine the video id from the URL: a numeric documentId query
        # parameter wins over the path component matched by _VALID_URL.
        m = re.match(self._VALID_URL, url)
        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            video_id = numid.group(1)
        else:
            video_id = m.group('video_id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<h4 class="headline">(.*?)</h4>'],
            webpage, 'title')
        description = self._html_search_meta(
            'dcterms.abstract', webpage, 'description')
        thumbnail = self._og_search_thumbnail(webpage)

        # One dict per addMediaStream(...) call, with the keys media_type,
        # quality, rtmp_url and video_url (all strings, possibly empty).
        streams = [
            mo.groupdict()
            for mo in re.finditer(
                r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]

        if not streams:
            if '"fsk"' in webpage:
                raise ExtractorError('This video is only available after 20:00')
            # NOTE(review): without the marker we fall through with an empty
            # format list; presumably _sort_formats reports that - confirm.

        formats = []
        for s in streams:
            # Named ``fmt`` rather than ``format`` to avoid shadowing the builtin.
            fmt = {
                'quality': int(s['quality']),
            }
            if s.get('rtmp_url'):
                # RTMP stream: the server URL and the play path arrive as
                # two separate arguments of addMediaStream.
                fmt['protocol'] = 'rtmp'
                fmt['url'] = s['rtmp_url']
                fmt['playpath'] = s['video_url']
            else:
                fmt['url'] = s['video_url']

            # Quality label embedded in the file name just before ".mp4";
            # 'NA' when the URL does not follow that naming scheme.
            quality_name = self._search_regex(
                r'[,.]([a-zA-Z0-9_-]+),?\.mp4', fmt['url'],
                'quality name', default='NA')
            fmt['format_id'] = '%s-%s-%s-%s' % (
                determine_ext(fmt['url']), quality_name, s['media_type'],
                s['quality'])

            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
        }