2014-01-28 01:40:10 +08:00
# coding: utf-8
from __future__ import unicode_literals
2013-06-24 02:24:07 +08:00
import re
from . common import InfoExtractor
from . . utils import (
2014-01-28 01:40:10 +08:00
determine_ext ,
2013-06-24 02:24:07 +08:00
ExtractorError ,
)
2014-01-28 01:40:10 +08:00
2013-06-24 02:24:07 +08:00
class ARDIE(InfoExtractor):
    """Extractor for video pages on ardmediathek.de and mediathek.daserste.de.

    The page itself supplies the title, description and thumbnail; the
    stream URLs come from a separate JSON document fetched from
    ``/play/media/<video_id>``.
    """

    # The last path component is captured as ``video_id``; it is only used
    # as a fallback when the URL carries no numeric ``documentId`` parameter.
    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'

    _TEST = {
        'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
        'file': '19288786.mp4',
        'md5': '515bf47ce209fb3f5a61b7aad364634c',
        'info_dict': {
            'title': 'Edward Snowden im Interview - Held oder Verräter?',
            'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
            'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
        },
        'skip': 'Blocked outside of Germany',
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at *url*.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``formats`` and ``thumbnail``.

        Raises ExtractorError when the media document lists no streams and
        the page contains the ``"fsk"`` marker.
        """
        # Prefer the numeric documentId query parameter as the video id and
        # fall back to the last path component matched by _VALID_URL.
        doc_id_match = re.search(r'documentId=([0-9]+)', url)
        if doc_id_match:
            video_id = doc_id_match.group(1)
        else:
            video_id = re.match(self._VALID_URL, url).group('video_id')

        webpage = self._download_webpage(url, video_id)

        # Candidate patterns for the title; presumably tried in order by the
        # helper -- confirm against _html_search_regex.
        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms.title" content="(.*?)"/>',
             r'<h4 class="headline">(.*?)</h4>'],
            webpage, 'title')
        description = self._html_search_meta(
            'dcterms.abstract', webpage, 'description')
        thumbnail = self._og_search_thumbnail(webpage)

        media_info = self._download_json(
            'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
        # The second element of the _mediaArray contains the standard http urls
        # NOTE(review): the fixed index assumes at least two entries -- confirm.
        streams = media_info['_mediaArray'][1]['_mediaStreamArray']
        if not streams and '"fsk"' in webpage:
            # NOTE(review): this looks like an expected, user-facing condition;
            # consider flagging it as such if ExtractorError supports that.
            raise ExtractorError('This video is only available after 20:00')

        formats = []
        for stream in streams:
            # ``fmt`` rather than ``format`` so the builtin is not shadowed.
            fmt = {
                'quality': stream['_quality'],
                'url': stream['_stream'],
            }
            # format_id is "<file extension>-<quality>".
            fmt['format_id'] = '%s-%s' % (
                determine_ext(fmt['url']), fmt['quality'])
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
        }