2014-06-21 17:31:18 +08:00
# encoding: utf-8
2014-01-31 21:00:55 +08:00
from __future__ import unicode_literals
2013-06-24 04:22:08 +08:00
import re
from . common import InfoExtractor
2015-01-14 22:27:14 +08:00
from . . compat import (
compat_urlparse ,
compat_HTTPError ,
)
from . . utils import (
HEADRequest ,
ExtractorError ,
)
2014-11-16 07:51:31 +08:00
from . spiegeltv import SpiegeltvIE
2013-06-24 04:22:08 +08:00
class SpiegelIE(InfoExtractor):
    """Extractor for single video pages under spiegel.de/video/.

    The numeric video id is taken from the URL.  The page is scraped for the
    title, the description meta tag and the media server base URL; the list
    of available encodings is then read from ``<server><id>.xml``.
    """

    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
    _TESTS = [{
        'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
        'md5': '2c2754212136f35fb4b19767d242f66e',
        'info_dict': {
            'id': '1259285',
            'ext': 'mp4',
            'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
            'description': 'md5:8029d8310232196eb235d27575a8b9f4',
            'duration': 49,
        },
    }, {
        'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
        'md5': 'f2cdf638d7aa47654e251e1aee360af1',
        'info_dict': {
            'id': '1309159',
            'ext': 'mp4',
            'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
            'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
            'duration': 983,
        },
    }, {
        'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
        'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
        'info_dict': {
            'id': '1519126',
            'ext': 'mp4',
            'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
            'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        If the request ends up on a URL that ``SpiegeltvIE`` can handle, the
        result is a ``url_result`` delegating to that extractor.  Otherwise
        the result carries ``id``, ``title``, ``description``, ``duration``
        (``None`` when the XML does not provide one) and ``formats``.
        """
        video_id = self._match_id(url)

        webpage, handle = self._download_webpage_handle(url, video_id)

        # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
        final_url = handle.geturl()
        if SpiegeltvIE.suitable(final_url):
            return self.url_result(final_url, 'Spiegeltv')

        # Collapse runs of whitespace (including line breaks) in the heading.
        title = re.sub(r'\s+', ' ', self._html_search_regex(
            r'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>',
            webpage, 'title'))
        description = self._html_search_meta('description', webpage, 'description')

        base_url = self._search_regex(
            r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')

        xml_url = base_url + video_id + '.xml'
        idoc = self._download_xml(xml_url, video_id)

        formats = []
        for n in idoc:
            # Only <typeN> children describe encodings; type6 is excluded.
            if not n.tag.startswith('type') or n.tag == 'type6':
                continue

            format_id = n.tag.rpartition('type')[2]
            video_url = base_url + n.find('./filename').text

            # Test video URLs beforehand as some of them are invalid
            try:
                self._request_webpage(
                    HEADRequest(video_url), video_id,
                    'Checking %s video URL' % format_id)
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                    self.report_warning(
                        '%s video URL is invalid, skipping' % format_id, video_id)
                    continue
                # Any other probe failure falls through: the format is kept
                # without having been verified.

            formats.append({
                'format_id': format_id,
                'url': video_url,
                'width': int(n.find('./width').text),
                'height': int(n.find('./height').text),
                'abr': int(n.find('./audiobitrate').text),
                'vbr': int(n.find('./videobitrate').text),
                'vcodec': n.find('./codec').text,
                'acodec': 'MP4A',
            })

        # The duration is read from the first child of the XML root.  Guard
        # against an empty document or a missing/empty <duration> element
        # instead of failing with IndexError.
        duration = None
        if len(idoc):
            duration_text = idoc[0].findtext('./duration')
            if duration_text:
                duration = float(duration_text)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
2014-09-13 12:55:38 +08:00
class SpiegelArticleIE(InfoExtractor):
    """Extractor for spiegel.de article pages that embed videos.

    Matches any non-``/video/`` page on www.spiegel.de whose path ends in
    ``-<digits>.html``.  The article either links a single video page (the
    result is a ``url_result`` for it) or contains several embedded players
    (the result is a playlist of ``url_result`` entries).
    """

    # Raw string: the pattern contains ``\.`` which is not a valid escape
    # sequence in a normal string literal.
    _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
    IE_NAME = 'Spiegel:Article'
    IE_DESC = 'Articles on spiegel.de'
    _TESTS = [{
        'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
        'info_dict': {
            'id': '1516455',
            'ext': 'mp4',
            'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
            'description': 're:^Patrick Kämnitz gehört.{100,}',
        },
    }, {
        'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
        'info_dict': {

        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Resolve the article at ``url`` to its video page(s).

        Returns a single ``url_result`` when the page has a top-of-page video
        link, otherwise a playlist built from every embedded player found
        (which may be empty if none match).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Links found in the page are resolved against this base.
        site_root = self.http_scheme() + '//spiegel.de/'

        # Single video on top of the page
        video_link = self._search_regex(
            r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
            'video page URL', default=None)
        if video_link:
            video_url = compat_urlparse.urljoin(site_root, video_link)
            return self.url_result(video_url)

        # Multiple embedded videos
        embeds = re.findall(
            r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
            webpage)
        entries = [
            self.url_result(compat_urlparse.urljoin(site_root, embed_path))
            for embed_path in embeds
        ]
        return self.playlist_result(entries)