1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-24 14:52:58 +08:00

[Generic] WashingtonPost iframe embeds, w/ test

This commit is contained in:
John Hawkinson 2017-04-10 01:09:10 -04:00
parent 86ca418c01
commit b81dc70822
2 changed files with 27 additions and 0 deletions

View File

@ -84,6 +84,7 @@ from .videomore import VideomoreIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .viewlift import ViewLiftEmbedIE from .viewlift import ViewLiftEmbedIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .washingtonpost import WashingtonPostIE
from .webcaster import WebcasterFeedIE from .webcaster import WebcasterFeedIE
from .xhamster import XHamsterEmbedIE from .xhamster import XHamsterEmbedIE
@ -1568,6 +1569,20 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': [RutubeIE.ie_key()], 'add_ie': [RutubeIE.ie_key()],
}, },
{
# WashingtonPost embed
'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
'info_dict': {
'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
'ext': 'mp4',
'title': "No one has seen the drama series based on Trump's life \u2014 until now",
'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
'timestamp': 1455216756,
'uploader': 'The Washington Post',
'upload_date': '20160211',
},
'add_ie': [WashingtonPostIE.ie_key()],
},
{ {
# ThePlatform embedded with whitespaces in URLs # ThePlatform embedded with whitespaces in URLs
'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
@ -2556,6 +2571,12 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
rutube_urls, ie=RutubeIE.ie_key()) rutube_urls, ie=RutubeIE.ie_key())
# Look for WashingtonPost embeds
wapo_urls = WashingtonPostIE._extract_urls(webpage)
if wapo_urls:
return self.playlist_from_matches(
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')

View File

@ -13,6 +13,7 @@ from ..utils import (
class WashingtonPostIE(InfoExtractor): class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost' IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
# Regex fragment matching a Washington Post video URL ending in a UUID-style
# id; interpolated into the iframe pattern used by _extract_urls.
# Written as a raw string so that the regex escapes (\. and \d) are not
# treated as (invalid) Python string escapes.
EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
_TEST = { _TEST = {
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d', 'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa', 'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
@ -27,6 +28,11 @@ class WashingtonPostIE(InfoExtractor):
}, },
} }
@classmethod
def _extract_urls(cls, webpage):
    """Return the Washington Post embed URLs found in *webpage*.

    Scans the HTML for <iframe> tags whose src attribute value is exactly
    a URL matching cls.EMBED_URL and returns those URLs as a list of
    strings, in document order (empty list when nothing matches).

    The src value may be wrapped in either double or single quotes; the
    closing quote must be the same character as the opening one.
    """
    # finditer + named group keeps the result a flat list of URL strings
    # even though the pattern now has a second (quote) group.
    return [
        mobj.group('url')
        for mobj in re.finditer(
            r'<iframe[^>]+\bsrc=(?P<q>["\'])(?P<url>%s)(?P=q)' % cls.EMBED_URL,
            webpage)]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(