From a013fd1479ded0a9344bc7f29c70906bd1d5b044 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 9 Apr 2017 21:15:59 -0400 Subject: [PATCH 1/5] [CondeNast] Don't presume "var" precedes "params =" Fixes test_CondeNast_2. --- youtube_dl/extractor/condenast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 0c3f0c0e4..750363ea5 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -119,7 +119,7 @@ class CondeNastIE(InfoExtractor): def _extract_video_params(self, webpage): query = {} params = self._search_regex( - r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None) + r'(?s)\bparams = {(.+?)}[;,]', webpage, 'player params', default=None) if params: query.update({ 'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'), From 83a0fdeb822bdbe3393ad1f733cb0cc3bd417d8e Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 9 Apr 2017 21:43:01 -0400 Subject: [PATCH 2/5] [Generic] CondeNast's test_Generic_44 missing info --- youtube_dl/extractor/generic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c108d4a8a..32fa4cc64 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -740,7 +740,11 @@ class GenericIE(InfoExtractor): 'id': '53501be369702d3275860000', 'ext': 'mp4', 'title': 'Honda’s New Asimo Robot Is More Human Than Ever', - } + 'upload_date': '99990101', + 'uploader': 'wired', + 'timestamp': 253370764800, + }, + 'add_ie': ['CondeNast'], }, # Dailymotion embed { From f4cda0277b2ca34a86c1a1f3a2e859b795c80007 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 9 Apr 2017 21:49:07 -0400 Subject: [PATCH 3/5] [Generic] fix title on test_Generic_52 + add_ie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although The New 
Yorker is a Condé Nast property, this is a BrightcoveLegacyIE embed, not a CondeNastIE. :) --- youtube_dl/extractor/generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 32fa4cc64..b0ef43442 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -865,13 +865,14 @@ class GenericIE(InfoExtractor): 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html', 'info_dict': { 'id': 'always-never', - 'title': 'Always / Never - The New Yorker', + 'title': 'Always / Never', }, 'playlist_count': 3, 'params': { 'extract_flat': False, 'skip_download': True, - } + }, + 'add_ie': ['BrightcoveLegacy'], }, # MLB embed { From b41be55155cd7a7c7e621a0f156f7e194862a1d6 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 9 Apr 2017 22:36:55 -0400 Subject: [PATCH 4/5] [CondeNast] Refactor to handle iframe Fixes test_CondeNast, which changed from the old-style embedded js case to an iframe. --- youtube_dl/extractor/condenast.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 750363ea5..5eab75beb 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -62,18 +62,20 @@ class CondeNastIE(InfoExtractor): EMBED_URL = r'(?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+?' % '|'.join(_SITES.keys()) _TESTS = [{ + # Now an iframe 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', 'md5': '1921f713ed48aabd715691f774c451f7', 'info_dict': { 'id': '5171b343c2b4c00dd0c1ccb3', 'ext': 'mp4', 'title': '3D Printed Speakers Lit With LED', - 'description': 'Check out these beautiful 3D printed LED speakers. 
You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', + # 'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.', 'uploader': 'wired', 'upload_date': '20130314', 'timestamp': 1363219200, } }, { + # older JS embed: var params = { \n videoId: 'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series', 'info_dict': { 'id': '58d1865bfd2e6126e2000015', @@ -84,7 +86,7 @@ class CondeNastIE(InfoExtractor): 'timestamp': 1490126427, }, }, { - # JS embed + # new JS embed: var params;\n ... params = { \n videoId: 'url': 'http://player.cnevids.com/embedjs/55f9cf8b61646d1acf00000c/5511d76261646d5566020000.js', 'md5': 'f1a6f9cafb7083bab74a710f65d08999', 'info_dict': { @@ -126,15 +128,30 @@ class CondeNastIE(InfoExtractor): 'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'), 'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'), }) - else: - params = extract_attributes(self._search_regex( - r'(<[^>]+data-js="video-player"[^>]+>)', - webpage, 'player params element')) + return query + + # No test for this case? 
+ # xxx @remitamine in 4f427c4be860c582ca72dd4be64d45b54499232c + tag = self._search_regex( + r'(<[^>]+data-js="video-player"[^>]+>)', + webpage, 'player params element', default=None) + if tag: + params = extract_attributes(tag) query.update({ 'videoId': params['data-video'], 'playerId': params['data-player'], 'target': params['id'], }) + return query + + iframe = self._search_regex( + r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe player params') + query.update({ + 'videoId': self._search_regex(r'videoId=(\w+)', iframe, 'video id'), + 'playerId': self._search_regex(r'playerId=(\w+)', iframe, 'player id'), + # just guessing here: + 'target': self._search_regex(r'target=(\w+)', iframe, 'target', default=None), + }) return query def _extract_video(self, params): From e3ea5c7eb9f631e37efb09e451002a518b97f024 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 9 Apr 2017 22:53:02 -0400 Subject: [PATCH 5/5] [Generic] CondeNast data-url embed, with test URL already supported in CondeNastIE, just needed a tweak to the GenericIE hook. --- youtube_dl/extractor/generic.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b0ef43442..7f7d64f35 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -732,7 +732,7 @@ class GenericIE(InfoExtractor): 'Forbidden' ] }, - # Condé Nast embed + # Condé Nast iframe embed { 'url': 'http://www.wired.com/2014/04/honda-asimo/', 'md5': 'ba0dfe966fa007657bd1443ee672db0f', @@ -746,6 +746,20 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['CondeNast'], }, + # Condé Nast embed
+ { + 'url': 'http://www.vanityfair.com/hollywood/2017/04/chris-evans-gifted-movie-captain-america-video', + 'md5': '07e1618750fa14b573c5d1bf6ff01429', + 'info_dict': { + 'id': '58e54e8dfd2e615252000010', + 'ext': 'mp4', + 'timestamp': 1491472800, + 'uploader': 'vanityfair', + 'title': 'Chris Evans Answers Kids’ Questions About The Universe', + 'upload_date': '20170406', + }, + 'add_ie': ['CondeNast'], + }, # Dailymotion embed { 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/', @@ -2403,7 +2417,7 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'MLB') mobj = re.search( - r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL, + r'<(?:iframe|script|div)[^>]+?(?:src|data-url)=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL, webpage) if mobj is not None: return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')