From ad607563a2fbb5275ea39f7a052c09ffa232e271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 16:46:26 +0600 Subject: [PATCH 01/38] [globo] Separate article extractor --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/globo.py | 140 +++++++++++++++++-------------- 2 files changed, 79 insertions(+), 66 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 10286aa88..94150a28f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -212,7 +212,10 @@ from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE -from .globo import GloboIE +from .globo import ( + GloboIE, + GloboArticleIE, +) from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 33d6432a6..828e40d76 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -18,75 +18,52 @@ from ..utils import ( class GloboIE(InfoExtractor): - _VALID_URL = 'https?://.+?\.globo\.com/(?P.+)' + _VALID_URL = '(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P\d{7,})' _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist' _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s' - _VIDEOID_REGEXES = [ - r'\bdata-video-id="(\d+)"', - r'\bdata-player-videosids="(\d+)"', - r']+\bid="(\d+)"', - ] - _RESIGN_EXPIRATION = 86400 - _TESTS = [ - { - 'url': 'http://globotv.globo.com/sportv/futebol-nacional/v/os-gols-de-atletico-mg-3-x-2-santos-pela-24a-rodada-do-brasileirao/3654973/', - 'md5': '03ebf41cb7ade43581608b7d9b71fab0', - 'info_dict': { - 'id': '3654973', - 'ext': 'mp4', - 'title': 'Os gols de Atlético-MG 3 x 2 Santos pela 24ª rodada do Brasileirão', - 'duration': 251.585, - 'uploader': 'SporTV', - 'uploader_id': 698, - 'like_count': int, - } - }, - { - 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', - 'md5': 'b3ccc801f75cd04a914d51dadb83a78d', - 'info_dict': { - 'id': '3607726', - 'ext': 'mp4', - 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', - 'duration': 103.204, - 'uploader': 'Globo.com', - 'uploader_id': 265, - 'like_count': int, - } - }, - { - 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html', - 'md5': '307fdeae4390ccfe6ba1aa198cf6e72b', - 'info_dict': { - 'id': '3652183', - 'ext': 'mp4', - 'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião', - 'duration': 110.711, - 'uploader': 'Rede Globo', - 'uploader_id': 196, - 'like_count': int, - } - }, - { - 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', - 'md5': 'c1defca721ce25b2354e927d3e4b3dec', - 'info_dict': { - 'id': '3928201', - 'ext': 'mp4', - 'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas', - 'duration': 1472.906, - 'uploader': 'Canal Brasil', - 'uploader_id': 705, - 'like_count': int, - } - }, - ] + _TESTS = [{ + 'url': 'http://globotv.globo.com/sportv/futebol-nacional/v/os-gols-de-atletico-mg-3-x-2-santos-pela-24a-rodada-do-brasileirao/3654973/', + 'md5': '03ebf41cb7ade43581608b7d9b71fab0', + 'info_dict': { + 'id': '3654973', + 'ext': 'mp4', + 'title': 'Os gols de Atlético-MG 3 x 2 Santos pela 24ª rodada do Brasileirão', + 'duration': 251.585, + 'uploader': 'SporTV', + 'uploader_id': 698, + 'like_count': int, + } + }, { + 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', + 'md5': 'b3ccc801f75cd04a914d51dadb83a78d', + 'info_dict': { + 'id': '3607726', + 'ext': 'mp4', + 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', + 'duration': 103.204, + 'uploader': 'Globo.com', + 'uploader_id': 265, + 'like_count': int, + } + }, { + 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', + 'md5': 'c1defca721ce25b2354e927d3e4b3dec', + 'info_dict': { + 'id': '3928201', + 'ext': 'mp4', + 'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas', + 'duration': 1472.906, + 'uploader': 'Canal Brasil', + 'uploader_id': 705, + 'like_count': int, + } + }] - class MD5(): + class MD5: HEX_FORMAT_LOWERCASE = 0 HEX_FORMAT_UPPERCASE = 1 BASE64_PAD_CHARACTER_DEFAULT_COMPLIANCE = '' @@ -353,9 +330,6 @@ class GloboIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id') - video = self._download_json( self._API_URL_TEMPLATE % video_id, video_id)['videos'][0] @@ -417,3 +391,39 @@ class GloboIE(InfoExtractor): 'like_count': like_count, 'formats': formats } + + +class GloboArticleIE(InfoExtractor): + _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/]+)\.html' + + _VIDEOID_REGEXES = [ + r'\bdata-video-id=["\'](\d{7,})', + r'\bdata-player-videosids=["\'](\d{7,})', + r'\bvideosIDs\s*:\s*["\'](\d{7,})', + r'\bdata-id=["\'](\d{7,})', + r']+\bid=["\'](\d{7,})', + ] + + _TEST = { + 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html', + 'md5': '307fdeae4390ccfe6ba1aa198cf6e72b', + 'info_dict': { + 'id': '3652183', + 'ext': 'mp4', + 'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião', + 'duration': 110.711, + 'uploader': 'Rede Globo', + 'uploader_id': 196, + 'like_count': int, + } + } + + @classmethod + def suitable(cls, url): + return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(self._VIDEOID_REGEXES, webpage, 'video id') + return self.url_result('globo:%s' % video_id, 'Globo') From e3778cce0e912f803ea10cb806406f7fcafe840f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 16:51:19 +0600 Subject: [PATCH 02/38] [globo] Improve m3u8 extraction --- youtube_dl/extractor/globo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 828e40d76..c28899011 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -367,7 +367,10 @@ class GloboIE(InfoExtractor): resource_url = resource['url'] signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4')) + m3u8_formats = self._extract_m3u8_formats( + signed_url, resource_id, 'mp4', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) else: formats.append({ 'url': signed_url, From c3459d24f16056e8ae8f982db2a10871ef18e80a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 16:53:21 +0600 Subject: [PATCH 03/38] [globo] Skip unsupported smooth streaming --- youtube_dl/extractor/globo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index c28899011..ec451bb07 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -338,7 +338,7 @@ class GloboIE(InfoExtractor): formats = [] for resource in video['resources']: resource_id = resource.get('_id') - if not resource_id: + if not resource_id or resource_id.endswith('manifest'): continue security = self._download_json( From 5d235ca7f66af1f82c1a4d753d238f48fc3afa40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 16:55:39 +0600 Subject: [PATCH 04/38] [globo] Prefer native m3u8 --- youtube_dl/extractor/globo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index ec451bb07..2a805cbb2 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -368,7 +368,8 @@ class GloboIE(InfoExtractor): signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): m3u8_formats = self._extract_m3u8_formats( - signed_url, resource_id, 'mp4', m3u8_id='hls', fatal=False) + signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) if m3u8_formats: formats.extend(m3u8_formats) else: From b4ef6a0038657c1adde565df947e42ad1e1b4195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:01:27 +0600 Subject: [PATCH 05/38] [globo] Remove non available test --- youtube_dl/extractor/globo.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 2a805cbb2..8aada01dc 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -26,18 +26,6 @@ class GloboIE(InfoExtractor): _RESIGN_EXPIRATION = 86400 _TESTS = [{ - 'url': 'http://globotv.globo.com/sportv/futebol-nacional/v/os-gols-de-atletico-mg-3-x-2-santos-pela-24a-rodada-do-brasileirao/3654973/', - 'md5': '03ebf41cb7ade43581608b7d9b71fab0', - 'info_dict': { - 'id': '3654973', - 'ext': 'mp4', - 'title': 'Os gols de Atlético-MG 3 x 2 Santos pela 24ª rodada do Brasileirão', - 'duration': 251.585, - 'uploader': 'SporTV', - 'uploader_id': 698, - 'like_count': int, - } - }, { 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', 'md5': 'b3ccc801f75cd04a914d51dadb83a78d', 'info_dict': { From aebb42d32b608eaffb424e5e7c22f1b68a491e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:01:55 +0600 Subject: [PATCH 06/38] [globo] Remove like count It's no longer provided --- youtube_dl/extractor/globo.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 8aada01dc..dc89e46ac 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -35,7 +35,6 @@ class GloboIE(InfoExtractor): 'duration': 103.204, 'uploader': 'Globo.com', 'uploader_id': 265, - 'like_count': int, } }, { 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', @@ -47,7 +46,6 @@ class GloboIE(InfoExtractor): 'duration': 1472.906, 'uploader': 'Canal Brasil', 'uploader_id': 705, - 'like_count': int, } }] @@ -370,7 +368,6 @@ class GloboIE(InfoExtractor): self._sort_formats(formats) duration = float_or_none(video.get('duration'), 1000) - like_count = int_or_none(video.get('likes')) uploader = video.get('channel') uploader_id = video.get('channel_id') @@ -380,7 +377,6 @@ class GloboIE(InfoExtractor): 'duration': duration, 'uploader': uploader, 'uploader_id': uploader_id, - 'like_count': like_count, 'formats': formats } @@ -406,7 +402,6 @@ class GloboArticleIE(InfoExtractor): 'duration': 110.711, 'uploader': 'Rede Globo', 'uploader_id': 196, - 'like_count': int, } } From a4a6b7b80f18680ee0a8bba50a24c58edd3f2a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:03:45 +0600 Subject: [PATCH 07/38] [globo] Improve http formats --- youtube_dl/extractor/globo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index dc89e46ac..64622aa5c 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -361,8 +361,8 @@ class GloboIE(InfoExtractor): else: formats.append({ 'url': signed_url, - 'format_id': resource_id, - 'height': resource.get('height'), + 'format_id': 'http-%s' % resource_id, + 'height': int_or_none(resource.get('height')), }) self._sort_formats(formats) From 264cd00fff4f6d7063d43e1d476de46901bd9c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:10:45 +0600 Subject: [PATCH 08/38] [globo] Update tests --- youtube_dl/extractor/globo.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 64622aa5c..0337256ed 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -35,18 +35,30 @@ class GloboIE(InfoExtractor): 'duration': 103.204, 'uploader': 'Globo.com', 'uploader_id': 265, - } + }, + }, { + 'url': 'http://globoplay.globo.com/v/4581987/', + 'md5': 'f36a1ecd6a50da1577eee6dd17f67eff', + 'info_dict': { + 'id': '4581987', + 'ext': 'mp4', + 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP', + 'duration': 137.973, + 'uploader': 'Rede Globo', + 'uploader_id': 196, + }, + }, { + 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html', + 'only_matching': True, + }, { + 'url': 'http://globosatplay.globo.com/globonews/v/4472924/', + 'only_matching': True, + }, { + 'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/', + 'only_matching': True, }, { 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', - 'md5': 'c1defca721ce25b2354e927d3e4b3dec', - 'info_dict': { - 'id': '3928201', - 'ext': 'mp4', - 'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas', - 'duration': 1472.906, - 'uploader': 'Canal Brasil', - 'uploader_id': 705, - } + 'only_matching': True, }] class MD5: From e7d34c03f200e178e9d6dfe4ae3f6856e382a4b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:12:42 +0600 Subject: [PATCH 09/38] [globo] Force uploader id to be string --- youtube_dl/extractor/globo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 0337256ed..6c0fc54de 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -14,6 +14,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + str_or_none, ) @@ -34,7 +35,7 @@ class GloboIE(InfoExtractor): 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', 'duration': 103.204, 'uploader': 'Globo.com', - 'uploader_id': 265, + 'uploader_id': '265', }, }, { 'url': 'http://globoplay.globo.com/v/4581987/', @@ -45,7 +46,7 @@ class GloboIE(InfoExtractor): 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP', 'duration': 137.973, 'uploader': 'Rede Globo', - 'uploader_id': 196, + 'uploader_id': '196', }, }, { 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html', @@ -381,7 +382,7 @@ class GloboIE(InfoExtractor): duration = float_or_none(video.get('duration'), 1000) uploader = video.get('channel') - uploader_id = video.get('channel_id') + uploader_id = str_or_none(video.get('channel_id')) return { 'id': video_id, From c13722480bebfb1fc33169516790df2e99b3e499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:13:35 +0600 Subject: [PATCH 10/38] [globo:article] Fix test --- youtube_dl/extractor/globo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 6c0fc54de..5883be704 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -414,7 +414,7 @@ class GloboArticleIE(InfoExtractor): 'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião', 'duration': 110.711, 'uploader': 'Rede Globo', - 'uploader_id': 196, + 'uploader_id': '196', } } From 5d501a0901c36695c9d6ca3958ac4ccfdea90954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:42:11 +0600 Subject: [PATCH 11/38] [globo] Add more tests --- youtube_dl/extractor/globo.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 5883be704..c65ef6bcf 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -60,6 +60,9 @@ class GloboIE(InfoExtractor): }, { 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', 'only_matching': True, + }, { + 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html', + 'only_matching': True, }] class MD5: @@ -405,7 +408,7 @@ class GloboArticleIE(InfoExtractor): r']+\bid=["\'](\d{7,})', ] - _TEST = { + _TESTS = [{ 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html', 'md5': '307fdeae4390ccfe6ba1aa198cf6e72b', 'info_dict': { @@ -416,7 +419,13 @@ class GloboArticleIE(InfoExtractor): 'uploader': 'Rede Globo', 'uploader_id': '196', } - } + }, { + 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html', + 'only_matching': True, + }, { + 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html', + 'only_matching': True, + }] @classmethod def suitable(cls, url): From 17d1900581ffd12866e56640080ce340d99149a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 17:57:46 +0600 Subject: [PATCH 12/38] [vk] Fix view count extraction (Closes #7353) --- youtube_dl/extractor/vk.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 765e9e6fd..01960b827 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -281,9 +281,13 @@ class VKIE(InfoExtractor): mobj.group(1) + ' ' + mobj.group(2) upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) - view_count = str_to_int(self._search_regex( - r'"mv_views_count_number"[^>]*>([\d,.]+) views<', - info_page, 'view count', fatal=False)) + view_count = None + views = self._html_search_regex( + r'"mv_views_count_number"[^>]*>(.+?\bviews?)<', + info_page, 'view count', fatal=False) + if views: + view_count = str_to_int(self._search_regex( + r'([\d,.]+)', views, 'view count', fatal=False)) formats = [{ 'format_id': k, From cb5a470635ea2ad91f18d33e391979aabb0755fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 4 Nov 2015 16:18:51 +0100 Subject: [PATCH 13/38] [vimeo] Remove unused import --- youtube_dl/extractor/vimeo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b608740b8..ca716c8f5 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -8,7 +8,6 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse, compat_urllib_request, compat_urlparse, ) From 44b2264feae331eeb34e83eed1387def3d61a437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 22:12:24 +0600 Subject: [PATCH 14/38] [youtube] Prefer video_info with token available --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d7eda7aa7..5eeb3c663 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1107,6 +1107,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not video_info: video_info = get_video_info if 'token' in get_video_info: + if 'token' not in video_info: + video_info = get_video_info break if 'token' not in video_info: if 'reason' in video_info: From 89ea063eebae84792a7ccb968533ff8bf6a41d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 4 Nov 2015 22:49:23 +0600 Subject: [PATCH 15/38] [youtube] Clarify rationale for preferring a video info with token (#7362) --- youtube_dl/extractor/youtube.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5eeb3c663..e2a43299f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1107,6 +1107,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not video_info: video_info = get_video_info if 'token' in get_video_info: + # Different get_video_info requests may report different results, e.g. + # some may report video unavailability, but some may serve it without + # any complaint (see https://github.com/rg3/youtube-dl/issues/7362, + # the original webpage as well as el=info and el=embedded get_video_info + # requests report video unavailability due to geo restriction while + # el=detailpage succeeds and returns valid data). This is probably + # due to YouTube measures against IP ranges of hosting providers. + # Working around by preferring the first succeeded video_info containing + # the token if no such video_info yet was found. if 'token' not in video_info: video_info = get_video_info break From f93ded98522cc1272a8d2210738937132292afc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Nov 2015 01:54:49 +0600 Subject: [PATCH 16/38] [prosiebensat1] Add support for .ch domains (Closes #7365) --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index effcf1db3..baa54a3af 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -20,7 +20,7 @@ from ..utils import ( class ProSiebenSat1IE(InfoExtractor): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P.+)' _TESTS = [ { From b15c44cd36831f175e9dd4081b82beb8075790b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Nov 2015 02:51:30 +0600 Subject: [PATCH 17/38] [periscope] Add support for videos with broadcast_id (Closes #7359) --- youtube_dl/extractor/periscope.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 8ad936758..0f9d7576f 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -27,9 +27,10 @@ class PeriscopeIE(InfoExtractor): 'skip': 'Expires in 24 hours', } - def _call_api(self, method, token): + def _call_api(self, method, value): + attribute = 'token' if len(value) > 13 else 'broadcast_id' return self._download_json( - 'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token) + 'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value) def _real_extract(self, url): token = self._match_id(url) From 2549e113b8750a493917436d4fd15ed74a1a4983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Nov 2015 02:55:53 +0600 Subject: [PATCH 18/38] [periscope] Add test for broadcast_id based URL --- youtube_dl/extractor/periscope.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 0f9d7576f..7621d9e99 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -12,7 +12,7 @@ from ..utils import parse_iso8601 class PeriscopeIE(InfoExtractor): IE_DESC = 'Periscope' _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P[^/?#]+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', 'md5': '65b57957972e503fcbbaeed8f4fa04ca', 'info_dict': { @@ -25,7 +25,10 @@ class PeriscopeIE(InfoExtractor): 'uploader_id': '1465763', }, 'skip': 'Expires in 24 hours', - } + }, { + 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', + 'only_matching': True, + }] def _call_api(self, method, value): attribute = 'token' if len(value) > 13 else 'broadcast_id' From 53472df85793cc89deb779c2ffc3ae1f47292fd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Nov 2015 02:56:44 +0600 Subject: [PATCH 19/38] [periscope] Add note on where to find alive example URLs --- youtube_dl/extractor/periscope.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 7621d9e99..887c8020d 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -12,6 +12,7 @@ from ..utils import parse_iso8601 class PeriscopeIE(InfoExtractor): IE_DESC = 'Periscope' _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P[^/?#]+)' + # Alive example URLs can be found here http://onperiscope.com/ _TESTS = [{ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', 'md5': '65b57957972e503fcbbaeed8f4fa04ca', From b3613d36da14ab527166326707c0f911d192144d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 5 Nov 2015 04:37:51 +0600 Subject: [PATCH 20/38] [YoutubeDL] Sanitize path after output template substitution (Closes #7367) --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 12977bf80..1783ce01b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -572,7 +572,7 @@ class YoutubeDL(object): if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) - outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL)) + outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 @@ -580,7 +580,7 @@ class YoutubeDL(object): # to workaround encoding issues with subprocess on python2 @ Windows if sys.version_info < (3, 0) and sys.platform == 'win32': filename = encodeFilename(filename, True).decode(preferredencoding()) - return filename + return sanitize_path(filename) except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None From 6953d8e95a78e83f087693b7353baab96b09fbdd Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 6 Nov 2015 02:09:55 +0100 Subject: [PATCH 21/38] [miomio] fix info extraction (fixes #7366) --- youtube_dl/extractor/miomio.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index a784fc5fb..3f812e005 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import random from .common import InfoExtractor +from ..compat import compat_urllib_request from ..utils import ( xpath_text, int_or_none, @@ -60,10 +61,12 @@ class MioMioIE(InfoExtractor): 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), video_id) - # the following xml contains the actual configuration information on the video file(s) - vid_config = self._download_xml( + vid_config_request = compat_urllib_request.Request( 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config), - video_id) + headers={'Referer': 'http://www.miomio.tv/mioplayer/mioplayer-v3.0.swf'}) + + # the following xml contains the actual configuration information on the video file(s) + vid_config = self._download_xml(vid_config_request, video_id) http_headers = { 'Referer': 'http://www.miomio.tv%s' % mioplayer_path, From e68dd1921ad7528d225a8571066f99b9934b6a06 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 6 Nov 2015 06:33:05 +0100 Subject: [PATCH 22/38] [miomio] use the formats urls headers for downloading xml --- youtube_dl/extractor/miomio.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 3f812e005..6f40bf1b9 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -52,6 +52,8 @@ class MioMioIE(InfoExtractor): mioplayer_path = self._search_regex( r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path') + http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path,} + xml_config = self._search_regex( r'flashvars="type=(?:sina|video)&(.+?)&', webpage, 'xml config') @@ -63,15 +65,11 @@ class MioMioIE(InfoExtractor): vid_config_request = compat_urllib_request.Request( 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config), - headers={'Referer': 'http://www.miomio.tv/mioplayer/mioplayer-v3.0.swf'}) + headers=http_headers) # the following xml contains the actual configuration information on the video file(s) vid_config = self._download_xml(vid_config_request, video_id) - http_headers = { - 'Referer': 'http://www.miomio.tv%s' % mioplayer_path, - } - if not int_or_none(xpath_text(vid_config, 'timelength')): raise ExtractorError('Unable to load videos!', expected=True) From 5003e4283b35acb82ea9793d91bc3cd1ee679f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 21:06:44 +0600 Subject: [PATCH 23/38] [ndr] Relax _VALID_URL (Closes #7383) --- youtube_dl/extractor/ndr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index ba06d8a98..a2b51ccb3 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -23,7 +23,7 @@ class NDRBaseIE(InfoExtractor): class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P[^/?#]+),[\da-z]+\.html' + _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P[^/?#]+),[\da-z]+\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', From 01003d072c20c2ed095930d87c5ce3d5610e66b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 21:07:52 +0600 Subject: [PATCH 24/38] [ndr] Add test for #7383 --- youtube_dl/extractor/ndr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index a2b51ccb3..0be866681 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -78,6 +78,9 @@ class NDRIE(NDRBaseIE): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html', + 'only_matching': True, }] def _extract_embed(self, webpage, display_id): From 1e2eb4b40d46f39d15c067657ecad16fa3b2121d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 21:08:21 +0600 Subject: [PATCH 25/38] [njoy] Relax _VALID_URL --- youtube_dl/extractor/ndr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 0be866681..7043c7e0f 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -105,7 +105,7 @@ class NDRIE(NDRBaseIE): class NJoyIE(NDRBaseIE): IE_NAME = 'njoy' IE_DESC = 'N-JOY' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?:(?P[^/?#]+),)?(?P[\da-z]+)\.html' + _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P[^/?#]+),)?(?P[\da-z]+)\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', From 81413c01651eddcc5180af379f2ce3689a376051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 21:08:52 +0600 Subject: [PATCH 26/38] [ndr:embed] Relax _VALID_URL --- youtube_dl/extractor/ndr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 7043c7e0f..477ce4e6b 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -238,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor): class NDREmbedIE(NDREmbedBaseIE): IE_NAME = 'ndr:embed' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P[\da-z]+)-(?:player|externalPlayer)\.html' + _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P[\da-z]+)-(?:player|externalPlayer)\.html' _TESTS = [{ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', From 92366d189ef280b8ba0057930c54aa14b0ecdd24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 21:09:17 +0600 Subject: [PATCH 27/38] [njoy:embed] Relax _VALID_URL --- youtube_dl/extractor/ndr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 477ce4e6b..16213eed9 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -332,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE): class NJoyEmbedIE(NDREmbedBaseIE): IE_NAME = 'njoy:embed' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' + _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' _TESTS = [{ # httpVideo 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', From deb85c32bbd32e8d280e1919432a11c0bdaa26bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 21:56:31 +0600 Subject: [PATCH 28/38] [postprocessor/ffmpeg] Use ffmpeg as prefix since it's used all over the places (Closes #7371) --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 4f320e124..5ed723bc6 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -272,7 +272,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): return [], information try: - self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) + self._downloader.to_screen('[ffmpeg] Destination: ' + new_path) self.run_ffmpeg(path, new_path, acodec, more_opts) except AudioConversionError as e: raise PostProcessingError( From 179ffab69c3359ab7d0a7b0a2b63c94d8c70af67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 23:06:13 +0600 Subject: [PATCH 29/38] [lynda:course] Force log out (Closes #7361) --- youtube_dl/extractor/lynda.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 5c973e75c..67f2025de 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -82,6 +82,11 @@ class LyndaBaseIE(InfoExtractor): expected=True) raise ExtractorError('Unable to log in') + def _logout(self): + self._download_webpage( + 'http://www.lynda.com/ajax/logout.aspx', None, + 'Logging out', 'Unable to log out', fatal=False) + class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' @@ -210,6 +215,8 @@ class LyndaCourseIE(LyndaBaseIE): course_id, 'Downloading course JSON') course_json = json.loads(page) + self._logout() + if 'Status' in course_json and course_json['Status'] == 'NotFound': raise ExtractorError( 'Course %s does not exist' % course_id, expected=True) From 71bb016160744a80fecaadf5b75b0dc2b1e8089b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 23:10:07 +0600 Subject: [PATCH 30/38] [lynda:course] Modernize and make more robust --- youtube_dl/extractor/lynda.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 67f2025de..98474ded9 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -210,14 +210,13 @@ class LyndaCourseIE(LyndaBaseIE): course_path = mobj.group('coursepath') course_id = mobj.group('courseid') - page = self._download_webpage( + course = self._download_json( 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, course_id, 'Downloading course JSON') - course_json = json.loads(page) self._logout() - if 'Status' in course_json and course_json['Status'] == 'NotFound': + if course.get('Status') == 'NotFound': raise ExtractorError( 'Course %s does not exist' % course_id, expected=True) @@ -227,12 +226,14 @@ class LyndaCourseIE(LyndaBaseIE): # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided # by single video API anymore - for chapter in course_json['Chapters']: - for video in chapter['Videos']: - if video['HasAccess'] is False: + for chapter in course['Chapters']: + for video in chapter.get('Videos', []): + if video.get('HasAccess') is False: unaccessible_videos += 1 continue - videos.append(video['ID']) + video_id = video.get('ID') + if video_id: + videos.append(video_id) if unaccessible_videos > 0: self._downloader.report_warning( @@ -245,6 +246,6 @@ class LyndaCourseIE(LyndaBaseIE): 'Lynda') for video_id in videos] - course_title = course_json['Title'] + course_title = course.get('Title') return self.playlist_result(entries, course_id, course_title) From ea8ed40b2fb70fc2f01aba475128821078873d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 23:24:39 +0600 Subject: [PATCH 31/38] [lynda] Modernize and make more robust --- youtube_dl/extractor/lynda.py | 52 ++++++++++++++++------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 98474ded9..c8a16842e 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -113,51 +113,47 @@ class LyndaIE(LyndaBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage( + video = self._download_json( 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, 'Downloading video JSON') - video_json = json.loads(page) - if 'Status' in video_json: + if 'Status' in video: raise ExtractorError( - 'lynda returned error: %s' % video_json['Message'], expected=True) + 'lynda returned error: %s' % video['Message'], expected=True) - if video_json['HasAccess'] is False: + if video.get('HasAccess') is False: self.raise_login_required('Video %s is only available for members' % video_id) - video_id = compat_str(video_json['ID']) - duration = video_json['DurationInSeconds'] - title = video_json['Title'] + video_id = compat_str(video.get('ID') or video_id) + duration = int_or_none(video.get('DurationInSeconds')) + title = video['Title'] formats = [] - fmts = video_json.get('Formats') + fmts = video.get('Formats') if fmts: - formats.extend([ - { - 'url': fmt['Url'], - 'ext': fmt['Extension'], - 'width': fmt['Width'], - 'height': fmt['Height'], - 'filesize': fmt['FileSize'], - 'format_id': str(fmt['Resolution']) - } for fmt in fmts]) + formats.extend([{ + 'url': f['Url'], + 'ext': f.get('Extension'), + 'width': int_or_none(f.get('Width')), + 'height': int_or_none(f.get('Height')), + 'filesize': int_or_none(f.get('FileSize')), + 'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None, + } for f in fmts if f.get('Url')]) - prioritized_streams = video_json.get('PrioritizedStreams') + prioritized_streams = video.get('PrioritizedStreams') if prioritized_streams: for prioritized_stream_id, prioritized_stream in prioritized_streams.items(): - formats.extend([ - { - 'url': video_url, - 'width': int_or_none(format_id), - 'format_id': '%s-%s' % (prioritized_stream_id, format_id), - } for format_id, video_url in prioritized_stream.items() - ]) + formats.extend([{ + 'url': video_url, + 'width': int_or_none(format_id), + 'format_id': '%s-%s' % (prioritized_stream_id, format_id), + } for format_id, video_url in prioritized_stream.items()]) self._check_formats(formats, video_id) self._sort_formats(formats) - subtitles = self.extract_subtitles(video_id, page) + subtitles = self.extract_subtitles(video_id) return { 'id': video_id, @@ -188,7 +184,7 @@ class LyndaIE(LyndaBaseIE): if srt: return srt - def _get_subtitles(self, video_id, webpage): + def _get_subtitles(self, video_id): url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id subs = self._download_json(url, None, False) if subs: From ae4ddf9efae816f4d52fc584c93e4f0e3c79c410 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 23:27:38 +0600 Subject: [PATCH 32/38] [lynda] PEP 8 --- youtube_dl/extractor/lynda.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index c8a16842e..9a207b2cd 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -227,9 +227,8 @@ class LyndaCourseIE(LyndaBaseIE): if video.get('HasAccess') is False: unaccessible_videos += 1 continue - video_id = video.get('ID') - if video_id: - videos.append(video_id) + if video.get('ID'): + videos.append(video['ID']) if unaccessible_videos > 0: self._downloader.report_warning( From 472404953a22811cc8156da110ea872a924f1f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 23:28:14 +0600 Subject: [PATCH 33/38] [miomio] PEP 8 --- youtube_dl/extractor/miomio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 6f40bf1b9..ce391c759 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -52,7 +52,7 @@ class MioMioIE(InfoExtractor): mioplayer_path = self._search_regex( r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path') - http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path,} + http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} xml_config = self._search_regex( r'flashvars="type=(?:sina|video)&(.+?)&', From 0fa6b17dccd2347cb0611651fc04e36839d33a4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 Nov 2015 23:45:26 +0600 Subject: [PATCH 34/38] [pbs] Simplify and speed up player URL search --- youtube_dl/extractor/pbs.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 3448736a2..7b868d057 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -191,9 +191,13 @@ class PBSIE(InfoExtractor): if media_id: return media_id, presumptive_id, upload_date - url = self._search_regex( - r'(?s)]+?(?:[a-z-]+?=["\'].*?["\'][^>]+?)*?\bsrc=["\']([^\'"]+partnerplayer[^\'"]+)["\']', - webpage, 'player URL') + for iframe in re.findall(r'(?s)', webpage): + url = self._search_regex( + r'src=(["\'])(?P.+?partnerplayer.+?)\1', iframe, + 'player URL', default=None, group='url') + if url: + break + mobj = re.match(self._VALID_URL, url) player_id = mobj.group('player_id') From 686f98816ecbbcb224d1336682688b05cdb051a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 7 Nov 2015 00:39:16 +0600 Subject: [PATCH 35/38] [pbs] Add support for flp frontlines (Closes #7369) --- youtube_dl/extractor/pbs.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 7b868d057..3169e9c3f 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, determine_ext, int_or_none, + strip_jsonp, unified_strdate, US_RATINGS, ) @@ -191,6 +192,23 @@ class PBSIE(InfoExtractor): if media_id: return media_id, presumptive_id, upload_date + # Fronline video embedded via flp + video_id = self._search_regex( + r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid') + if video_id: + # pkg_id calculation is reverse engineered from + # http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js + prg_id = self._search_regex( + r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid')[7:] + if 'q' in prg_id: + prg_id = prg_id.split('q')[1] + prg_id = int(prg_id, 16) + getdir = self._download_json( + 'http://www.pbs.org/wgbh/pages/frontline/.json/getdir/getdir%d.json' % prg_id, + presumptive_id, 'Downloading getdir JSON', + transform_source=strip_jsonp) + return getdir['mid'], presumptive_id, upload_date + for iframe in re.findall(r'(?s)', webpage): url = self._search_regex( r'src=(["\'])(?P.+?partnerplayer.+?)\1', iframe, From 8b6d9406db1d3361b006016e6aace54b05cb6fea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 7 Nov 2015 00:42:30 +0600 Subject: [PATCH 36/38] [pbs] Add test for flp frontline embeds --- youtube_dl/extractor/pbs.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 3169e9c3f..a690f9c29 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -154,6 +154,22 @@ class PBSIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, + { + # Frontline video embedded via flp2012.js + 'url': 'http://www.pbs.org/wgbh/pages/frontline/the-atomic-artists', + 'info_dict': { + 'id': '2070868960', + 'display_id': 'the-atomic-artists', + 'ext': 'mp4', + 'title': 'FRONTLINE - The Atomic Artists', + 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', + 'duration': 723, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, } ] _ERRORS = { From 21d0c33ecde573db961b97f5f0c37ba9d3c02ff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 7 Nov 2015 01:08:40 +0600 Subject: [PATCH 37/38] [pbs] Make flp embed lookup non fatal --- youtube_dl/extractor/pbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index a690f9c29..8fb9b1849 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -210,7 +210,7 @@ class PBSIE(InfoExtractor): # Fronline video embedded via flp video_id = self._search_regex( - r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid') + r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None) if video_id: # pkg_id calculation is reverse engineered from # http://www.pbs.org/wgbh/pages/frontline/js/flp2012.js From ee223abb88263bdda2d92c4b2139d1dca60ba3ae Mon Sep 17 00:00:00 2001 From: Mister Hat Date: Tue, 3 Nov 2015 19:13:27 -0600 Subject: [PATCH 38/38] [vidzi] fixed. finds url from hash and host in script Closes #7386. --- youtube_dl/extractor/vidzi.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py index 08a5a7b8d..2ba9f31df 100644 --- a/youtube_dl/extractor/vidzi.py +++ b/youtube_dl/extractor/vidzi.py @@ -20,8 +20,14 @@ class VidziIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._html_search_regex( - r'{\s*file\s*:\s*"([^"]+)"\s*}', webpage, 'video url') + video_host = self._html_search_regex( + r'id=\'vplayer\'>(.*?)', webpage, 'title')