From 0b1eaec3bc7f974fba4dae742516153e2f9b7562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 01:35:09 +0700 Subject: [PATCH 1/7] [tele5] Prefer jwplatform over nexx (closes #25533) --- youtube_dl/extractor/tele5.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index 364556a1f..c209eb04f 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -6,14 +6,8 @@ import re from .common import InfoExtractor from .jwplatform import JWPlatformIE from .nexx import NexxIE -from ..compat import ( - compat_str, - compat_urlparse, -) -from ..utils import ( - NO_DEFAULT, - try_get, -) +from ..compat import compat_urlparse +from ..utils import NO_DEFAULT class Tele5IE(InfoExtractor): @@ -30,6 +24,21 @@ class Tele5IE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # jwplatform, nexx unavailable + 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/', + 'info_dict': { + 'id': 'WJuiOlUp', + 'ext': 'mp4', + 'upload_date': '20200603', + 'timestamp': 1591214400, + 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters', + 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [JWPlatformIE.ie_key()], }, { 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', 'only_matching': True, @@ -88,15 +97,6 @@ class Tele5IE(InfoExtractor): if not jwplatform_id: jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id') - media = self._download_json( - 'https://cdn.jwplayer.com/v2/media/' + jwplatform_id, - display_id) - nexx_id = try_get( - media, lambda x: x['playlist'][0]['nexx_id'], compat_str) - - if nexx_id: - return nexx_result(nexx_id) - return self.url_result( 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), video_id=jwplatform_id) From 
b77888228d605a73f85d367845cf50609b855b62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 01:44:36 +0700 Subject: [PATCH 2/7] [jwplatform] Add support for bypass geo restriction --- youtube_dl/extractor/jwplatform.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index dfa07e423..c34b5f5e6 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import unsmuggle_url class JWPlatformIE(InfoExtractor): @@ -36,6 +37,10 @@ class JWPlatformIE(InfoExtractor): webpage) def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass({ + 'countries': smuggled_data.get('geo_countries'), + }) video_id = self._match_id(url) json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id) return self._parse_jwplayer_data(json_data, video_id) From a5b6102ea893d6943f9ffa9fc0677229c56c99ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 01:45:05 +0700 Subject: [PATCH 3/7] [tele5] Bypass geo restriction --- youtube_dl/extractor/tele5.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py index c209eb04f..3e1a7a9e6 100644 --- a/youtube_dl/extractor/tele5.py +++ b/youtube_dl/extractor/tele5.py @@ -7,11 +7,15 @@ from .common import InfoExtractor from .jwplatform import JWPlatformIE from .nexx import NexxIE from ..compat import compat_urlparse -from ..utils import NO_DEFAULT +from ..utils import ( + NO_DEFAULT, + smuggle_url, +) class Tele5IE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _GEO_COUNTRIES = ['DE'] _TESTS = [{ 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', 
'info_dict': { @@ -98,5 +102,7 @@ class Tele5IE(InfoExtractor): jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id') return self.url_result( - 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), - video_id=jwplatform_id) + smuggle_url( + 'jwplatform:%s' % jwplatform_id, + {'geo_countries': self._GEO_COUNTRIES}), + ie=JWPlatformIE.ie_key(), video_id=jwplatform_id) From 607d204551aa0def292383c2870fba2afca096da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 01:49:27 +0700 Subject: [PATCH 4/7] [ChangeLog] Actualize [ci skip] --- ChangeLog | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index c13035c89..03b05ca28 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +version + +Extractors +* [tele5] Bypass geo restriction ++ [jwplatform] Add support for bypass geo restriction +* [tele5] Prefer jwplatform over nexx (#25533) +* [twitch:stream] Expect 400 and 410 HTTP errors from API +* [twitch:stream] Fix extraction (#25528) +* [twitch] Fix thumbnails extraction (#25531) ++ [twitch] Pass v5 Accept HTTP header (#25531) +* [brightcove] Fix subtitles extraction (#25540) ++ [malltv] Add support for sk.mall.tv (#25445) +* [periscope] Fix untitled broadcasts (#25482) +* [jwplatform] Improve embeds extraction (#25467) + + version 2020.05.29 Core From e1723c4bac4e465991789b5a29beb946d872f508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 01:51:39 +0700 Subject: [PATCH 5/7] release 2020.06.06 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md 
b/.github/ISSUE_TEMPLATE/1_broken_site.md index 09bf763cd..3fe1b1a33 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.29 + [debug] youtube-dl version 2020.06.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index dc9b67cc8..e9f4b880c 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -19,7 +19,7 @@ labels: 'site-support-request' - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] I've searched the bugtracker for similar site support requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md 
b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 129ca0a02..bbd34ecab 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -18,13 +18,13 @@ title: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 40e53bcae..4299474fa 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -18,7 +18,7 @@ title: '' - [ ] I'm reporting a broken site support issue -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones @@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v < [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] youtube-dl version 2020.05.29 + [debug] youtube-dl version 2020.06.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index 619a45f19..c9ccc7010 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -19,13 +19,13 @@ labels: 'request' 
- [ ] I'm reporting a feature request -- [ ] I've verified that I'm running youtube-dl version **2020.05.29** +- [ ] I've verified that I'm running youtube-dl version **2020.06.06** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/ChangeLog b/ChangeLog index 03b05ca28..f439f29e0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2020.06.06 Extractors * [tele5] Bypass geo restriction diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 966fb3aa9..30f31f888 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.05.29' +__version__ = '2020.06.06' From 562de77f41d0c08df9dbb08cfa86ba6c7d239c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 02:14:35 +0700 Subject: [PATCH 6/7] [kaltura] Add support for multiple embeds on a webpage (closes #25523) --- youtube_dl/extractor/generic.py | 18 +++++++++++++++--- youtube_dl/extractor/kaltura.py | 19 +++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ce8252f6a..355067a50 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1708,6 +1708,15 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # multiple kaltura embeds, nsfw + 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html', + 'info_dict': { + 'id': 'kamila-avec-video-jaime-sadomie', + 'title': "Kamila avec vídeo “J'aime sadomie”", + }, + 'playlist_count': 8, + }, { # Non-standard Vimeo embed 'url': 'https://openclassrooms.com/courses/understanding-the-web', @@ -2844,9 +2853,12 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - kaltura_url = KalturaIE._extract_url(webpage) - if kaltura_url: - return 
self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) + kaltura_urls = KalturaIE._extract_urls(webpage) + if kaltura_urls: + return self.playlist_from_matches( + kaltura_urls, video_id, video_title, + getter=lambda x: smuggle_url(x, {'source_url': url}), + ie=KalturaIE.ie_key()) # Look for EaglePlatform embeds eagleplatform_url = EaglePlatformIE._extract_url(webpage) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 2d38b758b..49d13460d 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor): @staticmethod def _extract_url(webpage): + urls = KalturaIE._extract_urls(webpage) + return urls[0] if urls else None + + @staticmethod + def _extract_urls(webpage): # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site - mobj = ( - re.search( + finditer = ( + re.finditer( r"""(?xs) kWidget\.(?:thumb)?[Ee]mbed\( \{.*? @@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor): (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) """, webpage) - or re.search( + or re.finditer( r'''(?xs) (?P<q1>["']) (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* @@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor): ) (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) ''', webpage) - or re.search( + or re.finditer( r'''(?xs) <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) @@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor): (?P=q1) ''', webpage) ) - if mobj: + urls = [] + for mobj in finditer: embed_info = mobj.groupdict() for k, v in embed_info.items(): if v: @@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor): webpage) if service_mobj: url = smuggle_url(url, {'service_url': service_mobj.group('id')}) - return url + urls.append(url) + return urls def 
_kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] From 84213ea8d41d5fe1608333a16ac578dccdf9a915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 6 Jun 2020 04:16:31 +0700 Subject: [PATCH 7/7] [youtube] Extract chapters from JSON (closes #24819) --- test/test_youtube_chapters.py | 2 +- youtube_dl/extractor/youtube.py | 63 +++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py index 324ca8525..e69c57377 100644 --- a/test/test_youtube_chapters.py +++ b/test/test_youtube_chapters.py @@ -267,7 +267,7 @@ class TestYoutubeChapters(unittest.TestCase): for description, duration, expected_chapters in self._TEST_CASES: ie = YoutubeIE() expect_value( - self, ie._extract_chapters(description, duration), + self, ie._extract_chapters_from_description(description, duration), expected_chapters, None) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index fec17987b..54ec76db5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1652,8 +1652,63 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_id = mobj.group(2) return video_id + def _extract_chapters_from_json(self, webpage, video_id, duration): + if not webpage: + return + player = self._parse_json( + self._search_regex( + r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage, + 'player args', default='{}'), + video_id, fatal=False) + if not player or not isinstance(player, dict): + return + watch_next_response = player.get('watch_next_response') + if not isinstance(watch_next_response, compat_str): + return + response = self._parse_json(watch_next_response, video_id, fatal=False) + if not response or not isinstance(response, dict): + return + chapters_list = try_get( + response, + lambda x: x['playerOverlays'] + ['playerOverlayRenderer'] + ['decoratedPlayerBarRenderer'] + 
['decoratedPlayerBarRenderer'] + ['playerBar'] + ['chapteredPlayerBarRenderer'] + ['chapters'], + list) + if not chapters_list: + return + + def chapter_time(chapter): + return float_or_none( + try_get( + chapter, + lambda x: x['chapterRenderer']['timeRangeStartMillis'], + int), + scale=1000) + chapters = [] + for next_num, chapter in enumerate(chapters_list, start=1): + start_time = chapter_time(chapter) + if start_time is None: + continue + end_time = (chapter_time(chapters_list[next_num]) + if next_num < len(chapters_list) else duration) + if end_time is None: + continue + title = try_get( + chapter, lambda x: x['chapterRenderer']['title']['simpleText'], + compat_str) + chapters.append({ + 'start_time': start_time, + 'end_time': end_time, + 'title': title, + }) + return chapters + @staticmethod - def _extract_chapters(description, duration): + def _extract_chapters_from_description(description, duration): if not description: return None chapter_lines = re.findall( @@ -1687,6 +1742,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }) return chapters + def _extract_chapters(self, webpage, description, video_id, duration): + return (self._extract_chapters_from_json(webpage, video_id, duration) + or self._extract_chapters_from_description(description, duration)) + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -2324,7 +2383,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): errnote='Unable to download video annotations', fatal=False, data=urlencode_postdata({xsrf_field_name: xsrf_token})) - chapters = self._extract_chapters(description_original, video_duration) + chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration) # Look for the DASH manifest if self._downloader.params.get('youtube_include_dash_manifest', True):