From f051742c3b0834b01c8daf3becead85a8a7f27a2 Mon Sep 17 00:00:00 2001 From: Urgau Date: Sun, 3 Jun 2018 10:48:24 +0200 Subject: [PATCH 1/7] [PBS] Fix AttributeError: 'NoneType' This is a fix for #15373 --- youtube_dl/extractor/pbs.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index a28ee17ca..22f7736e6 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -455,7 +455,9 @@ class PBSIE(InfoExtractor): if not url: url = self._og_search_url(webpage) - + + if url.strip().startswith("//"): + url = "https:" + url.strip() mobj = re.match(self._VALID_URL, url) player_id = mobj.group('player_id') @@ -465,8 +467,13 @@ class PBSIE(InfoExtractor): player_page = self._download_webpage( url, display_id, note='Downloading player page', errnote='Could not download player page') - video_id = self._search_regex( - r' Date: Sun, 3 Jun 2018 11:24:41 +0200 Subject: [PATCH 2/7] Improve code convention --- youtube_dl/extractor/pbs.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 22f7736e6..1f047fbe2 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -456,8 +456,7 @@ class PBSIE(InfoExtractor): if not url: url = self._og_search_url(webpage) - if url.strip().startswith("//"): - url = "https:" + url.strip() + url = self._proto_relative_url(url.strip()) mobj = re.match(self._VALID_URL, url) player_id = mobj.group('player_id') @@ -467,13 +466,8 @@ class PBSIE(InfoExtractor): player_page = self._download_webpage( url, display_id, note='Downloading player page', errnote='Could not download player page') - - try: - video_id = self._search_regex( - r' Date: Sun, 3 Jun 2018 14:08:09 +0200 Subject: [PATCH 3/7] Remove unnecessary [] in regex --- youtube_dl/extractor/pbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 1f047fbe2..765432fad 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -467,7 +467,7 @@ class PBSIE(InfoExtractor): url, display_id, note='Downloading player page', errnote='Could not download player page') video_id = self._search_regex( - [r' Date: Mon, 4 Jun 2018 21:53:07 +0200 Subject: [PATCH 4/7] Add test --- youtube_dl/extractor/pbs.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 765432fad..fbd634365 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -360,6 +360,21 @@ class PBSIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/', + 'info_dict': { + 'id': '3007193718', + 'ext': 'mp4', + 'title': 'Victoria - A Soldier\'s Daughter / The Green-Eyed Monster', + 'description': 'md5:37efbac85e0c09b009586523ec143652', + 'duration': 6292, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'only_matching': True, From 3f4fda53dbdcea1cb89b78beb3915a64b75db2c6 Mon Sep 17 00:00:00 2001 From: Urgau Date: Thu, 7 Jun 2018 19:14:36 +0200 Subject: [PATCH 5/7] Oups fix merging. --- youtube_dl/extractor/pbs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 454ae605c..992603bcd 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -374,6 +374,7 @@ class PBSIE(InfoExtractor): }, 'expected_warnings': ['HTTP Error 403: Forbidden'], }, + { 'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/', 'info_dict': { 'id': '2365936247', From dbe8b9f5ca616509f6bb150abd738e4fccb5f1cf Mon Sep 17 00:00:00 2001 From: Urgau Date: Mon, 11 Jun 2018 15:14:25 +0200 Subject: [PATCH 6/7] Add test for the second id extractor --- youtube_dl/extractor/pbs.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 992603bcd..748ac9470 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -389,6 +389,21 @@ class PBSIE(InfoExtractor): }, 'expected_warnings': ['HTTP Error 403: Forbidden'], }, + { + # Test for the second id extractor + 'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/', + 'info_dict': { + 'id': '3011407934', + 'ext': 'mp4', + 'title': 'Stories from the Stage - Road Trip', + 'duration': 1619, + 'thumbnail': r're:^https?://.*\.JPG$', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { 'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true', 'only_matching': True, From 7f6fd812a2b0e54ce762b6014b2ec6890ce44e93 Mon Sep 17 00:00:00 2001 From: Urgau Date: Tue, 12 Jun 2018 21:20:22 +0200 Subject: [PATCH 7/7] Remove regex --- youtube_dl/extractor/pbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 748ac9470..9f737e9d2 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -512,7 +512,7 @@ class PBSIE(InfoExtractor): url, display_id, note='Downloading player page', errnote='Could not download player page') video_id = self._search_regex( - [r'