From d53104f9230d0e2cf7ec00ef2afd1813d33e08bc Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Mon, 3 Apr 2017 17:49:50 +0200 Subject: [PATCH 1/4] Extractor for npo.nl programs (as opposed to episodes of programs). Retrieves only the most recent episodes of the program in question (hence the name). Some programs have so many episodes available that it doesn't make any practical sense to retrieve all, as discussed in issue #7947. --- testnporecent.ps1 | 43 +++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/npo.py | 74 ++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 testnporecent.ps1 diff --git a/testnporecent.ps1 b/testnporecent.ps1 new file mode 100644 index 000000000..f95778abd --- /dev/null +++ b/testnporecent.ps1 @@ -0,0 +1,43 @@ +Describe 'Flake8' { + It 'Does not return any errors' { + & flake8 /Users/jhoek/GitHub/youtube-dl/youtube_dl/extractor/npo.py | Should BeNullOrEmpty + } +} + +Describe 'Tests' { + It 'Should work in Python 2.6' { + & 'python2.6' '--version' 2>&1 | Should Be 'Python 2.6.9' + + '', '_1', '_2' | ForEach-Object { + & 'python2.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 2.7' { + & python '--version' 2>&1 | Should Be 'Python 2.7.13' + + '', '_1', '_2' | ForEach-Object { + & python /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 3.5' { + & python3 '--version' | Should Be 'Python 3.5.2' + + '', '_1', '_2' | ForEach-Object { + & python3 /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 3.6' { + & python3.6 '--version' | Should Be 'Python 3.6.1' + + '', '_1', '_2' | ForEach-Object { + & 'python3.6' 
/Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } +} diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 13ca1d2cd..36ff44103 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -686,6 +686,7 @@ from .npo import ( NPORadioFragmentIE, SchoolTVIE, HetKlokhuisIE, + NPORecentsIE, VPROIE, WNLIE, ) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 38fefe492..408bbc36d 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -478,6 +478,80 @@ class HetKlokhuisIE(NPODataMidEmbedIE): } +class NPORecentsIE(NPOIE): + IE_Name = 'npo:recents' + npo12_regex = r"""
\s*
\s*\s*(
)?""" + npo3_regex = r"""
\s*\s*
\s*
\s*
\s*
.*?
\s*
\s*
\s*(
)?""" + _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P<alt_id>[^/]+)/(?P<program_id>\w+_\d+)' + _TESTS = [{ + # Example of an npo3 program + 'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993', + 'info_dict': { + 'title': 'Keuringsdienst van Waarde', + 'id': 'KN_1678993', + 'description': 'md5:5ffaf131f175d8a771e7a7884833dad2' + }, + 'playlist_mincount': 8 + }, { + # Example of an npo1/npo2 program + 'url': 'https://www.npo.nl/jinek/KN_1676589', + 'info_dict': { + 'title': 'Jinek', + 'id': 'KN_1676589', + 'description': 'md5:6998986899b4903395f0cdd0670cedaf' + }, + 'playlist_mincount': 8 + }, { + # Example of a program for which there will be only one available episode (if any) + 'url': 'https://www.npo.nl/midsomer-murders/POW_00828660', + 'info_dict': { + 'title': 'Midsomer murders', + 'id': 'POW_00828660', + 'description': 'md5:a8b6e9d3e3bd367be88766e3ce8e8362' + }, + 'playlist_maxcount': 1 + }] + + def _extract_entries(self, webpage, program_id, program_url): + is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage + + if is_npo3: + episodes_url = '%s//search?category=broadcasts&page=1' % program_url + regex = self.npo3_regex + else: + episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url + regex = self.npo12_regex + + episodes = self._download_webpage(episodes_url, program_id, note='Retrieving episodes') + + for match in re.finditer(regex, episodes): + url = match.group(1) + available = match.group(2) is None + + if available: + yield self.url_result( + url='http://npo.nl%s' % url, + video_title=self._og_search_title(webpage)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + alt_id = mobj.group('alt_id') + program_id = mobj.group('program_id') + webpage = self._download_webpage(url, program_id) + title = self._og_search_title(webpage, fatal=False) or alt_id + description = self._og_search_description(webpage) or self._html_search_meta('description', webpage, 'description', 
fatal=False) + entries = self._extract_entries(webpage, program_id, url) + + return { + '_type': 'playlist', + 'id': program_id, + 'display_id': alt_id, + 'title': title, + 'description': description, + 'entries': entries + } + + class NPOPlaylistBaseIE(NPOIE): def _real_extract(self, url): playlist_id = self._match_id(url) From 905c54d4696ca6bf4b92cc5a8d9ee2072161560e Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Mon, 3 Apr 2017 19:59:22 +0200 Subject: [PATCH 2/4] Removed PowerShell test script --- testnporecent.ps1 | 43 ------------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 testnporecent.ps1 diff --git a/testnporecent.ps1 b/testnporecent.ps1 deleted file mode 100644 index f95778abd..000000000 --- a/testnporecent.ps1 +++ /dev/null @@ -1,43 +0,0 @@ -Describe 'Flake8' { - It 'Does not return any errors' { - & flake8 /Users/jhoek/GitHub/youtube-dl/youtube_dl/extractor/npo.py | Should BeNullOrEmpty - } -} - -Describe 'Tests' { - It 'Should work in Python 2.6' { - & 'python2.6' '--version' 2>&1 | Should Be 'Python 2.6.9' - - '', '_1', '_2' | ForEach-Object { - & 'python2.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } - - It 'Should work in Python 2.7' { - & python '--version' 2>&1 | Should Be 'Python 2.7.13' - - '', '_1', '_2' | ForEach-Object { - & python /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } - - It 'Should work in Python 3.5' { - & python3 '--version' | Should Be 'Python 3.5.2' - - '', '_1', '_2' | ForEach-Object { - & python3 /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } - - It 'Should work in Python 3.6' { - & python3.6 '--version' | Should Be 'Python 3.6.1' - - '', '_1', '_2' | ForEach-Object { - & 'python3.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py 
"TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } -} From 7bf58f999dac8175f6fc91422fe94e80c626191e Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Tue, 4 Apr 2017 16:30:20 +0200 Subject: [PATCH 3/4] Changed IE_NAME to hopefully prevent duplicates --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 408bbc36d..f5f3a1328 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -479,7 +479,7 @@ class HetKlokhuisIE(NPODataMidEmbedIE): class NPORecentsIE(NPOIE): - IE_Name = 'npo:recents' + IE_Name = 'npo.nl:recents' npo12_regex = r"""