From 503acf8c875cacecc544aa6a3ed3c2d99ef08318 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Tue, 21 Mar 2017 17:29:25 +0100 Subject: [PATCH 01/10] Add npo:recents extractor Extractor for npo.nl programs. Retrieves only recent episodes of the program in question (hence the name...). Some programs have so many episodes available that it doesn't make any practical sense to retrieve all. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nporecents.py | 60 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 youtube_dl/extractor/nporecents.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 40a5c9842..41669cf07 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -686,6 +686,7 @@ from .npo import ( VPROIE, WNLIE, ) +from .nporecents import NPORecentsIE from .npr import NprIE from .nrk import ( NRKIE, diff --git a/youtube_dl/extractor/nporecents.py b/youtube_dl/extractor/nporecents.py new file mode 100644 index 000000000..77d67c419 --- /dev/null +++ b/youtube_dl/extractor/nporecents.py @@ -0,0 +1,60 @@ +#!/usr/bin/python +from .common import InfoExtractor + +import re +import xml.etree.ElementTree as ET + + +class NPORecentsIE(InfoExtractor): + IE_Name = 'npo:recents' + _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P[^/]+)/(?P\w+_\d+)' + _TEST = { + 'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993', + 'info_dict': { + 'title': 'Keuringsdienst van Waarde', + 'id': 'KN_1678993', + 'description': 'In dit programma staat centraal wat fabrikanten ons als consumenten vertellen. Klopt het wat ze claimen en wat ze ons in reclames verkopen? Verslaggevers Teun van de Keuken, Sofie van den Enk, Daan Nieber, Ersin Kiris, Marijn Frank en Maarten Remmers nemen de telefoon ter hand en bellen er actief op los. Ze stellen simpele vragen en krijgen de meest verbazingwekkende antwoorden op food, non-food en nieuwsgerelateerde kwesties. 
Prikkelend, onderzoekend en vasthoudend. Keuringsdienst van Waarde: simpele vragen,verbazingwekkende antwoorden.' + }, + 'playlist_mincount': 8 + } + + def _extract_entries(self, webpage, program_id, program_url): + is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage + + if is_npo3: + episodes_url = '{}//search?category=broadcasts&page=1'.format( + program_url) + else: + episodes_url = '{}/search?media_type=broadcast&start=0&rows=8'.format( + program_url) + + episodes = self._download_webpage( + episodes_url, program_id, note='Retrieving episodes') + tree = ET.fromstring(episodes.encode('utf-8')) + for element in tree.findall('.//div'): + if 'span4' in element.get('class'): + hyperlink = element.find('.//a') + inactive = hyperlink.find( + './div[@class="program-not-available"]') + if inactive is None: + yield self.url_result( + url='http://npo.nl{}'.format(hyperlink.get('href')), + video_title=self._og_search_title(webpage)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + alt_id = mobj.group('alt_id') + program_id = mobj.group('program_id') + webpage = self._download_webpage(url, program_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + entries = self._extract_entries(webpage, program_id, url) + + return { + '_type': 'playlist', + 'id': program_id, + 'display_id': alt_id, + 'title': title, + 'description': description, + 'entries': entries + } From f106284a5ee5b89249fdf87f1da6a0558371d16e Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Sat, 25 Mar 2017 20:17:55 +0100 Subject: [PATCH 02/10] Made corrections after review; merged into npo.py --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/npo.py | 83 ++++++++++++++++++++++++++++++ youtube_dl/extractor/nporecents.py | 60 --------------------- 3 files changed, 84 insertions(+), 61 deletions(-) delete mode 100644 youtube_dl/extractor/nporecents.py diff --git 
a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 41669cf07..9249f50ac 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -683,10 +683,10 @@ from .npo import ( NPORadioFragmentIE, SchoolTVIE, HetKlokhuisIE, + NPORecentsIE, VPROIE, WNLIE, ) -from .nporecents import NPORecentsIE from .npr import NprIE from .nrk import ( NRKIE, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 38fefe492..511e8131d 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import re +import xml.etree.ElementTree as ET from .common import InfoExtractor from ..compat import ( @@ -477,7 +478,89 @@ class HetKlokhuisIE(NPODataMidEmbedIE): } } +class NPORecentsIE(NPOIE): + IE_Name = 'npo:recents' + _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P[^/]+)/(?P\w+_\d+)' + _TESTS = [ + { + # Example of an npo3 program + 'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993', + 'info_dict': { + 'title': 'Keuringsdienst van Waarde', + 'id': 'KN_1678993', + 'description': u'md5:5ffaf131f175d8a771e7a7884833dad2' + }, + 'playlist_mincount': 8 + }, + { + # Example of an npo1/npo2 program + 'url': 'https://www.npo.nl/jinek/KN_1676589', + 'info_dict': { + 'title': 'Jinek', + 'id': 'KN_1676589', + 'description': u'md5:6998986899b4903395f0cdd0670cedaf' + }, + 'playlist_mincount': 8 + }, + { + # Example of a program for which there will be only one available episode (if any) + 'url': 'https://www.npo.nl/midsomer-murders/POW_00828660', + 'info_dict': { + 'title': 'Midsomer murders', + 'id': 'POW_00828660', + 'description': u'md5:a8b6e9d3e3bd367be88766e3ce8e8362' + }, + 'playlist_maxcount': 1 + } + ] + def _extract_entries(self, webpage, program_id, program_url): + is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage + + if is_npo3: + episodes_url = '%s//search?category=broadcasts&page=1' % 
program_url + else: + episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url + + episodes = self._download_webpage( + episodes_url, program_id, note='Retrieving episodes') + tree = ET.fromstring(episodes.encode('utf-8')) + for element in tree.findall('.//div'): + if 'span4' in element.get('class'): + hyperlink = element.find('.//a') + + # Note: ElementTree in Python 2.6+ doesn't support + # the required XPath constructs + inactive = False + divs = hyperlink.findall('div') + for div in divs: + if div.attrib.get('class') == 'program-not-available': + inactive = True + + if not inactive: + yield self.url_result( + url='http://npo.nl%s' % hyperlink.get('href'), + video_title=self._og_search_title(webpage)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + alt_id = mobj.group('alt_id') + program_id = mobj.group('program_id') + webpage = self._download_webpage(url, program_id) + title = self._og_search_title(webpage, fatal=False) or alt_id + description = self._og_search_description(webpage) or self._html_search_meta('description', webpage, 'description', fatal=False) + entries = self._extract_entries(webpage, program_id, url) + + return { + '_type': 'playlist', + 'id': program_id, + 'display_id': alt_id, + 'title': title, + 'description': description, + 'entries': entries + } + + class NPOPlaylistBaseIE(NPOIE): def _real_extract(self, url): playlist_id = self._match_id(url) diff --git a/youtube_dl/extractor/nporecents.py b/youtube_dl/extractor/nporecents.py deleted file mode 100644 index 77d67c419..000000000 --- a/youtube_dl/extractor/nporecents.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/python -from .common import InfoExtractor - -import re -import xml.etree.ElementTree as ET - - -class NPORecentsIE(InfoExtractor): - IE_Name = 'npo:recents' - _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P[^/]+)/(?P\w+_\d+)' - _TEST = { - 'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993', - 'info_dict': { - 'title': 
'Keuringsdienst van Waarde', - 'id': 'KN_1678993', - 'description': 'In dit programma staat centraal wat fabrikanten ons als consumenten vertellen. Klopt het wat ze claimen en wat ze ons in reclames verkopen? Verslaggevers Teun van de Keuken, Sofie van den Enk, Daan Nieber, Ersin Kiris, Marijn Frank en Maarten Remmers nemen de telefoon ter hand en bellen er actief op los. Ze stellen simpele vragen en krijgen de meest verbazingwekkende antwoorden op food, non-food en nieuwsgerelateerde kwesties. Prikkelend, onderzoekend en vasthoudend. Keuringsdienst van Waarde: simpele vragen,verbazingwekkende antwoorden.' - }, - 'playlist_mincount': 8 - } - - def _extract_entries(self, webpage, program_id, program_url): - is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage - - if is_npo3: - episodes_url = '{}//search?category=broadcasts&page=1'.format( - program_url) - else: - episodes_url = '{}/search?media_type=broadcast&start=0&rows=8'.format( - program_url) - - episodes = self._download_webpage( - episodes_url, program_id, note='Retrieving episodes') - tree = ET.fromstring(episodes.encode('utf-8')) - for element in tree.findall('.//div'): - if 'span4' in element.get('class'): - hyperlink = element.find('.//a') - inactive = hyperlink.find( - './div[@class="program-not-available"]') - if inactive is None: - yield self.url_result( - url='http://npo.nl{}'.format(hyperlink.get('href')), - video_title=self._og_search_title(webpage)) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - alt_id = mobj.group('alt_id') - program_id = mobj.group('program_id') - webpage = self._download_webpage(url, program_id) - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - entries = self._extract_entries(webpage, program_id, url) - - return { - '_type': 'playlist', - 'id': program_id, - 'display_id': alt_id, - 'title': title, - 'description': description, - 'entries': entries - } From 
421ac9e822e227273d858254b3c9d916c41ca0b5 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Sat, 25 Mar 2017 20:56:45 +0100 Subject: [PATCH 03/10] Minor formatting changes to satisfy flake8 --- youtube_dl/extractor/npo.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 511e8131d..9084bbff3 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -478,11 +478,11 @@ class HetKlokhuisIE(NPODataMidEmbedIE): } } + class NPORecentsIE(NPOIE): IE_Name = 'npo:recents' _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P[^/]+)/(?P\w+_\d+)' - _TESTS = [ - { + _TESTS = [{ # Example of an npo3 program 'url': 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993', 'info_dict': { @@ -491,8 +491,7 @@ class NPORecentsIE(NPOIE): 'description': u'md5:5ffaf131f175d8a771e7a7884833dad2' }, 'playlist_mincount': 8 - }, - { + }, { # Example of an npo1/npo2 program 'url': 'https://www.npo.nl/jinek/KN_1676589', 'info_dict': { @@ -501,8 +500,7 @@ class NPORecentsIE(NPOIE): 'description': u'md5:6998986899b4903395f0cdd0670cedaf' }, 'playlist_mincount': 8 - }, - { + }, { # Example of a program for which there will be only one available episode (if any) 'url': 'https://www.npo.nl/midsomer-murders/POW_00828660', 'info_dict': { @@ -511,8 +509,7 @@ class NPORecentsIE(NPOIE): 'description': u'md5:a8b6e9d3e3bd367be88766e3ce8e8362' }, 'playlist_maxcount': 1 - } - ] + }] def _extract_entries(self, webpage, program_id, program_url): is_npo3 = 'www-assets.npo.nl/uploads/tv_channel/265/logo/smaller_npo3-logo.png' in webpage @@ -528,15 +525,15 @@ class NPORecentsIE(NPOIE): for element in tree.findall('.//div'): if 'span4' in element.get('class'): hyperlink = element.find('.//a') - - # Note: ElementTree in Python 2.6+ doesn't support + + # Note: ElementTree in Python 2.6+ doesn't support # the required XPath constructs inactive = False divs = hyperlink.findall('div') for div in divs: if
div.attrib.get('class') == 'program-not-available': inactive = True - + if not inactive: yield self.url_result( url='http://npo.nl%s' % hyperlink.get('href'), @@ -559,8 +556,8 @@ class NPORecentsIE(NPOIE): 'description': description, 'entries': entries } - - + + class NPOPlaylistBaseIE(NPOIE): def _real_extract(self, url): playlist_id = self._match_id(url) From 3ff3fbaf701da8dc2819714a69a42b37de107f62 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Mon, 27 Mar 2017 20:41:21 +0200 Subject: [PATCH 04/10] Flake8; tests in relevant Python versions --- testnporecent.ps1 | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 testnporecent.ps1 diff --git a/testnporecent.ps1 b/testnporecent.ps1 new file mode 100644 index 000000000..f95778abd --- /dev/null +++ b/testnporecent.ps1 @@ -0,0 +1,43 @@ +Describe 'Flake8' { + It 'Does not return any errors' { + & flake8 /Users/jhoek/GitHub/youtube-dl/youtube_dl/extractor/npo.py | Should BeNullOrEmpty + } +} + +Describe 'Tests' { + It 'Should work in Python 2.6' { + & 'python2.6' '--version' 2>&1 | Should Be 'Python 2.6.9' + + '', '_1', '_2' | ForEach-Object { + & 'python2.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 2.7' { + & python '--version' 2>&1 | Should Be 'Python 2.7.13' + + '', '_1', '_2' | ForEach-Object { + & python /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 3.5' { + & python3 '--version' | Should Be 'Python 3.5.2' + + '', '_1', '_2' | ForEach-Object { + & python3 /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 3.6' { + & python3.6 '--version' | Should Be 'Python 3.6.1' + + '', '_1', '_2' | ForEach-Object { + & 'python3.6' 
/Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } +} From d86a35ec35e5d5db1ba74ca92b8f73a01c434fdc Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Mon, 27 Mar 2017 20:42:36 +0200 Subject: [PATCH 05/10] Investigating structure of npo.nl hyperlinks --- testnpo.ps1 | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 testnpo.ps1 diff --git a/testnpo.ps1 b/testnpo.ps1 new file mode 100644 index 000000000..31a0a660e --- /dev/null +++ b/testnpo.ps1 @@ -0,0 +1,13 @@ +Clear-host + +$Urls = @{} +$Urls.Add('keuringsdienst', 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993/search?category=broadcasts&page=1') +$Urls.Add('jinek', 'https://www.npo.nl/jinek/KN_1676589/search?media_type=broadcast&start=0&rows=8') +$Urls.Add('midsomer', 'https://www.npo.nl/midsomer-murders/POW_00828660/search?media_type=broadcast&start=0&rows=8') +$Urls.Add('pownews', 'https://www.npo.nl/pownews-flits/POW_03469040/search?category=broadcasts&page=1') + +$Urls.GetEnumerator() | ForEach-Object { + Write-Host $_.Key + + $response = Invoke-WebRequest -Uri ($_.Value) -UseBasicParsing -OutFile "~/Desktop/$($_.Key).txt" +} \ No newline at end of file From 4a0b588f3e6e9f42da94b70243d068a9193cf436 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Wed, 29 Mar 2017 20:40:12 +0200 Subject: [PATCH 06/10] Using regexes again instead of xml parser --- youtube_dl/extractor/npo.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 9084bbff3..6573095dd 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import xml.etree.ElementTree as ET from .common import InfoExtractor from ..compat import ( @@ -481,6 +480,8 @@ class HetKlokhuisIE(NPODataMidEmbedIE): class NPORecentsIE(NPOIE): IE_Name = 'npo:recents' + npo12_regex = 
r"""
\s*
\s*\s*(
)?""" + npo3_regex = r"""
\s*\s*
\s*
\s*
\s*
.*?
\s*
\s*
\s*(
)?""" _VALID_URL = r'(?:https?://)?(?:www\.)?npo\.nl/(?P[^/]+)/(?P\w+_\d+)' _TESTS = [{ # Example of an npo3 program @@ -516,28 +517,21 @@ class NPORecentsIE(NPOIE): if is_npo3: episodes_url = '%s//search?category=broadcasts&page=1' % program_url + regex = self.npo3_regex else: episodes_url = '%s/search?media_type=broadcast&start=0&rows=8' % program_url + regex = self.npo12_regex - episodes = self._download_webpage( - episodes_url, program_id, note='Retrieving episodes') - tree = ET.fromstring(episodes.encode('utf-8')) - for element in tree.findall('.//div'): - if 'span4' in element.get('class'): - hyperlink = element.find('.//a') + episodes = self._download_webpage(episodes_url, program_id, note='Retrieving episodes') - # Note: ElementTree in Python 2.6+ doesn't support - # the required XPath constructs - inactive = False - divs = hyperlink.findall('div') - for div in divs: - if div.attrib.get('class') == 'program-not-available': - inactive = True + for match in re.finditer(regex, episodes): + url = match.group(1) + available = match.group(2) is None - if not inactive: - yield self.url_result( - url='http://npo.nl%s' % hyperlink.get('href'), - video_title=self._og_search_title(webpage)) + if available: + yield self.url_result( + url='http://npo.nl%s' % url, + video_title=self._og_search_title(webpage)) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 481d6638fd0ecf0a92b26c11d06620ba5b5017f6 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Wed, 29 Mar 2017 20:44:51 +0200 Subject: [PATCH 07/10] Testing with regexes for npo.nl episodes --- testregex.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 testregex.py diff --git a/testregex.py b/testregex.py new file mode 100644 index 000000000..5d90ff876 --- /dev/null +++ b/testregex.py @@ -0,0 +1,21 @@ +#!/usr/bin/python + +import re + +npo12files = ['./jinek.htm', './midsomer.htm'] +npo3files = ['./keuringsdienst.htm', './pownews.htm'] + +npo12regex = r"""
\s*
\s*\s*(
)?""" +npo3regex = r"""
\s*\s*
\s*
\s*
\s*
.*?
\s*
\s*
\s*(
)?""" + +for filename in npo12files: + with open(filename) as f: + for match in re.finditer(npo12regex, f.read()): + print(match.group(1), match.group(2) is None) + print('') + +for filename in npo3files: + with open(filename) as f: + for match in re.finditer(npo3regex, f.read()): + print(match.group(1), match.group(2) is None) + print('') \ No newline at end of file From f9b06385f19c82bce551f869cbfa0c116cc6cbf8 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Wed, 29 Mar 2017 20:47:21 +0200 Subject: [PATCH 08/10] Removing temporary scripts --- testnpo.ps1 | 13 ------------- testnporecent.ps1 | 43 ------------------------------------------- testregex.py | 21 --------------------- 3 files changed, 77 deletions(-) delete mode 100644 testnpo.ps1 delete mode 100644 testnporecent.ps1 delete mode 100644 testregex.py diff --git a/testnpo.ps1 b/testnpo.ps1 deleted file mode 100644 index 31a0a660e..000000000 --- a/testnpo.ps1 +++ /dev/null @@ -1,13 +0,0 @@ -Clear-host - -$Urls = @{} -$Urls.Add('keuringsdienst', 'https://www.npo.nl/keuringsdienst-van-waarde/KN_1678993/search?category=broadcasts&page=1') -$Urls.Add('jinek', 'https://www.npo.nl/jinek/KN_1676589/search?media_type=broadcast&start=0&rows=8') -$Urls.Add('midsomer', 'https://www.npo.nl/midsomer-murders/POW_00828660/search?media_type=broadcast&start=0&rows=8') -$Urls.Add('pownews', 'https://www.npo.nl/pownews-flits/POW_03469040/search?category=broadcasts&page=1') - -$Urls.GetEnumerator() | ForEach-Object { - Write-Host $_.Key - - $response = Invoke-WebRequest -Uri ($_.Value) -UseBasicParsing -OutFile "~/Desktop/$($_.Key).txt" -} \ No newline at end of file diff --git a/testnporecent.ps1 b/testnporecent.ps1 deleted file mode 100644 index f95778abd..000000000 --- a/testnporecent.ps1 +++ /dev/null @@ -1,43 +0,0 @@ -Describe 'Flake8' { - It 'Does not return any errors' { - & flake8 /Users/jhoek/GitHub/youtube-dl/youtube_dl/extractor/npo.py | Should BeNullOrEmpty - } -} - -Describe 'Tests' { - It 'Should work in Python 2.6' 
{ - & 'python2.6' '--version' 2>&1 | Should Be 'Python 2.6.9' - - '', '_1', '_2' | ForEach-Object { - & 'python2.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } - - It 'Should work in Python 2.7' { - & python '--version' 2>&1 | Should Be 'Python 2.7.13' - - '', '_1', '_2' | ForEach-Object { - & python /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } - - It 'Should work in Python 3.5' { - & python3 '--version' | Should Be 'Python 3.5.2' - - '', '_1', '_2' | ForEach-Object { - & python3 /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } - - It 'Should work in Python 3.6' { - & python3.6 '--version' | Should Be 'Python 3.6.1' - - '', '_1', '_2' | ForEach-Object { - & 'python3.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 - $LASTEXITCODE | Should Be 0 - } - } -} diff --git a/testregex.py b/testregex.py deleted file mode 100644 index 5d90ff876..000000000 --- a/testregex.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/python - -import re - -npo12files = ['./jinek.htm', './midsomer.htm'] -npo3files = ['./keuringsdienst.htm', './pownews.htm'] - -npo12regex = r"""
\s*
\s*\s*(
)?""" -npo3regex = r"""
\s*\s*
\s*
\s*
\s*
.*?
\s*
\s*
\s*(
)?""" - -for filename in npo12files: - with open(filename) as f: - for match in re.finditer(npo12regex, f.read()): - print(match.group(1), match.group(2) is None) - print('') - -for filename in npo3files: - with open(filename) as f: - for match in re.finditer(npo3regex, f.read()): - print(match.group(1), match.group(2) is None) - print('') \ No newline at end of file From 338a6867bced757f6d1e25d72a6e627e22c9a7d2 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Sat, 1 Apr 2017 13:44:28 +0200 Subject: [PATCH 09/10] Pester tests --- testnporecent.ps1 | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 testnporecent.ps1 diff --git a/testnporecent.ps1 b/testnporecent.ps1 new file mode 100644 index 000000000..f95778abd --- /dev/null +++ b/testnporecent.ps1 @@ -0,0 +1,43 @@ +Describe 'Flake8' { + It 'Does not return any errors' { + & flake8 /Users/jhoek/GitHub/youtube-dl/youtube_dl/extractor/npo.py | Should BeNullOrEmpty + } +} + +Describe 'Tests' { + It 'Should work in Python 2.6' { + & 'python2.6' '--version' 2>&1 | Should Be 'Python 2.6.9' + + '', '_1', '_2' | ForEach-Object { + & 'python2.6' /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 2.7' { + & python '--version' 2>&1 | Should Be 'Python 2.7.13' + + '', '_1', '_2' | ForEach-Object { + & python /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 3.5' { + & python3 '--version' | Should Be 'Python 3.5.2' + + '', '_1', '_2' | ForEach-Object { + & python3 /Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } + + It 'Should work in Python 3.6' { + & python3.6 '--version' | Should Be 'Python 3.6.1' + + '', '_1', '_2' | ForEach-Object { + & 'python3.6' 
/Users/jhoek/GitHub/youtube-dl/test/test_download.py "TestDownload.test_NPORecents$($_)" 2>&1 + $LASTEXITCODE | Should Be 0 + } + } +} From ffb221568f943c8a14fbd40b8d8d9a2488e544e5 Mon Sep 17 00:00:00 2001 From: Jan Hoek Date: Sat, 1 Apr 2017 14:39:39 +0200 Subject: [PATCH 10/10] Removed unicode prefix --- youtube_dl/extractor/npo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 6573095dd..408bbc36d 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -489,7 +489,7 @@ class NPORecentsIE(NPOIE): 'info_dict': { 'title': 'Keuringsdienst van Waarde', 'id': 'KN_1678993', - 'description': u'md5:5ffaf131f175d8a771e7a7884833dad2' + 'description': 'md5:5ffaf131f175d8a771e7a7884833dad2' }, 'playlist_mincount': 8 }, { @@ -498,7 +498,7 @@ class NPORecentsIE(NPOIE): 'info_dict': { 'title': 'Jinek', 'id': 'KN_1676589', - 'description': u'md5:6998986899b4903395f0cdd0670cedaf' + 'description': 'md5:6998986899b4903395f0cdd0670cedaf' }, 'playlist_mincount': 8 }, { @@ -507,7 +507,7 @@ class NPORecentsIE(NPOIE): 'info_dict': { 'title': 'Midsomer murders', 'id': 'POW_00828660', - 'description': u'md5:a8b6e9d3e3bd367be88766e3ce8e8362' + 'description': 'md5:a8b6e9d3e3bd367be88766e3ce8e8362' }, 'playlist_maxcount': 1 }]