From 200ad7687a30ba0113660a581b40c1637cbe4863 Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 09:36:01 -0500 Subject: [PATCH 1/9] [narando] remove separate player extractor (was causing issues), add display_id attribute --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/narando.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b7cca0c25..2c1df5efe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -672,6 +672,7 @@ from .myvi import ( MyviEmbedIE, ) from .myvidster import MyVidsterIE +from .narando import NarandoIE from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicIE, @@ -1482,4 +1483,3 @@ from .zingmp3 import ZingMp3IE from .zype import ZypeIE -from .narando import NarandoIE, NarandoPlayerIE diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 6673b8007..eea445427 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -11,7 +11,8 @@ class NarandoIE(InfoExtractor): 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { - 'id': 'an-ihrem-selbstlob-erkennt-man-sie', + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'display_id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', @@ -27,17 +28,19 @@ class NarandoIE(InfoExtractor): title = self._html_search_regex(r'

(.+?)

', webpage, 'title') player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') - mobj = NarandoPlayerIE() - download_url = mobj._real_extract("https://narando.com/widget?r=" + player_id)['url'] + player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) + download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'url') +# download_url = NarandoPlayerIE()._real_extract('https://narando.com/widget?r=' + player_id)['url'] description = self._html_search_regex(r'', webpage, 'description') return { - 'id': video_id, + 'display_id': video_id, + 'id': player_id, 'title': title, 'url': download_url, 'description': description, } - +"""to be implemented later class NarandoPlayerIE(InfoExtractor): IE_NAME = "narando:player" _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' @@ -54,9 +57,7 @@ class NarandoPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - print(video_id) webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) - print(webpage) title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') @@ -65,3 +66,4 @@ class NarandoPlayerIE(InfoExtractor): 'title': title, 'url': download_url, } +""" From f8a4a388804c87d6a8042896a18d5e4986b7f782 Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 15:20:41 -0500 Subject: [PATCH 2/9] [narando] add separate [narando:player] extractor --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/narando.py | 66 ++++++++++++++---------------- 2 files changed, 35 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2c1df5efe..c3d1b81a3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -672,7 +672,10 @@ from .myvi import ( MyviEmbedIE, ) from .myvidster import MyVidsterIE -from .narando import NarandoIE +from .narando import ( + NarandoIE, + NarandoPlayerIE, +) from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicIE, diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index eea445427..c8a8ef30c 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -4,6 +4,33 @@ from __future__ import unicode_literals from .common import InfoExtractor +class NarandoPlayerIE(InfoExtractor): + IE_NAME = "narando:player" + _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' + _TEST = { + 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + 'url': 'https://static.narando.com/sounds/10492/original.mp3', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) + title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') + download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') + + return { + 'id': video_id, + 'title': title, + 'url': download_url, + } + + class NarandoIE(InfoExtractor): IE_NAME = "narando" _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' @@ -22,48 +49,17 @@ class NarandoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) - title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') - player_page = self._download_webpage('https://narando.com/widget?r=' + player_id, player_id) - download_url = self._html_search_regex(r'.
\s*([^?]*)', player_page, 'url') -# download_url = NarandoPlayerIE()._real_extract('https://narando.com/widget?r=' + player_id)['url'] + player_url = 'https://narando.com/widget?r=' + player_id description = self._html_search_regex(r'', webpage, 'description') + return { 'display_id': video_id, 'id': player_id, 'title': title, - 'url': download_url, + 'url': player_url, 'description': description, + '_type': 'url', } - -"""to be implemented later -class NarandoPlayerIE(InfoExtractor): - IE_NAME = "narando:player" - _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' - _TEST = { - 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', - 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', - 'info_dict': { - 'id': 'b2t4t789kxgy9g7ms4rwjvvw', - 'ext': 'mp3', - 'title': 'An ihrem Selbstlob erkennt man sie', - 'url': 'https://static.narando.com/sounds/10492/original.mp3', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) - title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') - - download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') - return { - 'id': video_id, - 'title': title, - 'url': download_url, - } -""" From b709ed1fc9c551423ed8f9cba8ed6fb27fb6bbae Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 19:11:22 -0500 Subject: [PATCH 3/9] [narando] remove display_id attribute --- youtube_dl/extractor/narando.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index c8a8ef30c..ffe682f9e 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -39,7 +39,6 @@ class NarandoIE(InfoExtractor): 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { 'id': 'b2t4t789kxgy9g7ms4rwjvvw', - 'display_id': 'an-ihrem-selbstlob-erkennt-man-sie', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', @@ -56,7 +55,6 @@ class NarandoIE(InfoExtractor): description = self._html_search_regex(r'', webpage, 'description') return { - 'display_id': video_id, 'id': player_id, 'title': title, 'url': player_url, From 6892fcbcc43e1ff03e6676b223897e06b35717d0 Mon Sep 17 00:00:00 2001 From: ealgase Date: Wed, 21 Nov 2018 20:12:49 -0500 Subject: [PATCH 4/9] [narando] remove description attribute (was broken in Python 3.2) --- youtube_dl/extractor/narando.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index ffe682f9e..2b05b705b 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -42,7 +42,6 @@ class NarandoIE(InfoExtractor): 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', 'url': 'https://static.narando.com/sounds/10492/original.mp3', - 'description': u'omnisophie.com: Kaum eine Woche vergeht, dass nicht jemand mir gegenüber seine Mathematik-Unkenntnisse tränenlos beweint. „In Mathe war ich niemals gut.“ Diese Leute sagen mir das wohl, weil ich Mathematiker bin, und da gehört so ein fröhliches „Understatement“ zum Small Talk. So wie wenn ich selbst bedauernd-entschuldigend auf meine grauen Haare zeige. Ich kann eben auch nicht alles bieten... „Mathe kann ich nicht“, „Ich habe kein Internet“ oder „Ich will auch bewusst nicht alles können“ wird fast wie Eigenlob vorgetragen.', } } @@ -52,12 +51,10 @@ class NarandoIE(InfoExtractor): title = self._html_search_regex(r'

(.+?)

', webpage, 'title') player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') player_url = 'https://narando.com/widget?r=' + player_id - description = self._html_search_regex(r'', webpage, 'description') return { 'id': player_id, 'title': title, 'url': player_url, - 'description': description, '_type': 'url', } From 0c2fc982d37efb0e17ac69fee4b82355fa45c101 Mon Sep 17 00:00:00 2001 From: ealgase Date: Fri, 30 Nov 2018 19:07:26 -0500 Subject: [PATCH 5/9] [narando] Improve coding methods as requested by dstftw in #18268 --- youtube_dl/extractor/narando.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 2b05b705b..a6bb5f473 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class NarandoPlayerIE(InfoExtractor): IE_NAME = "narando:player" - _VALID_URL = r'https://narando.com/widget\?r=(?P\w+)' + _VALID_URL = r'https://narando\.com/widget\?r=(?P\w+)' _TEST = { 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', @@ -14,26 +14,25 @@ class NarandoPlayerIE(InfoExtractor): 'id': 'b2t4t789kxgy9g7ms4rwjvvw', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', - 'url': 'https://static.narando.com/sounds/10492/original.mp3', } } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) - title = self._html_search_regex(r'narando \| (.+?)', webpage, 'title') + title = self._html_search_regex(r'(.+?)', webpage, 'title') download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') - return { 'id': video_id, 'title': title, 'url': download_url, + 'vcodec': 'none', } class NarandoIE(InfoExtractor): IE_NAME = "narando" - _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P([a-zA-Z]|-)+)' + _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P.+)' _TEST = { 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', @@ -41,15 +40,14 @@ class NarandoIE(InfoExtractor): 'id': 'b2t4t789kxgy9g7ms4rwjvvw', 'ext': 'mp3', 'title': 'An ihrem Selbstlob erkennt man sie', - 'url': 'https://static.narando.com/sounds/10492/original.mp3', } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://narando.com/articles/' + video_id, video_id) + webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - player_id = self._html_search_regex(r'[\n\r].*https:\/\/narando.com\/r\/\s*([^"]*)', webpage, 'player_id') + player_id = self._html_search_regex(r'\s*https://narando.com/r/([^"]*)', webpage, 'player_id') player_url = 'https://narando.com/widget?r=' + player_id return { From 00bcff6387742a69f4ebf97acc5d7da56fab8a87 Mon Sep 17 00:00:00 2001 From: ealgase Date: Fri, 30 Nov 2018 21:43:17 -0500 Subject: [PATCH 6/9] [narando] use already given URL for downloading webpage --- youtube_dl/extractor/narando.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index a6bb5f473..0a81fa851 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -19,7 +19,7 @@ class NarandoPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://narando.com/widget?r=' + video_id, video_id) + webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.+?)', webpage, 'title') download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') return { From fc9ea7bd133034fdd4db4074cc60443e0cdc8f23 Mon Sep 17 00:00:00 2001 From: ealgase Date: Sat, 1 Dec 2018 14:47:41 -0500 Subject: [PATCH 7/9] [narando] remove superfluous whitespace --- youtube_dl/extractor/extractors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c3d1b81a3..33913715a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1484,5 +1484,3 @@ from .zattoo import ( from .zdf import ZDFIE, ZDFChannelIE from .zingmp3 import ZingMp3IE from .zype import ZypeIE - - From 83809b6ee1e5ead1a1199b4df553cf657ba8f37b Mon Sep 17 00:00:00 2001 From: ealgase Date: Sun, 20 Jan 2019 19:00:43 -0500 Subject: [PATCH 8/9] [narando] improve coding methods to match standards, add support for more URL formats --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/narando.py | 47 ++++++++++++++++++------------ 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 33913715a..c1e4aa308 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -674,7 +674,7 @@ from .myvi import ( from .myvidster import MyVidsterIE from .narando import ( NarandoIE, - NarandoPlayerIE, + NarandoArticleIE, ) from .nationalgeographic import ( NationalGeographicVideoIE, diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 0a81fa851..8f8fa0cdd 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -4,9 +4,9 @@ from __future__ import unicode_literals from .common import InfoExtractor -class NarandoPlayerIE(InfoExtractor): - IE_NAME = "narando:player" - _VALID_URL = r'https://narando\.com/widget\?r=(?P\w+)' +class NarandoIE(InfoExtractor): + IE_NAME = 'narando' + _VALID_URL = r'https?://narando\.com/widget\?.*?r=(?P\w+)&?' _TEST = { 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', @@ -21,7 +21,7 @@ class NarandoPlayerIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.+?)', webpage, 'title') - download_url = self._html_search_regex(r'.
\s*([^?]*)', webpage, 'download_url') + download_url = self._html_search_regex(r'
(.+)
', webpage, 'download_url') return { 'id': video_id, 'title': title, @@ -30,29 +30,40 @@ class NarandoPlayerIE(InfoExtractor): } -class NarandoIE(InfoExtractor): - IE_NAME = "narando" - _VALID_URL = r'https?://(?:www\.)?narando\.com/articles/(?P.+)' - _TEST = { - 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', - 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', - 'info_dict': { - 'id': 'b2t4t789kxgy9g7ms4rwjvvw', - 'ext': 'mp3', - 'title': 'An ihrem Selbstlob erkennt man sie', +class NarandoArticleIE(InfoExtractor): + IE_NAME = "narando:article" + _VALID_URL = r'https?://(?:www\.)?narando\.com/(articles|r)/(?P.+)' + _TESTS = [ + { + 'url': 'https://narando.com/articles/an-ihrem-selbstlob-erkennt-man-sie', + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + } + }, + { + 'url': 'https://narando.com/r/b2t4t789kxgy9g7ms4rwjvvw', #alternate URL format + 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', + 'info_dict': { + 'id': 'b2t4t789kxgy9g7ms4rwjvvw', + 'ext': 'mp3', + 'title': 'An ihrem Selbstlob erkennt man sie', + } } - } + ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'

(.+?)

', webpage, 'title') - player_id = self._html_search_regex(r'\s*https://narando.com/r/([^"]*)', webpage, 'player_id') - player_url = 'https://narando.com/widget?r=' + player_id + player_id = self._html_search_regex(r'https://narando.com/r/(.+?)\"', webpage, 'player_id') + url_result = 'https://narando.com/widget?r=' + player_id return { 'id': player_id, 'title': title, - 'url': player_url, + 'url': url_result, '_type': 'url', } From 9d5e117c16b6616df795995158de165358b28c68 Mon Sep 17 00:00:00 2001 From: Elliot Algase Date: Wed, 15 May 2019 22:04:02 -0400 Subject: [PATCH 9/9] [narando] extract thumbnails --- youtube_dl/extractor/narando.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/narando.py b/youtube_dl/extractor/narando.py index 8f8fa0cdd..c250cc997 100644 --- a/youtube_dl/extractor/narando.py +++ b/youtube_dl/extractor/narando.py @@ -6,6 +6,7 @@ from .common import InfoExtractor class NarandoIE(InfoExtractor): IE_NAME = 'narando' + _THUMB_SIZES = ('small', 'square', 'medium', 'big', 'original') _VALID_URL = r'https?://narando\.com/widget\?.*?r=(?P\w+)&?' _TEST = { 'url': 'https://narando.com/widget?r=b2t4t789kxgy9g7ms4rwjvvw', @@ -22,11 +23,22 @@ class NarandoIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.+?)', webpage, 'title') download_url = self._html_search_regex(r'
(.+)
', webpage, 'download_url') + thumbnail_id = self._html_search_regex(r'article_picture\/(.+?)\/small\.jpg', webpage, 'thumbnail_id', fatal=False) + thumbnail_dict = [] + thumb_id = 0 + for size in self._THUMB_SIZES: + thumbnail_dict.append({ + 'url': 'https://static.narando.com/article_picture/' + thumbnail_id + '/' + size + '.jpg', + 'id': size, + 'preference': thumb_id, + }) + thumb_id += 1 return { 'id': video_id, 'title': title, 'url': download_url, 'vcodec': 'none', + 'thumbnails': thumbnail_dict, } @@ -44,7 +56,7 @@ class NarandoArticleIE(InfoExtractor): } }, { - 'url': 'https://narando.com/r/b2t4t789kxgy9g7ms4rwjvvw', #alternate URL format + 'url': 'https://narando.com/r/b2t4t789kxgy9g7ms4rwjvvw', # alternate URL format 'md5': 'd20f671f0395bab8f8285d1f6e8f965e', 'info_dict': { 'id': 'b2t4t789kxgy9g7ms4rwjvvw',