From b491341a9d0e9043513c28014d6ff8be14094053 Mon Sep 17 00:00:00 2001
From: user706 <39215612+user706@users.noreply.github.com>
Date: Wed, 2 Jan 2019 23:17:16 +0100
Subject: [PATCH 1/3] [dnb] Add new extractor

---
 youtube_dl/extractor/dnb.py        | 75 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 76 insertions(+)
 create mode 100644 youtube_dl/extractor/dnb.py

diff --git a/youtube_dl/extractor/dnb.py b/youtube_dl/extractor/dnb.py
new file mode 100644
index 000000000..ceb0fa66a
--- /dev/null
+++ b/youtube_dl/extractor/dnb.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+import re
+import json
+
+
+class DNBIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:portal\.dnb\.de/audioplayer/do/show/|d-nb\.info/)(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://portal.dnb.de/audioplayer/do/show/1077188552#dcId=1546432571580&p=1',
+        'md5': 'cdef1faf339db9978b27c70ef4c0516b',
+        'info_dict': {
+            'title': 'Leonoren-Ouvertüre III [Elektronische Ressource] : Allegro / Ludwig van Beethoven (1770-1827)',
+            'id': '1077188552',
+            'ext': 'mp3',
+            'track': 'Track 1',
+            'track_number': 1
+        }
+    }, {
+        'url': 'http://d-nb.info/1077188552',
+        'md5': 'cdef1faf339db9978b27c70ef4c0516b',
+        'info_dict': {
+            'title': 'Leonoren-Ouvertüre III [Elektronische Ressource] : Allegro / Ludwig van Beethoven (1770-1827)',
+            'id': '1077188552',
+            'ext': 'mp3',
+            'track': 'Track 1',
+            'track_number': 1
+        }
+    }]
+
+    @staticmethod
+    def update_and_return_dic(info_dict, update_info):
+        ret = info_dict.copy()
+        ret.update(update_info)
+        return ret
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        url = 'https://portal.dnb.de/audioplayer/do/show/' + video_id
+        webpage = self._download_webpage(url, video_id)
+
+        m = re.search(r'fdnbpl.media\s*=\s*(\[.*\]);', webpage)
+        objs = json.loads(m.group(1))
+
+        result = []
+        num = 1
+        for obj in objs:
+            thumbnail = obj.get('cover_url')
+            if thumbnail:
+                thumbnail = 'https://portal.dnb.de/' + thumbnail
+
+            info_dict = {
+                'id': obj.get('idn'),
+                'title': obj.get('title'),
+                'author': obj.get('author'),
+                'url': 'https://portal.dnb.de/' + obj.get('media_url'),
+                'ext': 'mp3',
+                'thumbnail': thumbnail
+            }
+
+            tracks = [type(self).update_and_return_dic(info_dict,
+                                                       {
+                                                           'track': ti[1].get('title'),
+                                                           'track_number': ti[0]
+                                                       })
+                      for ti in enumerate(obj.get('tracks'), start=num)]
+            result.extend(tracks)
+            num += len(tracks)
+
+        return {
+            '_type': 'playlist',
+            'entries': result
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d72f52e36..9ae771ba8 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -274,6 +274,7 @@ from .democracynow import DemocracynowIE
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .digg import DiggIE
+from .dnb import DNBIE
 from .dotsub import DotsubIE
 from .douyutv import (
     DouyuShowIE,

From db2672eb1120dce173d98f40fb10b2f5cd087374 Mon Sep 17 00:00:00 2001
From: user706 <39215612+user706@users.noreply.github.com>
Date: Wed, 2 Jan 2019 23:30:10 +0100
Subject: [PATCH 2/3] [dnb] fix id (can also have letters such as "X")

---
 youtube_dl/extractor/dnb.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/dnb.py b/youtube_dl/extractor/dnb.py
index ceb0fa66a..b58cebd19 100644
--- a/youtube_dl/extractor/dnb.py
+++ b/youtube_dl/extractor/dnb.py
@@ -7,7 +7,7 @@ import json
 
 
 class DNBIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:portal\.dnb\.de/audioplayer/do/show/|d-nb\.info/)(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:portal\.dnb\.de/audioplayer/do/show/|d-nb\.info/)(?P<id>\w+)[/&]?'
     _TESTS = [{
         'url': 'https://portal.dnb.de/audioplayer/do/show/1077188552#dcId=1546432571580&p=1',
         'md5': 'cdef1faf339db9978b27c70ef4c0516b',
@@ -39,6 +39,7 @@ class DNBIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         url = 'https://portal.dnb.de/audioplayer/do/show/' + video_id
+        print('url', url)
         webpage = self._download_webpage(url, video_id)
 
         m = re.search(r'fdnbpl.media\s*=\s*(\[.*\]);', webpage)

From 2444cadb8f81f4635f7a00b00d1b88f9e8067adb Mon Sep 17 00:00:00 2001
From: user706 <39215612+user706@users.noreply.github.com>
Date: Wed, 2 Jan 2019 23:32:16 +0100
Subject: [PATCH 3/3] [dnb] remove debug print

---
 youtube_dl/extractor/dnb.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/youtube_dl/extractor/dnb.py b/youtube_dl/extractor/dnb.py
index b58cebd19..8c07b9d9e 100644
--- a/youtube_dl/extractor/dnb.py
+++ b/youtube_dl/extractor/dnb.py
@@ -39,7 +39,6 @@ class DNBIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         url = 'https://portal.dnb.de/audioplayer/do/show/' + video_id
-        print('url', url)
         webpage = self._download_webpage(url, video_id)
 
         m = re.search(r'fdnbpl.media\s*=\s*(\[.*\]);', webpage)