From 7ff8ad80f1442fc213a6463fa824a70d397b0745 Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Tue, 25 Dec 2018 14:29:48 +0100 Subject: [PATCH 1/8] [sverigesradio] Add extractor --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/sverigesradio.py | 105 ++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 youtube_dl/extractor/sverigesradio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 487ef2778..26a30b4c3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1098,6 +1098,10 @@ from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE from .stv import STVPlayerIE from .sunporno import SunPornoIE +from .sverigesradio import ( + SverigesRadioEpisodeIE, + SverigesRadioPublicationIE, +) from .svt import ( SVTIE, SVTPageIE, diff --git a/youtube_dl/extractor/sverigesradio.py b/youtube_dl/extractor/sverigesradio.py new file mode 100644 index 000000000..05de31a79 --- /dev/null +++ b/youtube_dl/extractor/sverigesradio.py @@ -0,0 +1,105 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class SverigesRadioBaseIE(InfoExtractor): + _BASE_URL = 'https://sverigesradio.se/sida/playerajax' + _QUALITIES = ['high', 'medium', 'low'] + _CODING_FORMATS = { + 5: {'acodec': 'mp3', 'abr': 128}, + 11: {'acodec': 'aac', 'abr': 192}, + 12: {'acodec': 'aac', 'abr': 32}, + 13: {'acodec': 'aac', 'abr': 96}, + } + + def _extract_formats(self, query, audio_id, audio_type): + audiourls = {} + for quality in self._QUALITIES: + audiourl = self._download_json( + self._BASE_URL + '/getaudiourl', audio_id, + fatal=True, + query=dict(query, type=audio_type, quality=quality, format='iis')) + if audiourl is None: + continue + + # for some reason url can be empty, skip if so + # also skip if url has already been seen (quality parameter is ignored?) 
+ url = audiourl.get('audioUrl') + if url is None or url == "" or url in audiourls: + continue + + audioformat = {'vcodec': 'none', 'url': url} + # add codec and bitrate if known coding format + codingformat = audiourl.get('codingFormat') + if codingformat: + audioformat.update(self._CODING_FORMATS.get(codingformat, {})) + + audiourls[url] = audioformat + + return audiourls.values() + + def _extract_audio(self, audio_type, url): + audio_id = self._match_id(url) + query = {'id': audio_id, 'type': audio_type} + + metadata = self._download_json(self._BASE_URL + '/audiometadata', audio_id, query=query) + item = metadata['items'][0] + + formats = self._extract_formats(query, audio_id, audio_type) + self._sort_formats(formats) + + return { + 'id': audio_id, + 'title': item['subtitle'], + 'formats': formats, + 'series': item.get('title'), + 'duration': int_or_none(item.get('duration')), + 'thumbnail': item.get('displayimageurl'), + 'description': item.get('description'), + } + + +class SverigesRadioPublicationIE(SverigesRadioBaseIE): + _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*artikel=(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546', + 'md5': '6a4917e1923fccb080e5a206a5afa542', + 'info_dict': { + 'id': '7038546', + 'ext': 'm4a', + 'duration': 132, + 'series': 'Nyheter (Ekot)', + 'title': 'Esa Teittinen: Sanningen har inte kommit fram', + 'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df', + 'thumbnail': 're:^https://static-cdn.sr.se/sida/images/', + }, + }, { + 'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887', + 'only_matching': True, + }] + + def _real_extract(self, url): + return self._extract_audio('publication', url) + + +class SverigesRadioEpisodeIE(SverigesRadioBaseIE): + _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)' + _TEST = { + 'url': 
'https://sverigesradio.se/avsnitt/1140922?programid=1300', + 'md5': '20dc4d8db24228f846be390b0c59a07c', + 'info_dict': { + 'id': '1140922', + 'ext': 'mp3', + 'duration': 3307, + 'series': 'Konflikt', + 'title': 'Metoo och valen', + 'description': 'md5:fcb5c1f667f00badcc702b196f10a27e', + 'thumbnail': 're:^https://static-cdn.sr.se/sida/images/' + } + } + + def _real_extract(self, url): + return self._extract_audio('episode', url) From 280913800dff225d7171ccdbe09d7ce01fdf5d3f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 28 Apr 2019 12:03:39 +0100 Subject: [PATCH 2/8] [sverigesradio] improve extraction(closes #18635) --- youtube_dl/extractor/sverigesradio.py | 108 ++++++++++++++------------ 1 file changed, 59 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/sverigesradio.py b/youtube_dl/extractor/sverigesradio.py index 05de31a79..aa0691f0d 100644 --- a/youtube_dl/extractor/sverigesradio.py +++ b/youtube_dl/extractor/sverigesradio.py @@ -2,58 +2,70 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + determine_ext, + int_or_none, + str_or_none, +) class SverigesRadioBaseIE(InfoExtractor): - _BASE_URL = 'https://sverigesradio.se/sida/playerajax' - _QUALITIES = ['high', 'medium', 'low'] - _CODING_FORMATS = { - 5: {'acodec': 'mp3', 'abr': 128}, - 11: {'acodec': 'aac', 'abr': 192}, - 12: {'acodec': 'aac', 'abr': 32}, - 13: {'acodec': 'aac', 'abr': 96}, + _BASE_URL = 'https://sverigesradio.se/sida/playerajax/' + _QUALITIES = ['low', 'medium', 'high'] + _EXT_TO_CODEC_MAP = { + 'mp3': 'mp3', + 'm4a': 'aac', + } + _CODING_FORMAT_TO_ABR_MAP = { + 5: 128, + 11: 192, + 12: 32, + 13: 96, } - def _extract_formats(self, query, audio_id, audio_type): - audiourls = {} - for quality in self._QUALITIES: - audiourl = self._download_json( - self._BASE_URL + '/getaudiourl', audio_id, - fatal=True, - query=dict(query, type=audio_type, quality=quality, format='iis')) - if audiourl is 
None: - continue - - # for some reason url can be empty, skip if so - # also skip if url has already been seen (quality parameter is ignored?) - url = audiourl.get('audioUrl') - if url is None or url == "" or url in audiourls: - continue - - audioformat = {'vcodec': 'none', 'url': url} - # add codec and bitrate if known coding format - codingformat = audiourl.get('codingFormat') - if codingformat: - audioformat.update(self._CODING_FORMATS.get(codingformat, {})) - - audiourls[url] = audioformat - - return audiourls.values() - - def _extract_audio(self, audio_type, url): + def _real_extract(self, url): audio_id = self._match_id(url) - query = {'id': audio_id, 'type': audio_type} + query = { + 'id': audio_id, + 'type': self._AUDIO_TYPE, + } - metadata = self._download_json(self._BASE_URL + '/audiometadata', audio_id, query=query) - item = metadata['items'][0] + item = self._download_json( + self._BASE_URL + 'audiometadata', audio_id, + 'Downloading audio JSON metadata', query=query)['items'][0] + title = item['subtitle'] - formats = self._extract_formats(query, audio_id, audio_type) + query['format'] = 'iis' + urls = [] + formats = [] + for quality in self._QUALITIES: + query['quality'] = quality + audio_url_data = self._download_json( + self._BASE_URL + 'getaudiourl', audio_id, + 'Downloading %s format JSON metadata' % quality, + fatal=False, query=query) or {} + audio_url = audio_url_data.get('audioUrl') + if not audio_url or audio_url in urls: + continue + urls.append(audio_url) + ext = determine_ext(audio_url) + coding_format = audio_url_data.get('codingFormat') + abr = int_or_none(self._search_regex( + r'_a(\d+)\.m4a', audio_url, 'audio bitrate', + default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format) + formats.append({ + 'abr': abr, + 'acodec': self._EXT_TO_CODEC_MAP.get(ext), + 'ext': ext, + 'format_id': str_or_none(coding_format), + 'vcodec': 'none', + 'url': audio_url, + }) self._sort_formats(formats) return { 'id': audio_id, - 'title': 
item['subtitle'], + 'title': title, 'formats': formats, 'series': item.get('title'), 'duration': int_or_none(item.get('duration')), @@ -63,7 +75,8 @@ class SverigesRadioBaseIE(InfoExtractor): class SverigesRadioPublicationIE(SverigesRadioBaseIE): - _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*artikel=(?P<id>[0-9]+)' + IE_NAME = 'sverigesradio:publication' + _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546', 'md5': '6a4917e1923fccb080e5a206a5afa542', @@ -74,18 +87,17 @@ class SverigesRadioPublicationIE(SverigesRadioBaseIE): 'series': 'Nyheter (Ekot)', 'title': 'Esa Teittinen: Sanningen har inte kommit fram', 'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df', - 'thumbnail': 're:^https://static-cdn.sr.se/sida/images/', + 'thumbnail': r're:^https?://.*\.jpg', }, }, { 'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887', 'only_matching': True, }] - - def _real_extract(self, url): - return self._extract_audio('publication', url) + _AUDIO_TYPE = 'publication' class SverigesRadioEpisodeIE(SverigesRadioBaseIE): + IE_NAME = 'sverigesradio:episode' _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)' _TEST = { 'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300', @@ -97,9 +109,7 @@ class SverigesRadioEpisodeIE(SverigesRadioBaseIE): 'series': 'Konflikt', 'title': 'Metoo och valen', 'description': 'md5:fcb5c1f667f00badcc702b196f10a27e', - 'thumbnail': 're:^https://static-cdn.sr.se/sida/images/' + 'thumbnail': r're:^https?://.*\.jpg', } } - - def _real_extract(self, url): - return self._extract_audio('episode', url) + _AUDIO_TYPE = 'episode' From ae8c13565eaed326179b26a91a1b0c3179eb3d07 Mon Sep 17 00:00:00 2001 From: Tobias Kunze Date: Fri, 27 Oct 2017 13:22:13 +0200 Subject: [PATCH 3/8] [ccc:playlist] 
Add extractor --- youtube_dl/extractor/ccc.py | 24 ++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 ++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 734702144..a4fdf74e8 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -75,3 +75,27 @@ class CCCIE(InfoExtractor): 'tags': event_data.get('tags'), 'formats': formats, } + + +class CCCPlaylistIE(InfoExtractor): + IE_NAME = 'media.ccc.de:lists' + _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://media.ccc.de/c/30c3', + 'info_dict': { + 'title': '30C3', + 'id': '30c3', + }, + 'playlist_count': 135, + }] + + def _real_extract(self, url): + acronym = self._match_id(url).lower() + + conf = self._download_json('https://media.ccc.de/public/conferences/' + acronym, acronym) + + return self.playlist_result( + [self.url_result(event['frontend_link']) for event in conf['events']], + acronym, + conf['title'], + ) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 26a30b4c3..c011cf981 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -177,7 +177,10 @@ from .cbsnews import ( CBSNewsLiveVideoIE, ) from .cbssports import CBSSportsIE -from .ccc import CCCIE +from .ccc import ( + CCCIE, + CCCPlaylistIE, +) from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE From 24510bdcfab3e450f9e1a1b82cf7fca4183c3333 Mon Sep 17 00:00:00 2001 From: Tobias Gruetzmacher Date: Thu, 14 Mar 2019 21:30:01 +0100 Subject: [PATCH 4/8] [ccc] Extract creator --- youtube_dl/extractor/ccc.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index a4fdf74e8..5fc473fce 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -1,9 +1,11 @@ +# coding: utf-8 from __future__ import 
unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, parse_iso8601, + try_get, ) @@ -18,15 +20,26 @@ class CCCIE(InfoExtractor): 'id': '1839', 'ext': 'mp4', 'title': 'Introduction to Processor Design', + 'creator': 'byterazor', 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20131228', 'timestamp': 1388188800, 'duration': 3710, + 'tags': list, } }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', - 'only_matching': True, + 'info_dict': { + 'id': '2835', + 'ext': 'mp4', + 'title': 'Shopshifting', + 'creator': 'Karsten Nohl, Fabian Bräunlein, dexter', + 'description': 'md5:0fade0535e9dc3076d0cbda4958a18eb', + 'upload_date': '20151227', + 'timestamp': 1451249100, + 'tags': list, + } }] def _real_extract(self, url): @@ -68,6 +81,7 @@ class CCCIE(InfoExtractor): 'id': event_id, 'display_id': display_id, 'title': event_data['title'], + 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])), 'description': event_data.get('description'), 'thumbnail': event_data.get('thumb_url'), 'timestamp': parse_iso8601(event_data.get('date')), From f916abc0ac4d1dc7f3a243d791d0f57fd3848a3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Apr 2019 23:05:36 +0700 Subject: [PATCH 5/8] [ccc] Improve extraction (closes #14601, closes #20355) --- youtube_dl/extractor/ccc.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 5fc473fce..36e6dff72 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -6,6 +6,7 @@ from ..utils import ( int_or_none, parse_iso8601, try_get, + url_or_none, ) @@ -30,16 +31,7 @@ class CCCIE(InfoExtractor): } }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', - 'info_dict': { - 'id': '2835', - 'ext': 'mp4', - 'title': 'Shopshifting', - 'creator': 'Karsten Nohl, Fabian 
Bräunlein, dexter', - 'description': 'md5:0fade0535e9dc3076d0cbda4958a18eb', - 'upload_date': '20151227', - 'timestamp': 1451249100, - 'tags': list, - } + 'only_matching': True, }] def _real_extract(self, url): @@ -104,12 +96,16 @@ class CCCPlaylistIE(InfoExtractor): }] def _real_extract(self, url): - acronym = self._match_id(url).lower() + playlist_id = self._match_id(url).lower() - conf = self._download_json('https://media.ccc.de/public/conferences/' + acronym, acronym) + conf = self._download_json( + 'https://media.ccc.de/public/conferences/' + playlist_id, + playlist_id) - return self.playlist_result( - [self.url_result(event['frontend_link']) for event in conf['events']], - acronym, - conf['title'], - ) + entries = [] + for e in conf['events']: + event_url = url_or_none(e.get('frontend_link')) + if event_url: + entries.append(self.url_result(event_url, ie=CCCIE.ie_key())) + + return self.playlist_result(entries, playlist_id, conf.get('title')) From 92bc97d398cb66e4968070f9d73f02a367193c2b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 28 Apr 2019 17:37:46 +0100 Subject: [PATCH 6/8] [youtube] extract album from Music in this video section(#20301) --- youtube_dl/extractor/youtube.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 55eafb866..5e0a9e10c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -908,6 +908,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', 'track': 'Dark Walk - Position Music', 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', + 'album': 'Position Music - Production Music Vol. 
143 - Dark Walk', }, 'params': { 'skip_download': True, @@ -2161,9 +2162,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): track = extract_meta('Song') artist = extract_meta('Artist') + album = extract_meta('Album') # Youtube Music Auto-generated description - album = release_date = release_year = None + release_date = release_year = None if video_description: mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) if mobj: @@ -2171,7 +2173,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): track = mobj.group('track').strip() if not artist: artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')) - album = mobj.group('album'.strip()) + if not album: + album = mobj.group('album'.strip()) release_year = mobj.group('release_year') release_date = mobj.group('release_date') if release_date: From c464e1df2c3769969b447e80b126140880d00e67 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 28 Apr 2019 17:50:47 +0100 Subject: [PATCH 7/8] [adn] fix subtitle extraction(#12724) --- youtube_dl/extractor/adn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 923c351e4..c95ad2173 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -65,14 +65,15 @@ class ADNIE(InfoExtractor): if subtitle_location: enc_subtitles = self._download_webpage( urljoin(self._BASE_URL, subtitle_location), - video_id, 'Downloading subtitles data', fatal=False) + video_id, 'Downloading subtitles data', fatal=False, + headers={'Origin': 'https://animedigitalnetwork.fr'}) if not enc_subtitles: return None # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), - 
bytes_to_intlist(binascii.unhexlify(self._K + '4421de0a5f0814ba')), + bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')), bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( From 6e07b5a6d53f8ab8a628177e8b40f06ec5897777 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 28 Apr 2019 18:02:41 +0100 Subject: [PATCH 8/8] [dramafever] Remove extractor(closes #20868) --- youtube_dl/extractor/dramafever.py | 266 ----------------------------- youtube_dl/extractor/extractors.py | 4 - 2 files changed, 270 deletions(-) delete mode 100644 youtube_dl/extractor/dramafever.py diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py deleted file mode 100644 index db1de699f..000000000 --- a/youtube_dl/extractor/dramafever.py +++ /dev/null @@ -1,266 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import itertools -import json - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_urlparse, -) -from ..utils import ( - clean_html, - ExtractorError, - int_or_none, - parse_age_limit, - parse_duration, - unified_timestamp, - url_or_none, -) - - -class DramaFeverBaseIE(InfoExtractor): - _NETRC_MACHINE = 'dramafever' - - _CONSUMER_SECRET = 'DA59dtVXYLxajktV' - - _consumer_secret = None - - def _get_consumer_secret(self): - mainjs = self._download_webpage( - 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js', - None, 'Downloading main.js', fatal=False) - if not mainjs: - return self._CONSUMER_SECRET - return self._search_regex( - r"var\s+cs\s*=\s*'([^']+)'", mainjs, - 'consumer secret', default=self._CONSUMER_SECRET) - - def _real_initialize(self): - self._consumer_secret = self._get_consumer_secret() - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_form = { - 'username': username, - 'password': password, - } - - try: - response = self._download_json( - 
'https://www.dramafever.com/api/users/login', None, 'Logging in', - data=json.dumps(login_form).encode('utf-8'), headers={ - 'x-consumer-key': self._consumer_secret, - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404): - response = self._parse_json( - e.cause.read().decode('utf-8'), None) - else: - raise - - # Successful login - if response.get('result') or response.get('guid') or response.get('user_guid'): - return - - errors = response.get('errors') - if errors and isinstance(errors, list): - error = errors[0] - message = error.get('message') or error['reason'] - raise ExtractorError('Unable to login: %s' % message, expected=True) - raise ExtractorError('Unable to log in') - - -class DramaFeverIE(DramaFeverBaseIE): - IE_NAME = 'dramafever' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)' - _TESTS = [{ - 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/', - 'info_dict': { - 'id': '4274.1', - 'ext': 'wvm', - 'title': 'Heirs - Episode 1', - 'description': 'md5:362a24ba18209f6276e032a651c50bc2', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3783, - 'timestamp': 1381354993, - 'upload_date': '20131009', - 'series': 'Heirs', - 'season_number': 1, - 'episode': 'Episode 1', - 'episode_number': 1, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1', - 'info_dict': { - 'id': '4826.4', - 'ext': 'flv', - 'title': 'Mnet Asian Music Awards 2015', - 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', - 'episode': 'Mnet Asian Music Awards 2015 - Part 3', - 'episode_number': 4, - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1450213200, - 'upload_date': '20151215', - 'duration': 5359, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/', - 'only_matching': 
True, - }] - - def _call_api(self, path, video_id, note, fatal=False): - return self._download_json( - 'https://www.dramafever.com/api/5/' + path, - video_id, note=note, headers={ - 'x-consumer-key': self._consumer_secret, - }, fatal=fatal) - - def _get_subtitles(self, video_id): - subtitles = {} - subs = self._call_api( - 'video/%s/subtitles/webvtt/' % video_id, video_id, - 'Downloading subtitles JSON', fatal=False) - if not subs or not isinstance(subs, list): - return subtitles - for sub in subs: - if not isinstance(sub, dict): - continue - sub_url = url_or_none(sub.get('url')) - if not sub_url: - continue - subtitles.setdefault( - sub.get('code') or sub.get('language') or 'en', []).append({ - 'url': sub_url - }) - return subtitles - - def _real_extract(self, url): - video_id = self._match_id(url).replace('/', '.') - - series_id, episode_number = video_id.split('.') - - video = self._call_api( - 'series/%s/episodes/%s/' % (series_id, episode_number), video_id, - 'Downloading video JSON') - - formats = [] - download_assets = video.get('download_assets') - if download_assets and isinstance(download_assets, dict): - for format_id, format_dict in download_assets.items(): - if not isinstance(format_dict, dict): - continue - format_url = url_or_none(format_dict.get('url')) - if not format_url: - continue - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'filesize': int_or_none(video.get('filesize')), - }) - - stream = self._call_api( - 'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON', - fatal=False) - if stream: - stream_url = stream.get('stream_url') - if stream_url: - formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - self._sort_formats(formats) - - title = video.get('title') or 'Episode %s' % episode_number - description = video.get('description') - thumbnail = video.get('thumbnail') - timestamp = unified_timestamp(video.get('release_date')) - 
duration = parse_duration(video.get('duration')) - age_limit = parse_age_limit(video.get('tv_rating')) - series = video.get('series_title') - season_number = int_or_none(video.get('season')) - - if series: - title = '%s - %s' % (series, title) - - subtitles = self.extract_subtitles(video_id) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'age_limit': age_limit, - 'series': series, - 'season_number': season_number, - 'episode_number': int_or_none(episode_number), - 'formats': formats, - 'subtitles': subtitles, - } - - -class DramaFeverSeriesIE(DramaFeverBaseIE): - IE_NAME = 'dramafever:series' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' - _TESTS = [{ - 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', - 'info_dict': { - 'id': '4512', - 'title': 'Cooking with Shin', - 'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1', - }, - 'playlist_count': 4, - }, { - 'url': 'http://www.dramafever.com/drama/124/IRIS/', - 'info_dict': { - 'id': '124', - 'title': 'IRIS', - 'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862', - }, - 'playlist_count': 20, - }] - - _PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-) - - def _real_extract(self, url): - series_id = self._match_id(url) - - series = self._download_json( - 'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s' - % (self._consumer_secret, series_id), - series_id, 'Downloading series JSON')['series'][series_id] - - title = clean_html(series['name']) - description = clean_html(series.get('description') or series.get('description_short')) - - entries = [] - for page_num in itertools.count(1): - episodes = self._download_json( - 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d' - % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num), - 
series_id, 'Downloading episodes JSON page #%d' % page_num) - for episode in episodes.get('value', []): - episode_url = episode.get('episode_url') - if not episode_url: - continue - entries.append(self.url_result( - compat_urlparse.urljoin(url, episode_url), - 'DramaFever', episode.get('guid'))) - if page_num == episodes['num_pages']: - break - - return self.playlist_result(entries, series_id, title, description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c011cf981..392b1f92b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,10 +287,6 @@ from .dplay import ( DPlayIE, DPlayItIE, ) -from .dramafever import ( - DramaFeverIE, - DramaFeverSeriesIE, -) from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE