diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py
index 923c351e4..c95ad2173 100644
--- a/youtube_dl/extractor/adn.py
+++ b/youtube_dl/extractor/adn.py
@@ -65,14 +65,15 @@ class ADNIE(InfoExtractor):
         if subtitle_location:
             enc_subtitles = self._download_webpage(
                 urljoin(self._BASE_URL, subtitle_location),
-                video_id, 'Downloading subtitles data', fatal=False)
+                video_id, 'Downloading subtitles data', fatal=False,
+                headers={'Origin': 'https://animedigitalnetwork.fr'})
         if not enc_subtitles:
             return None
 
         # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
         dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
             bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
-            bytes_to_intlist(binascii.unhexlify(self._K + '4421de0a5f0814ba')),
+            bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
             bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
         ))
         subtitles_json = self._parse_json(
diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py
index 734702144..36e6dff72 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -1,9 +1,12 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     parse_iso8601,
+    try_get,
+    url_or_none,
 )
 
 
@@ -18,11 +21,13 @@ class CCCIE(InfoExtractor):
             'id': '1839',
             'ext': 'mp4',
             'title': 'Introduction to Processor Design',
+            'creator': 'byterazor',
             'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
             'thumbnail': r're:^https?://.*\.jpg$',
             'upload_date': '20131228',
             'timestamp': 1388188800,
             'duration': 3710,
+            'tags': list,
         }
     }, {
         'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@@ -68,6 +73,7 @@ class CCCIE(InfoExtractor):
             'id': event_id,
             'display_id': display_id,
             'title': event_data['title'],
+            'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
             'description': event_data.get('description'),
             'thumbnail': event_data.get('thumb_url'),
             'timestamp': parse_iso8601(event_data.get('date')),
@@ -75,3 +81,31 @@ class CCCIE(InfoExtractor):
             'tags': event_data.get('tags'),
             'formats': formats,
         }
+
+
+class CCCPlaylistIE(InfoExtractor):
+    IE_NAME = 'media.ccc.de:lists'
+    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://media.ccc.de/c/30c3',
+        'info_dict': {
+            'title': '30C3',
+            'id': '30c3',
+        },
+        'playlist_count': 135,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url).lower()
+
+        conf = self._download_json(
+            'https://media.ccc.de/public/conferences/' + playlist_id,
+            playlist_id)
+
+        entries = []
+        for e in conf['events']:
+            event_url = url_or_none(e.get('frontend_link'))
+            if event_url:
+                entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))
+
+        return self.playlist_result(entries, playlist_id, conf.get('title'))
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
deleted file mode 100644
index db1de699f..000000000
--- a/youtube_dl/extractor/dramafever.py
+++ /dev/null
@@ -1,266 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import json
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_HTTPError,
-    compat_urlparse,
-)
-from ..utils import (
-    clean_html,
-    ExtractorError,
-    int_or_none,
-    parse_age_limit,
-    parse_duration,
-    unified_timestamp,
-    url_or_none,
-)
-
-
-class DramaFeverBaseIE(InfoExtractor):
-    _NETRC_MACHINE = 'dramafever'
-
-    _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
-
-    _consumer_secret = None
-
-    def _get_consumer_secret(self):
-        mainjs = self._download_webpage(
-            'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
-            None, 'Downloading main.js', fatal=False)
-        if not mainjs:
-            return self._CONSUMER_SECRET
-        return self._search_regex(
-            r"var\s+cs\s*=\s*'([^']+)'", mainjs,
-            'consumer secret', default=self._CONSUMER_SECRET)
-
-    def _real_initialize(self):
-        self._consumer_secret = self._get_consumer_secret()
-        self._login()
-
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
-        login_form = {
-            'username': username,
-            'password': password,
-        }
-
-        try:
-            response = self._download_json(
-                'https://www.dramafever.com/api/users/login', None, 'Logging in',
-                data=json.dumps(login_form).encode('utf-8'), headers={
-                    'x-consumer-key': self._consumer_secret,
-                })
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
-                response = self._parse_json(
-                    e.cause.read().decode('utf-8'), None)
-            else:
-                raise
-
-        # Successful login
-        if response.get('result') or response.get('guid') or response.get('user_guid'):
-            return
-
-        errors = response.get('errors')
-        if errors and isinstance(errors, list):
-            error = errors[0]
-            message = error.get('message') or error['reason']
-            raise ExtractorError('Unable to login: %s' % message, expected=True)
-        raise ExtractorError('Unable to log in')
-
-
-class DramaFeverIE(DramaFeverBaseIE):
-    IE_NAME = 'dramafever'
-    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
-    _TESTS = [{
-        'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
-        'info_dict': {
-            'id': '4274.1',
-            'ext': 'wvm',
-            'title': 'Heirs - Episode 1',
-            'description': 'md5:362a24ba18209f6276e032a651c50bc2',
-            'thumbnail': r're:^https?://.*\.jpg',
-            'duration': 3783,
-            'timestamp': 1381354993,
-            'upload_date': '20131009',
-            'series': 'Heirs',
-            'season_number': 1,
-            'episode': 'Episode 1',
-            'episode_number': 1,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
-        'info_dict': {
-            'id': '4826.4',
-            'ext': 'flv',
-            'title': 'Mnet Asian Music Awards 2015',
-            'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
-            'episode': 'Mnet Asian Music Awards 2015 - Part 3',
-            'episode_number': 4,
-            'thumbnail': r're:^https?://.*\.jpg',
-            'timestamp': 1450213200,
-            'upload_date': '20151215',
-            'duration': 5359,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
-        'only_matching': True,
-    }]
-
-    def _call_api(self, path, video_id, note, fatal=False):
-        return self._download_json(
-            'https://www.dramafever.com/api/5/' + path,
-            video_id, note=note, headers={
-                'x-consumer-key': self._consumer_secret,
-            }, fatal=fatal)
-
-    def _get_subtitles(self, video_id):
-        subtitles = {}
-        subs = self._call_api(
-            'video/%s/subtitles/webvtt/' % video_id, video_id,
-            'Downloading subtitles JSON', fatal=False)
-        if not subs or not isinstance(subs, list):
-            return subtitles
-        for sub in subs:
-            if not isinstance(sub, dict):
-                continue
-            sub_url = url_or_none(sub.get('url'))
-            if not sub_url:
-                continue
-            subtitles.setdefault(
-                sub.get('code') or sub.get('language') or 'en', []).append({
-                    'url': sub_url
-                })
-        return subtitles
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url).replace('/', '.')
-
-        series_id, episode_number = video_id.split('.')
-
-        video = self._call_api(
-            'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
-            'Downloading video JSON')
-
-        formats = []
-        download_assets = video.get('download_assets')
-        if download_assets and isinstance(download_assets, dict):
-            for format_id, format_dict in download_assets.items():
-                if not isinstance(format_dict, dict):
-                    continue
-                format_url = url_or_none(format_dict.get('url'))
-                if not format_url:
-                    continue
-                formats.append({
-                    'url': format_url,
-                    'format_id': format_id,
-                    'filesize': int_or_none(video.get('filesize')),
-                })
-
-        stream = self._call_api(
-            'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
-            fatal=False)
-        if stream:
-            stream_url = stream.get('stream_url')
-            if stream_url:
-                formats.extend(self._extract_m3u8_formats(
-                    stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-        self._sort_formats(formats)
-
-        title = video.get('title') or 'Episode %s' % episode_number
-        description = video.get('description')
-        thumbnail = video.get('thumbnail')
-        timestamp = unified_timestamp(video.get('release_date'))
-        duration = parse_duration(video.get('duration'))
-        age_limit = parse_age_limit(video.get('tv_rating'))
-        series = video.get('series_title')
-        season_number = int_or_none(video.get('season'))
-
-        if series:
-            title = '%s - %s' % (series, title)
-
-        subtitles = self.extract_subtitles(video_id)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'timestamp': timestamp,
-            'age_limit': age_limit,
-            'series': series,
-            'season_number': season_number,
-            'episode_number': int_or_none(episode_number),
-            'formats': formats,
-            'subtitles': subtitles,
-        }
-
-
-class DramaFeverSeriesIE(DramaFeverBaseIE):
-    IE_NAME = 'dramafever:series'
-    _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
-    _TESTS = [{
-        'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
-        'info_dict': {
-            'id': '4512',
-            'title': 'Cooking with Shin',
-            'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
-        },
-        'playlist_count': 4,
-    }, {
-        'url': 'http://www.dramafever.com/drama/124/IRIS/',
-        'info_dict': {
-            'id': '124',
-            'title': 'IRIS',
-            'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
-        },
-        'playlist_count': 20,
-    }]
-
-    _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
-
-    def _real_extract(self, url):
-        series_id = self._match_id(url)
-
-        series = self._download_json(
-            'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
-            % (self._consumer_secret, series_id),
-            series_id, 'Downloading series JSON')['series'][series_id]
-
-        title = clean_html(series['name'])
-        description = clean_html(series.get('description') or series.get('description_short'))
-
-        entries = []
-        for page_num in itertools.count(1):
-            episodes = self._download_json(
-                'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
-                % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
-                series_id, 'Downloading episodes JSON page #%d' % page_num)
-            for episode in episodes.get('value', []):
-                episode_url = episode.get('episode_url')
-                if not episode_url:
-                    continue
-                entries.append(self.url_result(
-                    compat_urlparse.urljoin(url, episode_url),
-                    'DramaFever', episode.get('guid')))
-            if page_num == episodes['num_pages']:
-                break
-
-        return self.playlist_result(entries, series_id, title, description)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 487ef2778..392b1f92b 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -177,7 +177,10 @@ from .cbsnews import (
     CBSNewsLiveVideoIE,
 )
 from .cbssports import CBSSportsIE
-from .ccc import CCCIE
+from .ccc import (
+    CCCIE,
+    CCCPlaylistIE,
+)
 from .ccma import CCMAIE
 from .cctv import CCTVIE
 from .cda import CDAIE
@@ -284,10 +287,6 @@ from .dplay import (
     DPlayIE,
     DPlayItIE,
 )
-from .dramafever import (
-    DramaFeverIE,
-    DramaFeverSeriesIE,
-)
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
@@ -1098,6 +1097,10 @@ from .streetvoice import StreetVoiceIE
 from .stretchinternet import StretchInternetIE
 from .stv import STVPlayerIE
 from .sunporno import SunPornoIE
+from .sverigesradio import (
+    SverigesRadioEpisodeIE,
+    SverigesRadioPublicationIE,
+)
 from .svt import (
     SVTIE,
     SVTPageIE,
diff --git a/youtube_dl/extractor/sverigesradio.py b/youtube_dl/extractor/sverigesradio.py
new file mode 100644
index 000000000..aa0691f0d
--- /dev/null
+++ b/youtube_dl/extractor/sverigesradio.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    str_or_none,
+)
+
+
+class SverigesRadioBaseIE(InfoExtractor):
+    _BASE_URL = 'https://sverigesradio.se/sida/playerajax/'
+    _QUALITIES = ['low', 'medium', 'high']
+    _EXT_TO_CODEC_MAP = {
+        'mp3': 'mp3',
+        'm4a': 'aac',
+    }
+    _CODING_FORMAT_TO_ABR_MAP = {
+        5: 128,
+        11: 192,
+        12: 32,
+        13: 96,
+    }
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+        query = {
+            'id': audio_id,
+            'type': self._AUDIO_TYPE,
+        }
+
+        item = self._download_json(
+            self._BASE_URL + 'audiometadata', audio_id,
+            'Downloading audio JSON metadata', query=query)['items'][0]
+        title = item['subtitle']
+
+        query['format'] = 'iis'
+        urls = []
+        formats = []
+        for quality in self._QUALITIES:
+            query['quality'] = quality
+            audio_url_data = self._download_json(
+                self._BASE_URL + 'getaudiourl', audio_id,
+                'Downloading %s format JSON metadata' % quality,
+                fatal=False, query=query) or {}
+            audio_url = audio_url_data.get('audioUrl')
+            if not audio_url or audio_url in urls:
+                continue
+            urls.append(audio_url)
+            ext = determine_ext(audio_url)
+            coding_format = audio_url_data.get('codingFormat')
+            abr = int_or_none(self._search_regex(
+                r'_a(\d+)\.m4a', audio_url, 'audio bitrate',
+                default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format)
+            formats.append({
+                'abr': abr,
+                'acodec': self._EXT_TO_CODEC_MAP.get(ext),
+                'ext': ext,
+                'format_id': str_or_none(coding_format),
+                'vcodec': 'none',
+                'url': audio_url,
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': audio_id,
+            'title': title,
+            'formats': formats,
+            'series': item.get('title'),
+            'duration': int_or_none(item.get('duration')),
+            'thumbnail': item.get('displayimageurl'),
+            'description': item.get('description'),
+        }
+
+
+class SverigesRadioPublicationIE(SverigesRadioBaseIE):
+    IE_NAME = 'sverigesradio:publication'
+    _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
+        'md5': '6a4917e1923fccb080e5a206a5afa542',
+        'info_dict': {
+            'id': '7038546',
+            'ext': 'm4a',
+            'duration': 132,
+            'series': 'Nyheter (Ekot)',
+            'title': 'Esa Teittinen: Sanningen har inte kommit fram',
+            'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+    }, {
+        'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
+        'only_matching': True,
+    }]
+    _AUDIO_TYPE = 'publication'
+
+
+class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
+    IE_NAME = 'sverigesradio:episode'
+    _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
+        'md5': '20dc4d8db24228f846be390b0c59a07c',
+        'info_dict': {
+            'id': '1140922',
+            'ext': 'mp3',
+            'duration': 3307,
+            'series': 'Konflikt',
+            'title': 'Metoo och valen',
+            'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
+            'thumbnail': r're:^https?://.*\.jpg',
+        }
+    }
+    _AUDIO_TYPE = 'episode'
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 55eafb866..5e0a9e10c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -908,6 +908,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
                 'track': 'Dark Walk - Position Music',
                 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
             },
             'params': {
                 'skip_download': True,
@@ -2161,9 +2162,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         track = extract_meta('Song')
         artist = extract_meta('Artist')
+        album = extract_meta('Album')
 
         # Youtube Music Auto-generated description
-        album = release_date = release_year = None
+        release_date = release_year = None
         if video_description:
             mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
             if mobj:
@@ -2171,7 +2173,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 track = mobj.group('track').strip()
                 if not artist:
                     artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
-                album = mobj.group('album'.strip())
+                if not album:
+                    album = mobj.group('album').strip()
                 release_year = mobj.group('release_year')
                 release_date = mobj.group('release_date')
                 if release_date: