From d06daf23dae24b0811be704283b4b63689035af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Sep 2019 02:08:46 +0700 Subject: [PATCH 1/5] [YoutubeDL] Honour all --get-* options with --flat-playlist (closes #22493) --- youtube_dl/YoutubeDL.py | 62 +++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6a44bc7ba..c3d1407f9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -852,8 +852,9 @@ class YoutubeDL(object): extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): - if self.params.get('forcejson', False): - self.to_stdout(json.dumps(ie_result)) + self.__forced_printings( + ie_result, self.prepare_filename(ie_result), + incomplete=True) return ie_result if result_type == 'video': @@ -1693,6 +1694,36 @@ class YoutubeDL(object): subs[lang] = f return subs + def __forced_printings(self, info_dict, filename, incomplete): + def print_mandatory(field): + if (self.params.get('force%s' % field, False) + and (not incomplete or info_dict.get(field) is not None)): + self.to_stdout(info_dict[field]) + + def print_optional(field): + if (self.params.get('force%s' % field, False) + and info_dict.get(field) is not None): + self.to_stdout(info_dict[field]) + + print_mandatory('title') + print_mandatory('id') + if self.params.get('forceurl', False) and not incomplete: + if info_dict.get('requested_formats') is not None: + for f in info_dict['requested_formats']: + self.to_stdout(f['url'] + f.get('play_path', '')) + else: + # For RTMP URLs, also include the playpath + self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) + print_optional('thumbnail') + print_optional('description') + if self.params.get('forcefilename', False) and filename is not None: + self.to_stdout(filename) + if self.params.get('forceduration', False) and info_dict.get('duration') is not None: + self.to_stdout(formatSeconds(info_dict['duration'])) + print_mandatory('format') + if self.params.get('forcejson', False): + self.to_stdout(json.dumps(info_dict)) + def process_info(self, info_dict): """Process a single resolved IE result.""" @@ -1703,9 +1734,8 @@ class YoutubeDL(object): if self._num_downloads >= int(max_downloads): raise MaxDownloadsReached() + # TODO: backward compatibility, to be removed info_dict['fulltitle'] = info_dict['title'] - if len(info_dict['title']) > 200: - info_dict['title'] = info_dict['title'][:197] + '...' if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] @@ -1720,29 +1750,7 @@ class YoutubeDL(object): info_dict['_filename'] = filename = self.prepare_filename(info_dict) # Forced printings - if self.params.get('forcetitle', False): - self.to_stdout(info_dict['fulltitle']) - if self.params.get('forceid', False): - self.to_stdout(info_dict['id']) - if self.params.get('forceurl', False): - if info_dict.get('requested_formats') is not None: - for f in info_dict['requested_formats']: - self.to_stdout(f['url'] + f.get('play_path', '')) - else: - # For RTMP URLs, also include the playpath - self.to_stdout(info_dict['url'] + info_dict.get('play_path', '')) - if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None: - self.to_stdout(info_dict['thumbnail']) - if self.params.get('forcedescription', False) and info_dict.get('description') is not None: - self.to_stdout(info_dict['description']) - if self.params.get('forcefilename', False) and filename is not None: - self.to_stdout(filename) - if self.params.get('forceduration', False) and info_dict.get('duration') is not None: - self.to_stdout(formatSeconds(info_dict['duration'])) - if self.params.get('forceformat', False): - self.to_stdout(info_dict['format']) - if self.params.get('forcejson', False): - self.to_stdout(json.dumps(info_dict)) + self.__forced_printings(info_dict, filename, incomplete=False) # Do nothing else if in simulate mode if self.params.get('simulate', False): From df63cafe497d7530d887786d5a54ca11bf5e73db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Sep 2019 02:16:25 +0700 Subject: [PATCH 2/5] [byutv] Fix extraction (refs #22070) Downloading of new videos does not work due to DRM --- youtube_dl/extractor/byutv.py | 53 ++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 562c83af9..0b11bf11f 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -3,7 +3,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import parse_duration +from ..utils import ( + determine_ext, + merge_dicts, + parse_duration, + url_or_none, +) class BYUtvIE(InfoExtractor): @@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id - info = self._download_json( + video = self._download_json( 'https://api.byutv.org/api3/catalog/getvideosforcontent', display_id, query={ 'contentid': video_id, @@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor): 'x-byutv-platformkey': 'xsaaw9c7y5', }) - ep = info.get('ooyalaVOD') + ep = video.get('ooyalaVOD') if ep: return { '_type': 'url_transparent', @@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor): 'thumbnail': ep.get('imageThumbnail'), } - ep = info['dvr'] - title = ep['title'] - formats = self._extract_m3u8_formats( - ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + info = {} + formats = [] + for format_id, ep in video.items(): + if not isinstance(ep, dict): + continue + video_url = url_or_none(ep.get('videoUrl')) + if not video_url: + continue + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) + merge_dicts(info, { + 'title': ep.get('title'), + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + 'duration': parse_duration(ep.get('length')), + }) self._sort_formats(formats) - return { + + return merge_dicts(info, { 'id': video_id, 'display_id': display_id, - 'title': title, - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - 'duration': parse_duration(ep.get('length')), + 'title': display_id, 'formats': formats, - } + }) From a373befa25bb521f94facb01c2cef45850c1e7c9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 24 Sep 2019 20:23:56 +0100 Subject: [PATCH 3/5] [nhk] fix video extraction(closes #22249)(closes #22353) --- youtube_dl/extractor/nhk.py | 4 ++-- youtube_dl/extractor/piksel.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 241412f98..cce4bb472 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -60,8 +60,8 @@ class NhkVodIE(InfoExtractor): if is_video: info.update({ '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:' + episode['vod_id'], + 'ie_key': 'Piksel', + 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'], }) else: audio = episode['audio'] diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py index 401298cb8..88b6859b0 100644 --- a/youtube_dl/extractor/piksel.py +++ b/youtube_dl/extractor/piksel.py @@ -15,7 +15,7 @@ from ..utils import ( class PikselIE(InfoExtractor): - _VALID_URL = r'https?://player\.piksel\.com/v/(?P[a-z0-9]+)' + _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P[a-z0-9_]+)' _TESTS = [ { 'url': 'http://player.piksel.com/v/ums2867l', @@ -40,6 +40,11 @@ class PikselIE(InfoExtractor): 'timestamp': 1486171129, 'upload_date': '20170204' } + }, + { + # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/ + 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477', + 'only_matching': True, } ] @@ -52,8 +57,11 @@ class PikselIE(InfoExtractor): return mobj.group('url') def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'data-de-program-uuid=[\'"]([a-z0-9]+)', + webpage, 'program uuid', default=display_id) app_token = self._search_regex([ r'clientAPI\s*:\s*"([^"]+)"', r'data-de-api-key\s*=\s*"([^"]+)"' From 21d3c21e6272d6ec089fc76461151f042f51aba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Sep 2019 02:39:25 +0700 Subject: [PATCH 4/5] [nhk] Add support for clips --- youtube_dl/extractor/nhk.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index cce4bb472..6a2c6cb7b 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -10,6 +10,18 @@ class NhkVodIE(InfoExtractor): # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. _TESTS = [{ + # clip + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', + 'md5': '256a1be14f48d960a7e61e2532d95ec3', + 'info_dict': { + 'id': 'a95j5iza', + 'ext': 'mp4', + 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU", + 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5', + 'timestamp': 1565965194, + 'upload_date': '20190816', + }, + }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'only_matching': True, }, { @@ -19,7 +31,7 @@ class NhkVodIE(InfoExtractor): 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/', 'only_matching': True, }] - _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sodesdlist/v7/episode/%s/%s/all%s.json' + _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json' def _real_extract(self, url): lang, m_type, episode_id = re.match(self._VALID_URL, url).groups() @@ -28,7 +40,10 @@ class NhkVodIE(InfoExtractor): is_video = m_type == 'video' episode = self._download_json( - self._API_URL_TEMPLATE % ('v' if is_video else 'r', episode_id, lang, '/all' if is_video else ''), + self._API_URL_TEMPLATE % ( + 'v' if is_video else 'r', + 'clip' if episode_id[:4] == '9999' else 'esd', + episode_id, lang, '/all' if is_video else ''), episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0] title = episode.get('sub_title_clean') or episode['sub_title'] From 33c1c7d80fd99024879a5f087b55b24374385e43 Mon Sep 17 00:00:00 2001 From: sofutru <54445344+sofutru@users.noreply.github.com> Date: Wed, 25 Sep 2019 02:43:34 +0700 Subject: [PATCH 5/5] [youtube] Add support for owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 05eea0e4e..a3364a14e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -397,6 +397,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/| (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/| (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/| + (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: