From 5cea6a3b4307aa0d5453551e64ab8a3d4d89e0f0 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Mon, 29 Apr 2019 23:28:00 -0500 Subject: [PATCH] [youtube] download storyboards even with no dash manifest When youtube_include_dash_manifest was set to false, the storyboards were no longer downloaded, even though the two features are completely unrelated. --- youtube_dl/extractor/youtube.py | 161 ++++++++++++++++---------------- 1 file changed, 83 insertions(+), 78 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f60438e8b..909bec2a8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1608,6 +1608,86 @@ class YoutubeIE(YoutubeBaseInfoExtractor): playback_url, video_id, 'Marking watched', 'Unable to mark watched', fatal=False) + def _get_storyboards(self, video_id, video_info, video_webpage): + storyboards = [] + + # Try to extract storyboards from video_info + player_response = video_info.get('player_response', []) + if len(player_response) > 0 and isinstance(player_response[0], compat_str): + player_response = self._parse_json( + player_response[0], video_id, fatal=False) + if player_response and 'storyboards' in player_response: + sb_spec = [try_get(player_response, + lambda x: x['storyboards']['playerStoryboardSpecRenderer']['spec'], + compat_str)] + else: + sb_spec = [] + else: + sb_spec = video_info.get('storyboard_spec', []) + + # Try to extract storyboards from video_webpage + if len(sb_spec) == 0: + sb_index = video_webpage.find('playerStoryboardSpecRenderer') + if sb_index != -1: + sb_spec_renderer = video_webpage[sb_index:] + sb_str = sb_spec_renderer[sb_spec_renderer.find('{'):sb_spec_renderer.find('}') + 1] + sb_json = self._parse_json( + sb_str.encode("utf-8").decode("unicode_escape"), video_id, fatal=False) + sb_spec = [sb_json.get('spec')] if sb_json else [] + + # Extract information of each storyboard + for s in filter(None, sb_spec): + s_parts = s.split('|') + base_url 
= s_parts[0] + i = 0 + for params in s_parts[1:]: + storyboard_attrib = params.split('#') + if len(storyboard_attrib) != 8: + self._downloader.report_warning('Unable to extract storyboard') + continue + + frame_width = int_or_none(storyboard_attrib[0]) + frame_height = int_or_none(storyboard_attrib[1]) + total_frames = int_or_none(storyboard_attrib[2]) + cols = int_or_none(storyboard_attrib[3]) + rows = int_or_none(storyboard_attrib[4]) + filename = storyboard_attrib[6] + sigh = storyboard_attrib[7] + + if frame_width and frame_height and cols and rows and total_frames: + frames = cols * rows + width, height = frame_width * cols, frame_height * rows + n_images = int(math.ceil(total_frames / float(cols * rows))) + else: + self._downloader.report_warning('Unable to extract storyboard') + continue + + storyboards_url = base_url.replace('$L', compat_str(i)) + '&' + for j in range(n_images): + url = storyboards_url.replace('$N', filename).replace('$M', compat_str(j)) + 'sigh=' + sigh + if j == n_images - 1: + remaining_frames = total_frames % (cols * rows) + if remaining_frames != 0: + frames = remaining_frames + rows = int(math.ceil(float(remaining_frames) / rows)) + height = rows * frame_height + if rows == 1: + cols = remaining_frames + width = cols * frame_width + + storyboards.append({ + 'id': 'L' + compat_str(i) + '-M' + compat_str(j), + 'width': width, + 'height': height, + 'cols': cols, + 'rows': rows, + 'frames': frames, + 'url': url + }) + i += 1 + + return storyboards + @staticmethod def _extract_urls(webpage): # Embedded YouTube player @@ -1741,85 +1821,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if dash_mpd and dash_mpd not in dash_mpds: dash_mpds.append(dash_mpd) - def get_storyboards(video_info, video_webpage): - storyboards = [] - - # Try to extract storyborads from video_info - player_response = video_info.get('player_response', []) - if len(player_response) > 0 and isinstance(player_response[0], compat_str): - player_response = self._parse_json( - 
player_response[0], video_id, fatal=False) - if player_response and 'storyboards' in player_response: - spec = [player_response['storyboards']['playerStoryboardSpecRenderer']['spec']] - else: - spec = [] - else: - spec = video_info.get('storyboard_spec', []) - - if len(spec) == 0: - # Try to extract storyborads from video_webpage - sb_index = video_webpage.find('playerStoryboardSpecRenderer') - if sb_index != -1: - sb_spec_renderer = video_webpage[sb_index:] - sb_str = sb_spec_renderer[sb_spec_renderer.find('{'):sb_spec_renderer.find('}')+1] - sb_json = json.loads(sb_str.encode("utf-8").decode("unicode_escape")) - spec = [sb_json['spec']] - - for s in spec: - s_parts = s.split('|') - base_url = s_parts[0] - i = 0 - for params in s_parts[1:]: - storyboard_attrib = params.split('#') - if len(storyboard_attrib) != 8: - self._downloader.report_warning('Unable to extract storyboard') - continue - - frame_width = int_or_none(storyboard_attrib[0]) - frame_height = int_or_none(storyboard_attrib[1]) - total_frames = int_or_none(storyboard_attrib[2]) - cols = int_or_none(storyboard_attrib[3]) - rows = int_or_none(storyboard_attrib[4]) - filename = storyboard_attrib[6] - sigh = storyboard_attrib[7] - - if frame_width and frame_height and cols and rows and total_frames: - frames = cols * rows - width, height = frame_width * cols, frame_height * rows - n_images = int(math.ceil(total_frames / float(cols * rows))) - else: - self._downloader.report_warning('Unable to extract storyboard') - continue - - storyboards_url = base_url.replace('$L', compat_str(i)) + '&' - for j in range(n_images): - url = storyboards_url.replace('$N', filename).replace('$M', compat_str(j)) + 'sigh=' + sigh - if j == n_images-1: - remaining_frames = total_frames % (cols * rows) - if remaining_frames != 0: - frames = remaining_frames - rows = int(math.ceil(float(remaining_frames) / rows)) - height = rows * frame_height - if rows == 1: - cols = remaining_frames - width = cols * frame_width - - 
storyboards.append({ - 'id': 'L' + compat_str(i) + '-M' + compat_str(j), - 'width': width, - 'height': height, - 'cols': cols, - 'rows': rows, - 'frames': frames, - 'url': url - }) - i += 1 - - return storyboards - is_live = None view_count = None - storyboards = None def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) @@ -1864,7 +1867,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_response = extract_player_response(pl_response, video_id) add_dash_mpd(video_info) view_count = extract_view_count(video_info) - storyboards = get_storyboards(video_info) else: age_gate = False # Try looking directly into the video webpage @@ -2263,6 +2265,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: # don't panic if we can't find it video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0]) + # storyboards + storyboards = self._get_storyboards(video_id, video_info, video_webpage) + # upload date upload_date = self._html_search_meta( 'datePublished', video_webpage, 'upload date', default=None)