[youtube] download storyboards even with no dash manifest

When youtube_include_dash_manifest was set to false, the storyboards were no longer downloaded, even though the two features are completely unrelated.
2025-01-24 05:02:50 +08:00 · 2019-04-29 23:28:00 -05:00 · 2019-04-29 23:28:00 -05:00 · 5cea6a3b43
commit 5cea6a3b43
parent 089a84a81d
1 changed files with 83 additions and 78 deletions
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -1608,6 +1608,86 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
    def _get_storyboards(self, video_id, video_info, video_webpage):
        """Collect the storyboard sprite sheets advertised for a video.

        The storyboard spec string is looked up, in order, in:

        1. the ``player_response`` JSON carried by ``video_info``;
        2. ``video_info['storyboard_spec']`` (only consulted when there is
           no string ``player_response`` entry);
        3. the first ``playerStoryboardSpecRenderer`` object found in
           ``video_webpage``.

        A spec has the form ``base_url|level|level|...`` where every level
        is eight ``#``-separated fields.  The fields read here are frame
        width, frame height, total frame count, columns, rows, (one field
        that is not used), file name and the ``sigh`` signature.

        :param video_id: video identifier, passed to ``_parse_json``.
        :param video_info: mapping whose values are lists of strings.
        :param video_webpage: watch page markup; may be empty/None, in
            which case the webpage fallback is skipped.
        :returns: list of dicts with the keys ``id``, ``width``, ``height``,
            ``cols``, ``rows``, ``frames`` and ``url``, one per sprite
            sheet.  Empty when no usable spec is found.
        """
        storyboards = []

        # Try to extract storyboards from video_info
        player_response = video_info.get('player_response', [])
        if len(player_response) > 0 and isinstance(player_response[0], compat_str):
            player_response = self._parse_json(
                player_response[0], video_id, fatal=False)
            if player_response and 'storyboards' in player_response:
                sb_spec = [try_get(
                    player_response,
                    lambda x: x['storyboards']['playerStoryboardSpecRenderer']['spec'],
                    compat_str)]
            else:
                sb_spec = []
        else:
            sb_spec = video_info.get('storyboard_spec', [])

        # Drop empty entries *before* deciding on the fallback: a failed
        # try_get leaves [None], which must not suppress the webpage lookup.
        sb_spec = [spec for spec in sb_spec if spec]

        # Try to extract storyboards from video_webpage
        if not sb_spec and video_webpage:
            sb_index = video_webpage.find('playerStoryboardSpecRenderer')
            if sb_index != -1:
                sb_spec_renderer = video_webpage[sb_index:]
                sb_str = sb_spec_renderer[sb_spec_renderer.find('{'):sb_spec_renderer.find('}') + 1]
                sb_json = self._parse_json(
                    sb_str.encode("utf-8").decode("unicode_escape"), video_id, fatal=False)
                sb_spec = [sb_json.get('spec')] if sb_json else []

        # Extract information of each storyboard
        for spec in filter(None, sb_spec):
            s_parts = spec.split('|')
            base_url = s_parts[0]
            # The level index is the position inside the spec, so it has to
            # advance even for entries that are skipped as malformed.
            for level, params in enumerate(s_parts[1:]):
                storyboard_attrib = params.split('#')
                if len(storyboard_attrib) != 8:
                    self._downloader.report_warning('Unable to extract storyboard')
                    continue
                frame_width = int_or_none(storyboard_attrib[0])
                frame_height = int_or_none(storyboard_attrib[1])
                total_frames = int_or_none(storyboard_attrib[2])
                cols = int_or_none(storyboard_attrib[3])
                rows = int_or_none(storyboard_attrib[4])
                filename = storyboard_attrib[6]
                sigh = storyboard_attrib[7]
                if not (frame_width and frame_height and cols and rows and total_frames):
                    self._downloader.report_warning('Unable to extract storyboard')
                    continue

                frames_per_sheet = cols * rows
                n_images = int(math.ceil(total_frames / float(frames_per_sheet)))
                # Frames on the final sheet when it is only partially filled
                # (0 means the final sheet is full).
                remaining_frames = total_frames % frames_per_sheet

                level_url = base_url.replace('$L', compat_str(level)) + '&'
                sheet_url = level_url.replace('$N', filename)
                for j in range(n_images):
                    sheet_cols, sheet_rows, sheet_frames = cols, rows, frames_per_sheet
                    if j == n_images - 1 and remaining_frames != 0:
                        sheet_frames = remaining_frames
                        # Frames fill a sheet row by row, so the number of
                        # occupied rows depends on the column count.
                        sheet_rows = int(math.ceil(remaining_frames / float(cols)))
                        if sheet_rows == 1:
                            # A single, partially filled row is narrower too.
                            sheet_cols = remaining_frames
                    storyboards.append({
                        'id': 'L' + compat_str(level) + '-M' + compat_str(j),
                        'width': sheet_cols * frame_width,
                        'height': sheet_rows * frame_height,
                        'cols': sheet_cols,
                        'rows': sheet_rows,
                        'frames': sheet_frames,
                        'url': sheet_url.replace('$M', compat_str(j)) + 'sigh=' + sigh
                    })
        return storyboards
    @staticmethod
    def _extract_urls(webpage):
        # Embedded YouTube player
@ -1741,85 +1821,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            if dash_mpd and dash_mpd not in dash_mpds:
                dash_mpds.append(dash_mpd)
        def get_storyboards(video_info, video_webpage=None):
            """Collect the storyboard sprite sheets advertised for a video.

            The spec string is taken from the ``player_response`` JSON in
            ``video_info``, else from ``video_info['storyboard_spec']``, and
            as a last resort from the first ``playerStoryboardSpecRenderer``
            object found in ``video_webpage``.

            A spec has the form ``base_url|level|level|...``; each level is
            eight ``#``-separated fields (frame width, frame height, total
            frames, columns, rows, one unused field, file name, ``sigh``).

            :param video_info: mapping whose values are lists of strings.
            :param video_webpage: optional page markup.  It defaults to None
                so that callers passing only ``video_info`` keep working;
                the webpage fallback is then skipped.
            :returns: list of dicts with the keys ``id``, ``width``,
                ``height``, ``cols``, ``rows``, ``frames`` and ``url``.
            """
            storyboards = []

            # Try to extract storyboards from video_info
            player_response = video_info.get('player_response', [])
            if len(player_response) > 0 and isinstance(player_response[0], compat_str):
                player_response = self._parse_json(
                    player_response[0], video_id, fatal=False)
                if player_response and 'storyboards' in player_response:
                    # try_get instead of direct indexing: a missing nested
                    # key must not abort the whole extraction.
                    spec = [try_get(
                        player_response,
                        lambda x: x['storyboards']['playerStoryboardSpecRenderer']['spec'],
                        compat_str)]
                else:
                    spec = []
            else:
                spec = video_info.get('storyboard_spec', [])

            # Drop empty entries before deciding whether a fallback is needed.
            spec = [s for s in spec if s]

            # Try to extract storyboards from video_webpage
            if not spec and video_webpage:
                sb_index = video_webpage.find('playerStoryboardSpecRenderer')
                if sb_index != -1:
                    sb_spec_renderer = video_webpage[sb_index:]
                    sb_str = sb_spec_renderer[sb_spec_renderer.find('{'):sb_spec_renderer.find('}') + 1]
                    # Non-fatal parse: the scraped snippet is best-effort.
                    sb_json = self._parse_json(
                        sb_str.encode("utf-8").decode("unicode_escape"), video_id, fatal=False)
                    if sb_json and sb_json.get('spec'):
                        spec = [sb_json['spec']]

            for s in spec:
                s_parts = s.split('|')
                base_url = s_parts[0]
                # The level index is the position inside the spec, so it has
                # to advance even when an entry is skipped as malformed.
                for level, params in enumerate(s_parts[1:]):
                    storyboard_attrib = params.split('#')
                    if len(storyboard_attrib) != 8:
                        self._downloader.report_warning('Unable to extract storyboard')
                        continue
                    frame_width = int_or_none(storyboard_attrib[0])
                    frame_height = int_or_none(storyboard_attrib[1])
                    total_frames = int_or_none(storyboard_attrib[2])
                    cols = int_or_none(storyboard_attrib[3])
                    rows = int_or_none(storyboard_attrib[4])
                    filename = storyboard_attrib[6]
                    sigh = storyboard_attrib[7]
                    if not (frame_width and frame_height and cols and rows and total_frames):
                        self._downloader.report_warning('Unable to extract storyboard')
                        continue

                    frames_per_sheet = cols * rows
                    n_images = int(math.ceil(total_frames / float(frames_per_sheet)))
                    # Frames on the final sheet when it is partially filled
                    # (0 means the final sheet is full).
                    remaining_frames = total_frames % frames_per_sheet

                    level_url = base_url.replace('$L', compat_str(level)) + '&'
                    sheet_url = level_url.replace('$N', filename)
                    for j in range(n_images):
                        sheet_cols, sheet_rows, sheet_frames = cols, rows, frames_per_sheet
                        if j == n_images - 1 and remaining_frames != 0:
                            sheet_frames = remaining_frames
                            # Frames fill a sheet row by row, so the number
                            # of occupied rows depends on the column count.
                            sheet_rows = int(math.ceil(remaining_frames / float(cols)))
                            if sheet_rows == 1:
                                # A lone partial row is narrower as well.
                                sheet_cols = remaining_frames
                        storyboards.append({
                            'id': 'L' + compat_str(level) + '-M' + compat_str(j),
                            'width': sheet_cols * frame_width,
                            'height': sheet_rows * frame_height,
                            'cols': sheet_cols,
                            'rows': sheet_rows,
                            'frames': sheet_frames,
                            'url': sheet_url.replace('$M', compat_str(j)) + 'sigh=' + sigh
                        })
            return storyboards
        is_live = None
        view_count = None
        storyboards = None
        def extract_view_count(v_info):
            # Read the first 'view_count' entry of v_info via try_get and
            # convert it with int_or_none.
            raw_count = try_get(v_info, lambda info: info['view_count'][0])
            return int_or_none(raw_count)
@ -1864,7 +1867,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                player_response = extract_player_response(pl_response, video_id)
                add_dash_mpd(video_info)
                view_count = extract_view_count(video_info)
                storyboards = get_storyboards(video_info)
        else:
            age_gate = False
            # Try looking directly into the video webpage
@ -2263,6 +2265,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
        # storyboards
        storyboards = self._get_storyboards(video_id, video_info, video_webpage)
        # upload date
        upload_date = self._html_search_meta(
            'datePublished', video_webpage, 'upload date', default=None)