1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-24 00:45:38 +08:00

[youtube] download storyboards even with no dash manifest

When youtube_include_dash_manifest was set to false, the
storyboards would no longer download, even though these two
things are completely unrelated.
This commit is contained in:
Marc Abonce Seguin 2019-04-29 23:28:00 -05:00
parent 089a84a81d
commit 5cea6a3b43

View File

@ -1608,6 +1608,86 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
playback_url, video_id, 'Marking watched',
'Unable to mark watched', fatal=False)
def _get_storyboards(self, video_id, video_info, video_webpage):
    """Build the list of storyboard images described by a storyboard spec.

    The spec string is looked up, in order, in:
      1. video_info['player_response'][0] (JSON text), under
         storyboards -> playerStoryboardSpecRenderer -> spec;
      2. video_info['storyboard_spec'] (used when there is no textual
         player_response);
      3. the first {...} span following 'playerStoryboardSpecRenderer'
         in video_webpage (used when nothing was found above).

    A spec is split on '|'. The first part is a base URL containing the
    placeholders $L (level index), $N (file name) and $M (image index).
    Every further part describes one level as 8 '#'-separated fields:
    frame width, frame height, total frame count, columns, rows, an
    unused field, the file name and the 'sigh' query value.

    Returns a list of dicts with the keys 'id', 'width', 'height',
    'cols', 'rows', 'frames' and 'url', one per storyboard image.
    Malformed levels are reported through report_warning and skipped.
    """
    # Try to extract the storyboard spec from video_info
    player_response = video_info.get('player_response', [])
    if len(player_response) > 0 and isinstance(player_response[0], compat_str):
        player_response = self._parse_json(
            player_response[0], video_id, fatal=False)
        # try_get also copes with a failed parse (None) or a missing key
        spec = try_get(
            player_response,
            lambda x: x['storyboards']['playerStoryboardSpecRenderer']['spec'],
            compat_str)
        sb_spec = [spec] if spec else []
    else:
        # Drop empty entries right away so that the fallback below is
        # still attempted when nothing usable was found here
        sb_spec = [s for s in video_info.get('storyboard_spec', []) if s]

    # Try to extract the storyboard spec from video_webpage
    if not sb_spec and video_webpage:
        sb_index = video_webpage.find('playerStoryboardSpecRenderer')
        if sb_index != -1:
            sb_spec_renderer = video_webpage[sb_index:]
            sb_str = sb_spec_renderer[sb_spec_renderer.find('{'):sb_spec_renderer.find('}') + 1]
            try:
                sb_str = sb_str.encode('utf-8').decode('unicode_escape')
            except UnicodeError:
                # Malformed escape sequence: treat as "no storyboard"
                sb_str = None
            sb_json = self._parse_json(
                sb_str, video_id, fatal=False) if sb_str else None
            spec = try_get(sb_json, lambda x: x['spec'], compat_str)
            sb_spec = [spec] if spec else []

    # Extract information of each storyboard
    storyboards = []
    for s in sb_spec:
        s_parts = s.split('|')
        base_url = s_parts[0]
        # enumerate() keeps the $L index tied to the position of the level
        # in the spec, even when an earlier level is skipped as malformed
        for i, params in enumerate(s_parts[1:]):
            storyboard_attrib = params.split('#')
            if len(storyboard_attrib) != 8:
                self._downloader.report_warning('Unable to extract storyboard')
                continue
            frame_width = int_or_none(storyboard_attrib[0])
            frame_height = int_or_none(storyboard_attrib[1])
            total_frames = int_or_none(storyboard_attrib[2])
            cols = int_or_none(storyboard_attrib[3])
            rows = int_or_none(storyboard_attrib[4])
            filename = storyboard_attrib[6]
            sigh = storyboard_attrib[7]
            if not (frame_width and frame_height and cols and rows and total_frames):
                self._downloader.report_warning('Unable to extract storyboard')
                continue
            frames_per_image = cols * rows
            n_images = int(math.ceil(total_frames / float(frames_per_image)))
            storyboards_url = base_url.replace('$L', compat_str(i)) + '&'
            for j in range(n_images):
                url = storyboards_url.replace('$N', filename).replace('$M', compat_str(j)) + 'sigh=' + sigh
                # Per-image values; the level-wide cols/rows stay untouched
                img_cols, img_rows, img_frames = cols, rows, frames_per_image
                if j == n_images - 1:
                    remaining_frames = total_frames % frames_per_image
                    if remaining_frames != 0:
                        # The last image only holds the leftover frames.
                        # Frames fill a row of `cols` entries before the
                        # next row starts, so divide by cols (not rows).
                        img_frames = remaining_frames
                        img_rows = int(math.ceil(remaining_frames / float(cols)))
                        if img_rows == 1:
                            img_cols = remaining_frames
                storyboards.append({
                    'id': 'L' + compat_str(i) + '-M' + compat_str(j),
                    'width': img_cols * frame_width,
                    'height': img_rows * frame_height,
                    'cols': img_cols,
                    'rows': img_rows,
                    'frames': img_frames,
                    'url': url
                })
    return storyboards
@staticmethod
def _extract_urls(webpage):
# Embedded YouTube player
@ -1741,85 +1821,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if dash_mpd and dash_mpd not in dash_mpds:
dash_mpds.append(dash_mpd)
def get_storyboards(video_info, video_webpage=None):
    """Build the list of storyboard images described by a storyboard spec.

    The spec is taken from video_info (the JSON text in 'player_response',
    or else 'storyboard_spec'); when none is found and video_webpage is
    given, the first {...} span following 'playerStoryboardSpecRenderer'
    in the page is parsed instead. video_webpage is optional so that a
    call passing only video_info keeps working.

    A spec is split on '|': a base URL with the placeholders $L, $N and
    $M, followed by one part per level made of 8 '#'-separated fields
    (frame width, frame height, total frames, columns, rows, an unused
    field, file name, 'sigh' value).

    Returns a list of dicts with the keys 'id', 'width', 'height',
    'cols', 'rows', 'frames' and 'url'. Malformed levels are reported
    through report_warning and skipped.
    """
    # Try to extract storyboards from video_info
    player_response = video_info.get('player_response', [])
    if len(player_response) > 0 and isinstance(player_response[0], compat_str):
        player_response = self._parse_json(
            player_response[0], video_id, fatal=False)
        # try_get tolerates a failed parse (None) and missing keys
        found = try_get(
            player_response,
            lambda x: x['storyboards']['playerStoryboardSpecRenderer']['spec'],
            compat_str)
        spec = [found] if found else []
    else:
        spec = [s for s in video_info.get('storyboard_spec', []) if s]

    if not spec and video_webpage:
        # Try to extract storyboards from video_webpage
        sb_index = video_webpage.find('playerStoryboardSpecRenderer')
        if sb_index != -1:
            sb_spec_renderer = video_webpage[sb_index:]
            sb_str = sb_spec_renderer[sb_spec_renderer.find('{'):sb_spec_renderer.find('}') + 1]
            try:
                sb_str = sb_str.encode('utf-8').decode('unicode_escape')
            except UnicodeError:
                # Malformed escape sequence: treat as "no storyboard"
                sb_str = None
            # Non-fatal parsing instead of a bare json.loads so that a
            # broken page cannot abort the whole extraction
            sb_json = self._parse_json(
                sb_str, video_id, fatal=False) if sb_str else None
            found = try_get(sb_json, lambda x: x['spec'], compat_str)
            spec = [found] if found else []

    storyboards = []
    for s in spec:
        s_parts = s.split('|')
        base_url = s_parts[0]
        # enumerate() keeps the $L index tied to the position of the level
        # in the spec, even when an earlier level is skipped as malformed
        for i, params in enumerate(s_parts[1:]):
            storyboard_attrib = params.split('#')
            if len(storyboard_attrib) != 8:
                self._downloader.report_warning('Unable to extract storyboard')
                continue
            frame_width = int_or_none(storyboard_attrib[0])
            frame_height = int_or_none(storyboard_attrib[1])
            total_frames = int_or_none(storyboard_attrib[2])
            cols = int_or_none(storyboard_attrib[3])
            rows = int_or_none(storyboard_attrib[4])
            filename = storyboard_attrib[6]
            sigh = storyboard_attrib[7]
            if not (frame_width and frame_height and cols and rows and total_frames):
                self._downloader.report_warning('Unable to extract storyboard')
                continue
            frames_per_image = cols * rows
            n_images = int(math.ceil(total_frames / float(frames_per_image)))
            storyboards_url = base_url.replace('$L', compat_str(i)) + '&'
            for j in range(n_images):
                url = storyboards_url.replace('$N', filename).replace('$M', compat_str(j)) + 'sigh=' + sigh
                # Per-image values; the level-wide cols/rows stay untouched
                img_cols, img_rows, img_frames = cols, rows, frames_per_image
                if j == n_images - 1:
                    remaining_frames = total_frames % frames_per_image
                    if remaining_frames != 0:
                        # Leftover frames fill rows of `cols` entries, so
                        # the row count is derived from cols (not rows)
                        img_frames = remaining_frames
                        img_rows = int(math.ceil(remaining_frames / float(cols)))
                        if img_rows == 1:
                            img_cols = remaining_frames
                storyboards.append({
                    'id': 'L' + compat_str(i) + '-M' + compat_str(j),
                    'width': img_cols * frame_width,
                    'height': img_rows * frame_height,
                    'cols': img_cols,
                    'rows': img_rows,
                    'frames': img_frames,
                    'url': url
                })
    return storyboards
is_live = None
view_count = None
storyboards = None
def extract_view_count(v_info):
    """Fetch v_info['view_count'][0] and pass it through int_or_none."""
    # try_get is used for the lookup instead of indexing v_info directly
    raw_count = try_get(v_info, lambda info: info['view_count'][0])
    return int_or_none(raw_count)
@ -1864,7 +1867,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = extract_player_response(pl_response, video_id)
add_dash_mpd(video_info)
view_count = extract_view_count(video_info)
storyboards = get_storyboards(video_info)
else:
age_gate = False
# Try looking directly into the video webpage
@ -2263,6 +2265,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: # don't panic if we can't find it
video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
# storyboards
storyboards = self._get_storyboards(video_id, video_info, video_webpage)
# upload date
upload_date = self._html_search_meta(
'datePublished', video_webpage, 'upload date', default=None)