mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-07 07:07:15 +08:00
Update youtube.py
This commit is contained in:
parent 5e79527881
commit b179aa1496
@@ -70,14 +70,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
|
||||
|
||||
_YOUTUBE_CLIENT_HEADERS = {
|
||||
'x-youtube-client-name': '1',
|
||||
'x-youtube-client-version': '1.20200609.04.02',
|
||||
}
|
||||
|
||||
def _set_language(self):
|
||||
self._set_cookie(
|
||||
'.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
|
||||
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
|
||||
# YouTube sets the expire time to about two months
|
||||
expire_time=time.time() + 2 * 30 * 24 * 3600)
|
||||
|
||||
@@ -303,11 +298,10 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# that is usually worked around with a retry
|
||||
more = self._download_json(
|
||||
'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||
'Downloading page #%s%s'
|
||||
% (page_num, ' (retry #%d)' % count if count else ''),
|
||||
transform_source=uppercase_escape,
|
||||
headers=self._YOUTUBE_CLIENT_HEADERS)
|
||||
transform_source=uppercase_escape)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
||||
@@ -1384,7 +1378,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
funcname = self._search_regex(
|
||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||
# Obsolete patterns
|
||||
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
@@ -1658,63 +1652,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_id = mobj.group(2)
|
||||
return video_id
|
||||
|
||||
def _extract_chapters_from_json(self, webpage, video_id, duration):
|
||||
if not webpage:
|
||||
return
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
|
||||
'player args', default='{}'),
|
||||
video_id, fatal=False)
|
||||
if not player or not isinstance(player, dict):
|
||||
return
|
||||
watch_next_response = player.get('watch_next_response')
|
||||
if not isinstance(watch_next_response, compat_str):
|
||||
return
|
||||
response = self._parse_json(watch_next_response, video_id, fatal=False)
|
||||
if not response or not isinstance(response, dict):
|
||||
return
|
||||
chapters_list = try_get(
|
||||
response,
|
||||
lambda x: x['playerOverlays']
|
||||
['playerOverlayRenderer']
|
||||
['decoratedPlayerBarRenderer']
|
||||
['decoratedPlayerBarRenderer']
|
||||
['playerBar']
|
||||
['chapteredPlayerBarRenderer']
|
||||
['chapters'],
|
||||
list)
|
||||
if not chapters_list:
|
||||
return
|
||||
|
||||
def chapter_time(chapter):
|
||||
return float_or_none(
|
||||
try_get(
|
||||
chapter,
|
||||
lambda x: x['chapterRenderer']['timeRangeStartMillis'],
|
||||
int),
|
||||
scale=1000)
|
||||
chapters = []
|
||||
for next_num, chapter in enumerate(chapters_list, start=1):
|
||||
start_time = chapter_time(chapter)
|
||||
if start_time is None:
|
||||
continue
|
||||
end_time = (chapter_time(chapters_list[next_num])
|
||||
if next_num < len(chapters_list) else duration)
|
||||
if end_time is None:
|
||||
continue
|
||||
title = try_get(
|
||||
chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
|
||||
compat_str)
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
'title': title,
|
||||
})
|
||||
return chapters
|
||||
|
||||
@staticmethod
|
||||
def _extract_chapters_from_description(description, duration):
|
||||
def _extract_chapters(description, duration):
|
||||
if not description:
|
||||
return None
|
||||
chapter_lines = re.findall(
|
||||
@@ -1748,10 +1687,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
})
|
||||
return chapters
|
||||
|
||||
def _extract_chapters(self, webpage, description, video_id, duration):
|
||||
return (self._extract_chapters_from_json(webpage, video_id, duration)
|
||||
or self._extract_chapters_from_description(description, duration))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
@@ -1898,9 +1833,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
video_details = try_get(
|
||||
player_response, lambda x: x['videoDetails'], dict) or {}
|
||||
|
||||
microformat = try_get(
|
||||
player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
|
||||
|
||||
video_title = video_info.get('title', [None])[0] or video_details.get('title')
|
||||
if not video_title:
|
||||
self._downloader.report_warning('Unable to extract video title')
|
||||
@@ -1930,7 +1862,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
''', replace_url, video_description)
|
||||
video_description = clean_html(video_description)
|
||||
else:
|
||||
video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage)
|
||||
video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
|
||||
|
||||
if not smuggled_data.get('force_singlefeed', False):
|
||||
if not self._downloader.params.get('noplaylist'):
|
||||
@@ -1978,8 +1910,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
view_count = extract_view_count(video_info)
|
||||
if view_count is None and video_details:
|
||||
view_count = int_or_none(video_details.get('viewCount'))
|
||||
if view_count is None and microformat:
|
||||
view_count = int_or_none(microformat.get('viewCount'))
|
||||
|
||||
if is_live is None:
|
||||
is_live = bool_or_none(video_details.get('isLive'))
|
||||
@@ -2262,8 +2192,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
||||
video_webpage, 'upload date', default=None)
|
||||
if not upload_date:
|
||||
upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
video_license = self._html_search_regex(
|
||||
@@ -2335,21 +2263,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
m_cat_container = self._search_regex(
|
||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||
video_webpage, 'categories', default=None)
|
||||
category = None
|
||||
if m_cat_container:
|
||||
category = self._html_search_regex(
|
||||
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
||||
default=None)
|
||||
if not category:
|
||||
category = try_get(
|
||||
microformat, lambda x: x['category'], compat_str)
|
||||
video_categories = None if category is None else [category]
|
||||
video_categories = None if category is None else [category]
|
||||
else:
|
||||
video_categories = None
|
||||
|
||||
video_tags = [
|
||||
unescapeHTML(m.group('content'))
|
||||
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
||||
if not video_tags:
|
||||
video_tags = try_get(video_details, lambda x: x['keywords'], list)
|
||||
|
||||
def _extract_count(count_name):
|
||||
return str_to_int(self._search_regex(
|
||||
@@ -2400,7 +2324,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
errnote='Unable to download video annotations', fatal=False,
|
||||
data=urlencode_postdata({xsrf_field_name: xsrf_token}))
|
||||
|
||||
chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
|
||||
chapters = self._extract_chapters(description_original, video_duration)
|
||||
|
||||
# Look for the DASH manifest
|
||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
@@ -2755,7 +2679,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
ids = []
|
||||
last_id = playlist_id[-11:]
|
||||
for n in itertools.count(1):
|
||||
url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
|
||||
url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
|
||||
new_ids = orderedSet(re.findall(
|
||||
@@ -3095,7 +3019,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com user/channel playlists'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
|
||||
IE_NAME = 'youtube:playlists'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -3121,9 +3045,6 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
||||
'title': 'Chem Player',
|
||||
},
|
||||
'skip': 'Blocked',
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
@@ -3268,10 +3189,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
break
|
||||
|
||||
more = self._download_json(
|
||||
'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
||||
'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
||||
'Downloading page #%s' % page_num,
|
||||
transform_source=uppercase_escape,
|
||||
headers=self._YOUTUBE_CLIENT_HEADERS)
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user