1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-11 03:37:23 +08:00

Facebook - get timestamp from the tahoe secondary payload if it is missing from the webpage.

This commit is contained in:
Avi Peretz 2019-01-14 13:29:24 +02:00
parent 3b983ee471
commit 4303495ee8

View File

@ -57,7 +57,8 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=%s'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@ -222,6 +223,10 @@ class FacebookIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# no timestamp
'url': 'https://www.facebook.com/ChickenShow1996/videos/2289288568020072/',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -339,6 +344,7 @@ class FacebookIE(InfoExtractor):
video_id, transform_source=js_to_json, fatal=False) video_id, transform_source=js_to_json, fatal=False)
video_data = extract_from_jsmods_instances(server_js_data) video_data = extract_from_jsmods_instances(server_js_data)
tahoe_secondary_data = ''
if not video_data: if not video_data:
if not fatal_if_no_video: if not fatal_if_no_video:
return webpage, False return webpage, False
@ -352,9 +358,7 @@ class FacebookIE(InfoExtractor):
# Video info not in first request, do a secondary request using # Video info not in first request, do a secondary request using
# tahoe player specific URL # tahoe player specific URL
tahoe_data = self._download_webpage( tahoe_request_data = urlencode_postdata({
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
data=urlencode_postdata({
'__a': 1, '__a': 1,
'__pc': self._search_regex( '__pc': self._search_regex(
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage, r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
@ -365,15 +369,29 @@ class FacebookIE(InfoExtractor):
'fb_dtsg': self._search_regex( 'fb_dtsg': self._search_regex(
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
webpage, 'dtsg token', default=''), webpage, 'dtsg token', default=''),
}),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
}) })
tahoe_request_headers = {
'Content-Type': 'application/x-www-form-urlencoded',
}
tahoe_primary_data = self._download_webpage(
self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'primary'), video_id,
data=tahoe_request_data,
headers=tahoe_request_headers
)
tahoe_secondary_data = self._download_webpage(
self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'secondary'), video_id,
data=tahoe_request_data,
headers=tahoe_request_headers
)
tahoe_js_data = self._parse_json( tahoe_js_data = self._parse_json(
self._search_regex( self._search_regex(
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data, r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_primary_data,
'tahoe js data', default='{}'), 'tahoe js data', default='{}'),
video_id, fatal=False) video_id, fatal=False)
video_data = extract_from_jsmods_instances(tahoe_js_data) video_data = extract_from_jsmods_instances(tahoe_js_data)
if not video_data: if not video_data:
@ -427,7 +445,10 @@ class FacebookIE(InfoExtractor):
fatal=False) or self._og_search_title(webpage, fatal=False) fatal=False) or self._og_search_title(webpage, fatal=False)
timestamp = int_or_none(self._search_regex( timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage, r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None) or self._search_regex(
r'data-utime=\\\"(\d+)\\\"', tahoe_secondary_data,
'timestamp', default=None)) 'timestamp', default=None))
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
view_count = parse_count(self._search_regex( view_count = parse_count(self._search_regex(