From 3f8ec99719cc91c9ec4fe3a6b32e4930e2c27c23 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sat, 22 Jun 2019 22:54:09 +0300 Subject: [PATCH] extract page likes. --- youtube_dl/extractor/facebook.py | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 1b91c9036..14cb54966 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -24,6 +24,7 @@ from ..utils import ( sanitized_Request, try_get, urlencode_postdata, + update_url_query ) @@ -481,6 +482,8 @@ class FacebookIE(InfoExtractor): 'like_count': likes_count, 'share_count': shares_count } + if uploader_id: + info_dict['uploader_like_count'] = FacebookAjax(self, webpage, uploader_id).page_likes return webpage, info_dict @@ -587,6 +590,53 @@ class FacebookTahoeData: return tahoe_request_data, tahoe_request_headers +class FacebookAjax: + HOVER_URL_TEMPLATE = 'https://www.facebook.com/ajax/hovercard/user.php?id=111&fb_dtsg_ag=x&endpoint=%2Fajax%2Fhovercard%2Fuser.php%3Fid%3D111&__a=1' + + def __init__(self, extractor, page, page_id): + self._page = page + self._page_id = page_id + self._extractor = extractor + self._hover_data = None + + def _get_hover_data(self): + if self._hover_data: + data = self._hover_data + else: + data = self._extractor._download_webpage( + self._get_request_url(self._page_id), self._page_id + ) + return '' if not data else data + + @property + def hover(self): + return self._get_hover_data() + + @property + def page_likes(self): + return parse_count( + self._extractor._search_regex(r'\/span>([\d,]+) likes', self.hover, 'uploader_likes', default=None) + ) + + def _get_request_url(self, page_id): + return update_url_query(self.HOVER_URL_TEMPLATE, + { + + 'id': page_id, + 'endpoint': '/ajax/hovercard/user.php?id=%s' % page_id, + '__a': 1, + '__pc': self._extractor._search_regex( + r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', self._page, + 'pkg cohort', 
default='PHASED:DEFAULT'), + '__rev': self._extractor._search_regex( + r'client_revision["\']\s*:\s*(\d+),', self._page, + 'client revision', default='3944515'), + 'fb_dtsg': self._extractor._search_regex( + r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', + self._page, 'dtsg token', default=''), + }) + + class FacebookPluginsVideoIE(InfoExtractor): _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'