Merge branch 'fix.25.12.2018'

2025-03-11 04:30:00 +08:00 · 2019-06-23 11:33:45 +03:00 · 2019-06-23 11:33:45 +03:00 · 7c41b6cce7
commit 7c41b6cce7
parent 64a2130341 3ba6ef6ffb
5 changed files with 60 additions and 22 deletions
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@ -243,8 +243,6 @@ class FFmpegFD(ExternalFD):
            # http://trac.ffmpeg.org/ticket/6125#comment:10
            args += ['-seekable', '1' if seekable else '0']

-        args += self._configuration_args()
-
        # start_time = info_dict.get('start_time') or 0
        # if start_time:
        #     args += ['-ss', compat_str(start_time)]
@ -312,6 +310,8 @@ class FFmpegFD(ExternalFD):

        args += ['-i', url, '-c', 'copy']
        
+        args += self._configuration_args()
+
        if self.params.get('test', False):
            args += ['-fs', compat_str(self._TEST_FILE_SIZE)]

--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -2816,7 +2816,7 @@ class InfoExtractor(object):
        """ Return a compat_cookies.SimpleCookie with the cookies for the url """
        req = sanitized_Request(url)
        self._downloader.cookiejar.add_cookie_header(req)
-        return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+        return compat_cookies.SimpleCookie(str(req.get_header('Cookie')))

    def _apply_first_set_cookie_header(self, url_handle, cookie):
        """
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@ -431,6 +431,8 @@ class FacebookIE(InfoExtractor):
            video_title = self._html_search_regex(
                r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
                webpage, 'alternative title', default=None)
+        if not video_title:
+            video_title = self._og_search_title(webpage, default=None)
        if not video_title:
            video_title = self._html_search_meta(
                'description', webpage, 'title', default=None)
@ -438,12 +440,13 @@ class FacebookIE(InfoExtractor):
            video_title = limit_length(video_title, 80)
        else:
            video_title = 'Facebook video #%s' % video_id
+
        uploader = clean_html(get_element_by_id(
            'fbPhotoPageAuthorName', webpage)) or self._search_regex(
            r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',default=None) or \
-                   self._og_search_title(webpage, default=None) or self._search_regex(
+                   self._search_regex(
                        r'\"ownerName\":"(.+?)"', tahoe_data.secondary,
-                        'uploader_id', fatal=False)
+                        'uploader_id', fatal=False) or self._og_search_title(webpage, default=None)


        timestamp = int_or_none(self._search_regex(
@ -457,18 +460,12 @@ class FacebookIE(InfoExtractor):
            'uploader_id', default=None) or self._search_regex(
            r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary,
            'uploader_id', fatal=False)
+
        thumbnail = self._og_search_thumbnail(webpage)

-        view_count = parse_count(self._search_regex(
-            r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-            default=None) or self._search_regex(
-            r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
-            default=None) or self._search_regex(
-            r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-            default=None) or self._search_regex(
-            r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
-            default=None)
-        )
+        view_count = parse_count(self._extract_meta_count(['postViewCount', 'viewCount'], webpage, tahoe_data, 'likes'))
+        likes_count = parse_count(self._extract_likes(webpage, tahoe_data))
+        shares_count = parse_count(self._extract_meta_count(['sharecount'], webpage, tahoe_data, 'shares'))

        info_dict = {
            'id': video_id,
@ -480,11 +477,42 @@ class FacebookIE(InfoExtractor):
            'view_count': view_count,
            'uploader_id': uploader_id,
            'is_live': is_live,
-            'live_status': live_status
+            'live_status': live_status,
+            'like_count': likes_count,
+            'share_count': shares_count
        }

        return webpage, info_dict

+    def _extract_meta_count(self, fields, webpage, tahoe_data, name, ):  # first regex hit for any key in `fields`; `name` is forwarded to _search_regex as its third argument
+        value = None  # returned unchanged when `fields` is empty
+
+        for f in fields:
+            if value:  # only reachable as truthy when the previous key matched in tahoe_data.secondary
+                break
+            value = self._search_regex(  # page form: bare key, colon, then a quoted run of digits/commas/dots, e.g. viewCount:"1,234"
+                    r'\b%s\s*:\s*["\']([\d,.]+)' % f, webpage, name,
+                    default=None
+            )
+            if value:  # page match wins; tahoe_data.secondary is not consulted for this key
+                break
+
+            value = self._search_regex(  # fallback form: quoted key with an unquoted integer, e.g. "viewCount":1234
+                r'[\'\"]%s[\'\"]\s*:\s*(\d+)' % f, tahoe_data.secondary, name,
+                default=None)
+
+        return value  # presumably the captured text or None (depends on _search_regex honouring default=None - confirm); callers in this change wrap it in parse_count
+
+    def _extract_likes(self, webpage, tahoe_data):  # last 'likecount' value found in webpage, else in tahoe_data.secondary, else None
+        values = re.findall(r'\blikecount\s*:\s*["\']([\d,.]+)', webpage)  # every bare-key occurrence with a quoted digits/commas/dots value
+        if values:
+            return values[-1]  # the final occurrence in the page is the one returned
+
+
+        values = re.findall(r'[\'\"]\blikecount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary)  # fallback: quoted key with an unquoted integer
+        if values:
+            return values[-1]  # no match in either source: falls off the end and implicitly returns None
+
    def _real_extract(self, url):
        video_id = self._match_id(url)

--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@ -14,6 +14,7 @@ from ..utils import (
    remove_end,
    try_get,
    xpath_text,
+    parse_count
 )

 from .periscope import PeriscopeIE
@ -165,6 +166,7 @@ class TwitterCardIE(TwitterBaseIE):
        config = None
        formats = []
        duration = None
+        view_count = None

        urls = [url]
        if path.startswith('cards/'):
@ -239,12 +241,18 @@ class TwitterCardIE(TwitterBaseIE):
            ct0 = self._get_cookies(url).get('ct0')
            if ct0:
                headers['csrf_token'] = ct0.value
-            guest_token = self._download_json(
-                '%s/guest/activate.json' % self._API_BASE, video_id,
-                'Downloading guest token', data=b'',
-                headers=headers)['guest_token']
+            guest_token_c = self._get_cookies('http://api.twitter.com/').get('gt')
+            if not guest_token_c:
+                guest_token = self._download_json(
+                    '%s/guest/activate.json' % self._API_BASE, video_id,
+                    'Downloading guest token', data=b'',
+                    headers=headers)['guest_token']
+                self._set_cookie('api.twitter.com', 'gt', guest_token)
+            else:
+                guest_token = guest_token_c.value
+
            headers['x-guest-token'] = guest_token
-            self._set_cookie('api.twitter.com', 'gt', guest_token)
+
            config = self._download_json(
                '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
                video_id, headers=headers)
@ -265,6 +273,7 @@ class TwitterCardIE(TwitterBaseIE):
            title = 'Twitter web player'
            thumbnail = config.get('posterImage')
            duration = float_or_none(track.get('durationMs'), scale=1000)
+            view_count = parse_count(track.get('viewCount'))

        self._remove_duplicate_formats(formats)
        self._sort_formats(formats)
@ -275,6 +284,7 @@ class TwitterCardIE(TwitterBaseIE):
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
+            'view_count': view_count
        }


--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = 'vc.2019.06.08'
+__version__ = 'vc.2019.06.19'