1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-10 23:37:18 +08:00

928 lines
39 KiB
Python
Raw Normal View History

2017-02-11 16:18:45 +07:00
# coding: utf-8
2014-03-04 03:36:54 +01:00
from __future__ import unicode_literals
2013-06-23 20:59:45 +02:00
import re
import socket
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
2013-06-23 20:59:45 +02:00
compat_http_client,
compat_urllib_error,
compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus
)
from ..utils import (
clean_html,
error_to_compat_str,
2013-06-23 20:59:45 +02:00
ExtractorError,
get_element_by_id,
int_or_none,
js_to_json,
limit_length,
parse_count,
Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request [movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use 
sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8
2015-11-21 22:18:17 +06:00
sanitized_Request,
try_get,
urlencode_postdata,
2019-11-11 16:34:57 +02:00
update_url_query,
lowercase_escape,
parse_iso8601,
unescapeHTML,
2013-06-23 20:59:45 +02:00
)
class FacebookIE(InfoExtractor):
    """Information extractor for Facebook videos.

    Matches classic video pages (video.php, photo.php), post and group
    permalinks, the /watch UI, plain /videos/ paths, onion-mirror URLs and
    the ``facebook:<numeric id>`` shorthand.
    """
    _VALID_URL = r'''(?x)
                (?:
                    https?://
                        (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
                        (?:[^#]*?\#!/)?
                        (?:
                            (?:
                                video/video\.php|
                                photo\.php|
                                video\.php|
                                video/embed|
                                story\.php|
                                watch
                            )\?(?:.*?)(?:v|video_id|story_fbid)=|
                            [^/]+/videos/(?:[^/]+/)?|
                            [^/]+/posts/|
                            groups/[^/]+/permalink/
                        )|
                    facebook:
                )
                (?P<id>[0-9]+)
                '''
    # Login endpoints; _LOGIN_URL serves the form, _CHECKPOINT_URL confirms
    # the "remember device" interstitial after a successful login.
    _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
    _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
    _NETRC_MACHINE = 'facebook'
    IE_NAME = 'facebook'

    # Desktop Chrome UA used when fetching pages (page downloads only —
    # media downloads use a non-browser UA, see _extract_from_url).
    _CHROME_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'

    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
    # Tahoe async endpoint; second %s selects the payload type.
    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=%s'
2014-08-27 11:08:47 +02:00
    # Test fixtures consumed by youtube-dl's test harness; entries with
    # 'only_matching' exercise _VALID_URL only.
    _TESTS = [{
        'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
        'md5': '6a40d33c0eccbb1af76cf0485a052659',
        'info_dict': {
            'id': '637842556329505',
            'ext': 'mp4',
            'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
            'uploader': 'Tennis on Facebook',
            'upload_date': '20140908',
            'timestamp': 1410199200,
        },
        'skip': 'Requires logging in',
    }, {
        'url': 'https://www.facebook.com/video.php?v=274175099429670',
        'info_dict': {
            'id': '274175099429670',
            'ext': 'mp4',
            'title': 're:^Asif Nawab Butt posted a video',
            'uploader': 'Asif Nawab Butt',
            'upload_date': '20140506',
            'timestamp': 1399398998,
            'thumbnail': r're:^https?://.*',
        },
        'expected_warnings': [
            'title'
        ]
    }, {
        'note': 'Video with DASH manifest',
        'url': 'https://www.facebook.com/video.php?v=957955867617029',
        'md5': 'b2c28d528273b323abe5c6ab59f0f030',
        'info_dict': {
            'id': '957955867617029',
            'ext': 'mp4',
            'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
            'uploader': 'Demy de Zeeuw',
            'upload_date': '20160110',
            'timestamp': 1452431627,
        },
        'skip': 'Requires logging in',
    }, {
        'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
        'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
        'info_dict': {
            'id': '544765982287235',
            'ext': 'mp4',
            'title': '"What are you doing running in the snow?"',
            'uploader': 'FailArmy',
        },
        'skip': 'Video gone',
    }, {
        'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
        'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
        'info_dict': {
            'id': '1035862816472149',
            'ext': 'mp4',
            'title': 'What the Flock Is Going On In New Zealand  Credit: ViralHog',
            'uploader': 'S. Saint',
        },
        'skip': 'Video gone',
    }, {
        'note': 'swf params escaped',
        'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
        'md5': '97ba073838964d12c70566e0085c2b91',
        'info_dict': {
            'id': '10153664894881749',
            'ext': 'mp4',
            'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
            'thumbnail': r're:^https?://.*',
            'timestamp': 1456259628,
            'upload_date': '20160223',
            'uploader': 'Barack Obama',
        },
    }, {
        # have 1080P, but only up to 720p in swf params
        'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
        'md5': '9571fae53d4165bbbadb17a94651dcdc',
        'info_dict': {
            'id': '10155529876156509',
            'ext': 'mp4',
            'title': 'She survived the holocaust — and years later, shes getting her citizenship s...',
            'timestamp': 1477818095,
            'upload_date': '20161030',
            'uploader': 'CNN',
            'thumbnail': r're:^https?://.*',
            'view_count': int,
        },
    }, {
        # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
        'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
        'info_dict': {
            'id': '1417995061575415',
            'ext': 'mp4',
            'title': 'md5:1db063d6a8c13faa8da727817339c857',
            'timestamp': 1486648217,
            'upload_date': '20170209',
            'uploader': 'Yaroslav Korpan',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
        'info_dict': {
            'id': '1072691702860471',
            'ext': 'mp4',
            'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
            'timestamp': 1477305000,
            'upload_date': '20161024',
            'uploader': 'La Guía Del Varón',
            'thumbnail': r're:^https?://.*',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
        'info_dict': {
            'id': '1396382447100162',
            'ext': 'mp4',
            'title': 'md5:19a428bbde91364e3de815383b54a235',
            'timestamp': 1486035494,
            'upload_date': '20170202',
            'uploader': 'Elisabeth Ahtn',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
        'only_matching': True,
    }, {
        'url': 'facebook:544765982287235',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
        'only_matching': True,
    }, {
        'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
        'only_matching': True,
    }, {
        'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
        'only_matching': True,
    }, {
        # no title
        'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
        'info_dict': {
            'id': '359649331226507',
            'ext': 'mp4',
            'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
            'uploader': 'ESL One Dota 2',
            'timestamp': 1527084179,
            'upload_date': '20180523',
            'uploader_id': '234218833769558',
            'is_live': False
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # no timestamp
        'url': 'https://www.facebook.com/SuperNewsGames/videos/642255722780473/',
        'info_dict': {
            'timestamp': 1521221400,
            'uploader': 'Super News Games',
            'uploader_id': '229550157384367',
            'id': '642255722780473',
            'ext': 'mp4',
            'upload_date': '20180316',
            'title': 'The Voice of Nick is trying Fortnite after 100 hours of PLAYERUNKNOWN\'S BATTL...',
        },
        'params': {
            'skip_download': True,
        },
    }]
2013-06-23 20:59:45 +02:00
@staticmethod
def _extract_urls(webpage):
urls = []
for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
webpage):
urls.append(mobj.group('url'))
# Facebook API embed
# see https://developers.facebook.com/docs/plugins/embedded-video-player
for mobj in re.finditer(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
urls.append(mobj.group('url'))
return urls
    def _login(self):
        """Log in to Facebook with credentials from .netrc / CLI options.

        Best-effort: most failures only emit a warning so extraction of
        public videos can continue without an account. Raises
        ExtractorError only when Facebook returns an explicit login error
        message.
        """
        useremail, password = self._get_login_info()
        if useremail is None:
            # No credentials configured — silently skip login.
            return

        login_page_req = sanitized_Request(self._LOGIN_URL)
        # Force English so the regexes below match the page markup.
        self._set_cookie('facebook.com', 'locale', 'en_US')
        login_page = self._download_webpage(login_page_req, None,
                                            note='Downloading login page',
                                            errnote='Unable to download login page')
        # Hidden anti-CSRF tokens required by the login form.
        lsd = self._search_regex(
            r'<input type="hidden" name="lsd" value="([^"]*)"',
            login_page, 'lsd')
        lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')

        login_form = {
            'email': useremail,
            'pass': password,
            'lsd': lsd,
            'lgnrnd': lgnrnd,
            'next': 'http://facebook.com/home.php',
            'default_persistent': '0',
            'legacy_return': '1',
            'timezone': '-60',
            'trynum': '1',
        }
        request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        try:
            login_results = self._download_webpage(request, None,
                                                   note='Logging in', errnote='unable to fetch login page')
            # If the response still contains a login <form>, the
            # credentials were rejected (or we were rate limited).
            if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
                error = self._html_search_regex(
                    r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
                    login_results, 'login error', default=None, group='error')
                if error:
                    raise ExtractorError('Unable to login: %s' % error, expected=True)
                self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
                return

            # Handle the "remember this device" checkpoint page, if shown:
            # submit the checkpoint form choosing not to save the device.
            fb_dtsg = self._search_regex(
                r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
            h = self._search_regex(
                r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
            if not fb_dtsg or not h:
                # No checkpoint tokens present — nothing more to confirm.
                return
            check_form = {
                'fb_dtsg': fb_dtsg,
                'h': h,
                'name_action_selected': 'dont_save',
            }
            check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            check_response = self._download_webpage(check_req, None,
                                                    note='Confirming login')
            if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
                self._downloader.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Network-level failure: warn and proceed unauthenticated.
            self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
            return
    def _real_initialize(self):
        # Attempt login before any extraction; no-op without credentials.
        self._login()
    def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
        """Download *url* and extract formats/metadata for *video_id*.

        Tries several page layouts in order: handleServerJS payloads,
        bigPipe.onPageletArrive payloads, the async "tahoe" endpoint and
        finally the new UI markup.

        Returns a (webpage, info_dict) tuple; when *fatal_if_no_video* is
        False and no video data was found, returns (webpage, False)
        instead of raising.
        """
        req = sanitized_Request(url)
        req.add_header('User-Agent', self._CHROME_USER_AGENT)
        webpage = self._download_webpage(req, video_id)

        video_data = None

        def extract_video_data(instances):
            # Instances look like [.., ['VideoConfig'], [{video item}]];
            # pick the first one that actually carries a video_id.
            for item in instances:
                if item[1][0] == 'VideoConfig':
                    video_item = item[2][0]
                    if video_item.get('video_id'):
                        return video_item['videoData']

        # Layout 1: inline handleServerJS(...) call.
        server_js_data = self._parse_json(self._search_regex(
            r'handleServerJS\(({.+})(?:\);|,")', webpage,
            'server js data', default='{}'), video_id, fatal=False)
        if server_js_data:
            video_data = extract_video_data(server_js_data.get('instances', []))

        def extract_from_jsmods_instances(js_data):
            if js_data:
                return extract_video_data(try_get(
                    js_data, lambda x: x['jsmods']['instances'], list) or [])

        # Layout 2: bigPipe.onPageletArrive payloads (group mall,
        # permalink video pagelet, hyperfeed stories).
        if not video_data:
            server_js_data = self._parse_json(
                self._search_regex(
                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
                    webpage, 'js data', default='{}'),
                video_id, transform_source=js_to_json, fatal=False)
            video_data = extract_from_jsmods_instances(server_js_data)

        # Layout 3: the async tahoe endpoint. Its response is prefixed
        # with a for(;;); anti-JSON-hijacking guard that we strip here.
        tahoe_data = FacebookTahoeData(self, webpage, video_id)
        if not video_data:
            tahoe_js_data = self._parse_json(
                self._search_regex(
                    r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data.primary,
                    'tahoe js data', default='{}'),
                video_id, fatal=False)
            video_data = extract_from_jsmods_instances(tahoe_js_data)

        if not video_data:
            if not fatal_if_no_video:
                return webpage, False
            # Surface Facebook's own unavailability message if present.
            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
            if m_msg is not None:
                raise ExtractorError(
                    'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                    expected=True)
            elif '>You must log in to continue' in webpage:
                self.raise_login_required()

        # Layout 4: newer React-style UI markup.
        if not video_data:
            info_dict = self.get_from_new_ui(webpage, tahoe_data, video_id)
            if info_dict:
                return webpage, info_dict
        if not video_data:
            if self._search_regex(r'newsFeedStream.*?<h1><span class.*?>(.*?)<\/span><\/h1>', webpage, "video_title") is not None:
                self.raise_login_required()
            raise ExtractorError('Cannot parse data')

        is_scheduled = '"isScheduledLive":true' in tahoe_data.secondary
        is_live_stream = video_data[0].get('is_live_stream', False)
        is_broadcast = video_data[0].get('is_broadcast', False)
        is_live, live_status = self.extract_live_info(is_scheduled, is_live_stream, is_broadcast)

        subtitles = {}
        formats = []
        for f in video_data:
            format_id = f['stream_type']
            # Normalize each entry to a one-element list, skipping
            # anything that is neither a dict nor a list.
            if f and isinstance(f, dict):
                f = [f]
            if not f or not isinstance(f, list):
                continue
            for quality in ('sd', 'hd'):
                for src_type in ('src', 'src_no_ratelimit'):
                    src = f[0].get('%s_%s' % (quality, src_type))
                    if src:
                        # Prefer non-progressive streams; HD gets a boost.
                        preference = -10 if format_id == 'progressive' else 0
                        if quality == 'hd':
                            preference += 5
                        formats.append({
                            'format_id': '%s_%s_%s' % (format_id, quality, src_type),
                            'url': src,
                            'preference': preference,
                        })
            dash_manifest = f[0].get('dash_manifest')
            if dash_manifest:
                # The manifest XML is URL-encoded inside the JSON payload.
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
            subtitles_src = f[0].get('subtitles_src')
            if subtitles_src:
                subtitles.setdefault('en', []).append({'url': subtitles_src})
        if not formats:
            raise ExtractorError('Cannot find video formats')

        # Downloads with browser's User-Agent are rate limited. Working around
        # with non-browser User-Agent.
        for f in formats:
            f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
        self._sort_formats(formats)

        video_title = self._extract_video_title(webpage, tahoe_data, video_id)

        def _lowercase_escape(s):
            # None-safe wrapper around lowercase_escape().
            if s:
                return lowercase_escape(s)

        # Uploader name: several fallbacks across page and tahoe payloads.
        uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) or \
            self._search_regex(r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', default=None) or \
            _lowercase_escape(self._search_regex(r'\"ownerName\":"(.+?)"', tahoe_data.secondary, 'uploader_id', fatal=False)) or \
            self._search_regex(r'ownerName"\s*:\s*"([^"]+)"', webpage, 'uploader', default=None) or \
            self._og_search_title(webpage, default=None)

        timestamp = self._resolve_timestamp(webpage, tahoe_data)
        timestamp = parse_iso8601(timestamp)
        # Fallback timestamp extraction; the Paid Partnership condition
        # gates the fallback for branded content unless cookies are used.
        if timestamp is None and webpage.find('Paid Partnership') == -1 or \
                (timestamp is None and webpage.find('Paid Partnership') > -1 and 'cookiefile' in self._downloader.params):
            regex_search_result_date_time = self._search_regex(r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary, 'timestamp', default=None) \
                or self._search_regex(r'data-utime=\\\"(\d+)\\\"', tahoe_data.primary, 'timestamp', default=None)\
                or self._search_regex(r'data-utime=\\\"(\d+)\\\"', webpage, 'timestamp', default=None)\
                or self._search_regex(r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)\
                or self._search_regex(r'<abbr[^>]+data-utime=["\'](\d+)', tahoe_data.secondary, 'timestamp', default=None)\
                or self._search_regex(r'<abbr[^>]+data-utime=["\'](\d+)', tahoe_data.primary, 'timestamp', default=None)
            regex_search_result_publish_time = self._search_regex(r'publish_time&quot;:([\d]+)', webpage, 'timestamp', default=None) \
                or self._search_regex(r'publish_time&quot;:([\d]+)', tahoe_data.primary, 'timestamp', default=None) \
                or self._search_regex(r'publish_time&quot;:([\d]+)', tahoe_data.secondary, 'timestamp', default=None)
            timestamp = int_or_none(regex_search_result_date_time) or int_or_none(regex_search_result_publish_time)

        uploader_id = self._resolve_uploader_id(webpage, tahoe_data)
        thumbnail = self._resolve_thumbnail(webpage, tahoe_data)

        # Live streams expose a concurrent viewer count instead of views.
        if is_live:
            view_count = parse_count(
                self._search_regex(r'viewerCount:([\d]+)', webpage, 'views', fatal=False) or \
                self._search_regex(r'[\'\"]viewerCount[\'\"]\s*:\s*(\d+)', tahoe_data.primary, 'views', fatal=False)
            )
        else:
            view_count = parse_count(self._extract_views(webpage, tahoe_data))

        other_posts_view_count = parse_count(self._extract_meta_count(['otherPostsViewCount'], webpage, tahoe_data, 'other_post_views'))
        likes_count = parse_count(self._extract_likes(webpage, tahoe_data))
        shares_count = parse_count(self._extract_shares(webpage, tahoe_data))
        comment_count = parse_count(self._extract_comments_count(webpage, tahoe_data))
        uploader_handle = self._resolve_uploader_handle(tahoe_data, uploader_id)

        info_dict = self.build_info_dict(webpage, tahoe_data, video_id, video_title, formats, uploader, timestamp,
                                         thumbnail, view_count, uploader_id, is_live, live_status, likes_count,
                                         shares_count, subtitles, comment_count, other_posts_view_count, uploader_handle)
        return webpage, info_dict
    def get_from_new_ui(self, webpage, tahoe_data, video_id):
        """Extract an info dict from Facebook's newer page markup.

        Used as a fallback when none of the legacy JS payloads yielded
        video data. Pulls metadata out of embedded JSON fragments and the
        tahoe payloads, then delegates assembly to build_info_dict().
        """
        video_title = self._resolve_new_ui_title(webpage, tahoe_data, video_id)
        comments_count = self._resolve_new_ui_comments_count(webpage, tahoe_data)
        likes = parse_count(self._extract_likes(webpage, tahoe_data))
        timestamp = self._resolve_new_ui_timestamp(webpage, tahoe_data)
        uploader_json = self._search_regex(r'"author":{(.+?)}', webpage, 'uploader')
        uploader_handle, uploader = self._extract_uploader_info_new_ui(uploader_json)
        uploader_id = self._resolve_uploader_id(webpage, tahoe_data)
        post_view_counts = parse_count(self._search_regex(r'"postViewCount":(.+?),', tahoe_data.secondary, 'views'))
        other_post_view_counts = parse_count(self._search_regex(r'"otherPostsViewCount":(.+?),', tahoe_data.secondary, 'other_views'))
        share_counts = parse_count(self._search_regex(r'"sharecount":(.+?),', tahoe_data.secondary, 'other_views'))
        thumbnail = self._search_regex(r'"thumbnailUrl":"(.+?)"', webpage, 'thumbnail')
        is_live, live_status = self.resolve_new_ui_live_info(webpage, tahoe_data)
        formats = self.resolve_new_ui_format(webpage)
        info_dict = self.build_info_dict(webpage, tahoe_data, video_id, video_title, formats, uploader, timestamp,
                                         thumbnail, post_view_counts, uploader_id, is_live, live_status, likes,
                                         share_counts, {}, comments_count, other_post_view_counts,
                                         uploader_handle)
        return info_dict
def build_info_dict(self,webpage, tahoe_data, video_id, video_title=None, formats=None, uploader=None,
timestamp=None, thumbnail=None, view_count=None, uploader_id=None, is_live=None, live_status=None,
likes_count=None, shares_count=None, subtitles=None, comment_count=None, other_posts_view_count=None,
uploader_handle=None):
info_dict = {
2013-06-23 20:59:45 +02:00
'id': video_id,
'title': video_title,
'formats': formats,
2015-08-02 22:52:12 +01:00
'uploader': uploader,
'timestamp': timestamp,
'thumbnail': thumbnail,
'view_count': view_count,
'uploader_id': uploader_id,
2019-05-05 11:44:38 +03:00
'is_live': is_live,
2019-06-20 13:03:30 +03:00
'live_status': live_status,
2019-06-20 13:14:42 +03:00
'like_count': likes_count,
'share_count': shares_count,
'subtitles': subtitles,
2020-01-10 15:33:20 +02:00
'comment_count': comment_count,
2020-03-01 08:53:01 +02:00
'other_posts_view_count': other_posts_view_count,
2020-03-31 17:32:15 +03:00
'uploader_handle': uploader_handle,
2020-03-01 09:01:39 +02:00
'_internal_data': {
'page': webpage,
'api_response_list': [tahoe_data.primary, tahoe_data.secondary]
}
2013-06-23 20:59:45 +02:00
}
2019-06-22 22:54:09 +03:00
if uploader_id:
info_dict['uploader_like_count'] = FacebookAjax(self, webpage, uploader_id).page_likes
return info_dict
2020-04-05 17:39:33 +03:00
def _resolve_uploader_handle(self, tahoe_data, uploader_id):
uploader_handle = self._search_regex(r'"video_path":"\\\/([^\/]+)\\\/', tahoe_data.primary, 'uploader_handle',
fatal=False)
if uploader_handle == uploader_id:
uploader_handle = self._search_regex(r'href=\\"https:\\\/\\\/www.facebook.com\\\/(.+?)\\\/\\', tahoe_data.secondary,
'uploader_handle',
fatal=False)
return uploader_handle
2019-06-20 13:03:30 +03:00
def _extract_meta_count(self, fields, webpage, tahoe_data, name, ):
value = None
for f in fields:
if value:
break
value = self._search_regex(
r'\b%s\s*:\s*["\']([\d,.]+)' % f, webpage, name,
default=None
)
if value:
break
value = self._search_regex(
r'[\'\"]%s[\'\"]\s*:\s*(\d+)' % f, tahoe_data.secondary, name,
default=None)
return value
2020-06-10 16:16:36 +03:00
@staticmethod
def _extract_first_pattern(pairs):
for pattern, data_list in pairs:
if not isinstance(data_list, list):
data_list = [data_list]
for data in data_list:
values = re.findall(pattern, data)
if values:
return values[-1]
2020-03-11 18:49:08 +02:00
2020-06-10 16:16:36 +03:00
def _extract_likes(self, webpage, tahoe_data):
pairs = (
(r'"reaction_count"\s*:\s*{\s*"count"\s*:\s*(\d+)', [tahoe_data.secondary, webpage]),
(r'reaction_count:{count:([\d]+)}', webpage),
(r'\blikecount\s*:\s*["\']([\d,.]+)', webpage),
(r'[\'\"]\blikecount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary)
)
return self._extract_first_pattern(pairs)
2020-03-11 18:50:13 +02:00
def _extract_shares(self, webpage, tahoe_data):
value = self._extract_meta_count(['sharecount'], webpage, tahoe_data, 'shares')
if value:
return value
a = r'(\d+\w) Views'
values = re.findall(r'"share_count"\s*:\s*{\s*"count"\s*:\s*(\d+)', tahoe_data.secondary)
if values:
2019-06-20 13:03:30 +03:00
return values[-1]
def _extract_comments_count(self, webpage, tahoe_data):
value = self._extract_meta_count(['commentCount'], webpage, tahoe_data, 'comment_count')
if value:
return value
values = re.findall(r'Comments\s\((\d+)', tahoe_data.secondary)
if values:
return values[-1]
2020-03-11 18:48:24 +02:00
def _extract_views(self, webpage, tahoe_data):
value = self._extract_meta_count(['postViewCount', 'viewCount'], webpage, tahoe_data, 'likes')
if value:
return value
2020-05-27 15:33:51 +03:00
values = re.findall(r'(\d.\d+\w?) Views', tahoe_data.secondary)
if values:
return values[-1]
2020-03-11 18:48:24 +02:00
values = re.findall(r'(\d+\w?) Views', tahoe_data.secondary)
if values:
return values[-1]
values = re.findall(r'seen_by_count":\"(\d+)\"', tahoe_data.secondary)
if values:
return values[-1]
def _real_extract(self, url):
video_id = self._match_id(url)
real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
if info_dict:
return info_dict
if '/posts/' in url:
video_id_json = self._search_regex(
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
default='')
if video_id_json:
entries = [
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
for vid in self._parse_json(video_id_json, video_id)]
return self.playlist_result(entries, video_id)
# Single Video?
video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
else:
_, info_dict = self._extract_from_url(
self._VIDEO_PAGE_TEMPLATE % video_id,
video_id, fatal_if_no_video=True)
return info_dict
2020-03-18 12:14:42 +02:00
def _extract_video_title(self, webpage, tahoe_data, video_id):
video_title = self._html_search_regex(
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
'title', default=None)
if not self._valid_video_title(video_title):
2020-03-18 12:14:42 +02:00
video_title = self._html_search_regex(
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
webpage, 'alternative title', default=None)
if not self._valid_video_title(video_title):
2020-03-18 12:14:42 +02:00
video_title = self._og_search_title(webpage, default=None)
if not self._valid_video_title(video_title):
2020-03-18 12:14:42 +02:00
video_title = self._html_search_meta(
'description', webpage, 'title', default=None)
if not self._valid_video_title(video_title):
2020-03-18 12:14:42 +02:00
values = re.findall(r'videoTitle"\s*:\s*"(.*?)"', tahoe_data.secondary)
if values:
video_title = values[-1]
if video_title:
video_title = limit_length(video_title, 80)
else:
video_title = 'Facebook video #%s' % video_id
return video_title
def _extract_uploader_info_new_ui(self, uploader_json):
uploader_handle = self._search_regex(r'"name":"(.+?")', uploader_json, 'uploader')
uploader_url = self._search_regex(r'"url":"(.+?")', uploader_json, 'uploader_url')
uploader_url_str = uploader_url.decode("utf-8")
uploader = uploader_url_str.split('\\/')[-2]
return uploader_handle, uploader
def _extract_ids_info_new_ui(self, ids_json):
ids_json_str = ids_json.decode("utf-8")
ids = ids_json_str.split(':')
video_id = ids[1]
2020-06-07 17:00:16 +03:00
return video_id
def resolve_new_ui_live_info(self, webpage, tahoe_data):
is_scheduled = '"isScheduledLive":true' in tahoe_data.secondary
is_live_stream = self._search_regex(r'"isLiveVOD":(.+?),', tahoe_data.secondary, "vod_live")
is_broadcast = '"isLiveBroadcast":true' in webpage
return self.extract_live_info(is_scheduled, is_live_stream, is_broadcast)
def extract_live_info(self, is_scheduled, is_live_stream, is_broadcast):
live_status = 'not_live'
if is_broadcast:
live_status = 'completed'
if is_live_stream:
live_status = 'live'
if is_scheduled:
live_status = 'upcoming'
is_live = live_status == 'live'
return is_live, live_status
def resolve_new_ui_format(self, webpage):
format_url = self.build_format_url(webpage)
width = parse_count(self._search_regex(r'<meta property="og:video:width" content="(.+?)"', webpage, 'width'))
height = parse_count(self._search_regex(r'<meta property="og:video:height" content="(.+?)"', webpage, 'height'))
formats = []
formats.append({
'url': format_url,
'height': width,
'width': height,
'ext': 'mp4',
})
return formats
def build_format_url(self, webpage):
content_url = self._search_regex(r' content="https(.+?)"', webpage, 'url', fatal=False)
format_url = 'https%s' % content_url
format_url = unescapeHTML(format_url)
return format_url
2020-06-07 17:00:16 +03:00
def _resolve_uploader_id(self, webpage, tahoe_data):
uploader_id = self._search_regex(
r'ownerid:"([\d]+)', webpage,
'uploader_id', default=None) or self._search_regex(
r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary,
'uploader_id', default=None) or \
self._search_regex(r'\\\"page_id\\\"\s*:\s*\\\"(\d+)\\\"', tahoe_data.secondary, 'uploader_id',
fatal=False) or \
self._search_regex(r'content_owner_id_new\\":\\"(\d+)\\"', tahoe_data.secondary, 'uploader_id',
fatal=False)
return uploader_id
def _resolve_timestamp(self, webpage, tahoe_data):
timestamp = self._search_regex(
r'datePublished":"(.+?)"', webpage, 'timestamp', default=None) \
or self._search_regex(r'datePublished":"(.+?)"', tahoe_data.secondary, 'timestamp', default=None) \
or self._search_regex(r'datePublished":"(.+?)"', tahoe_data.primary, 'timestamp', default=None)
return timestamp
def _resolve_new_ui_title(self, webpage, tahoe_data, video_id):
video_title = self._search_regex(r'"headline":"(.+?")', webpage, 'title', fatal=False)
if not video_title:
video_title = self._search_regex(r'"pageTitle">(.+?)<', webpage, 'title', fatal=False)
if not video_title:
video_title = self._extract_video_title(webpage, tahoe_data, video_id)
return video_title
def _resolve_new_ui_comments_count(self, webpage, tahoe_data):
comments_count = parse_count(
self._search_regex(r'"commentCount":(.+?,)', webpage, 'comments_count', fatal=False))
if comments_count is None:
comments_count = parse_count(
self._search_regex(r'"commentcount":(.+?,)', tahoe_data.secondary, 'comments_count', fatal=False))
if comments_count is None:
comments_count = parse_count(self._extract_comments_count(webpage, tahoe_data))
return comments_count
def _resolve_new_ui_timestamp(self, webpage, tahoe_data):
timestamp = self._search_regex(r'"datePublished":"(.+?)"', webpage, 'timestamp', fatal=False)
if not timestamp:
timestamp = self._resolve_timestamp(webpage, tahoe_data)
timestamp = parse_iso8601(timestamp)
return timestamp
    def _resolve_thumbnail(self, webpage, tahoe_data):
        """Return the thumbnail URL from the og:image/twitter:image meta tags,
        with a fallback taken from the primary tahoe payload."""
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
        if not thumbnail:
            # NOTE(review): this fallback matches "subtitles_src" - a
            # subtitles URL, not a thumbnail - and the capture group keeps the
            # closing quote. Looks like a copy-paste slip; confirm the
            # intended key before changing it.
            thumbnail = self._search_regex(r'"subtitles_src":"(.+?")', tahoe_data.primary, 'thumbnail', fatal=False)
        return thumbnail
def _valid_video_title(self, video_title):
return video_title and not u'Log In or Sign Up to View' in video_title
2019-02-03 12:00:09 +02:00
class FacebookTahoeData:
    """Lazily fetches and caches Facebook's "tahoe" video-page API payloads.

    Two payload variants exist ('primary' and 'secondary'); each is
    downloaded at most once per instance through the owning extractor.
    """

    def __init__(self, extractor, page, video_id):
        self._page = page
        self._video_id = video_id
        self._extractor = extractor
        self._data = {}

    @property
    def primary(self):
        return self._get_data('primary')

    @property
    def secondary(self):
        return self._get_data('secondary')

    def _get_data(self, data_type):
        """Download (once) and return the payload of the given type; an empty
        string is returned when the request yields nothing."""
        if data_type not in self._data:
            post_data, headers = self._get_request_data_and_headers()
            self._data[data_type] = self._extractor._download_webpage(
                self._extractor._VIDEO_PAGE_TAHOE_TEMPLATE % (self._video_id, data_type),
                self._video_id,
                data=post_data,
                headers=headers,
            )
        return self._data[data_type] or ''

    def _get_request_data_and_headers(self):
        """Build the form-encoded POST body (session tokens scraped from the
        page, with safe defaults) and the headers for a tahoe request."""
        form = {
            '__a': 1,
            '__pc': self._extractor._search_regex(
                r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', self._page,
                'pkg cohort', default='PHASED:DEFAULT'),
            '__rev': self._extractor._search_regex(
                r'client_revision["\']\s*:\s*(\d+),', self._page,
                'client revision', default='3944515'),
            'fb_dtsg': self._extractor._search_regex(
                r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
                self._page, 'dtsg token', default=''),
        }
        return urlencode_postdata(form), {
            'Content-Type': 'application/x-www-form-urlencoded',
        }
2019-06-22 22:54:09 +03:00
class FacebookAjax:
    """Fetches the "hovercard" AJAX page for a Facebook page id, exposing the
    page's like count.

    Fix: the hover payload is now downloaded at most once per instance - the
    original code checked the ``_hover_data`` cache attribute but never
    assigned it, so every access re-downloaded the page.
    """

    HOVER_URL_TEMPLATE = 'https://www.facebook.com/ajax/hovercard/user.php?id=111&fb_dtsg_ag=x&endpoint=%2Fajax%2Fhovercard%2Fuser.php%3Fid%3D111&__a=1'

    def __init__(self, extractor, page, page_id):
        self._page = page
        self._page_id = page_id
        self._extractor = extractor
        self._hover_data = None

    def _get_hover_data(self):
        if self._hover_data is None:
            # Cache the download; the previous implementation forgot this
            # assignment and re-fetched on every access.
            self._hover_data = self._extractor._download_webpage(
                self._get_request_url(self._page_id), self._page_id
            ) or ''
        return self._hover_data

    @property
    def hover(self):
        return self._get_hover_data()

    @property
    def page_likes(self):
        """Parsed like count of the page, or None if it cannot be extracted
        (failures are reported as warnings, never raised)."""
        try:
            return parse_count(
                self._extractor._search_regex(
                    r'\/span>([\d,]+) likes', self.hover, 'uploader_likes',
                    default=None)
            )
        except Exception as e:  # best-effort: warn and fall through to None
            self._extractor.report_warning(self._page_id + str(e))

    def _get_request_url(self, page_id):
        """Build the hovercard URL with session tokens scraped from the page
        (falling back to known-good defaults)."""
        return update_url_query(self.HOVER_URL_TEMPLATE, {
            'id': page_id,
            'endpoint': '/ajax/hovercard/user.php?id=%s' % page_id,
            '__a': 1,
            '__pc': self._extractor._search_regex(
                r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', self._page,
                'pkg cohort', default='PHASED:DEFAULT'),
            '__rev': self._extractor._search_regex(
                r'client_revision["\']\s*:\s*(\d+),', self._page,
                'client revision', default='3944515'),
            'fb_dtsg': self._extractor._search_regex(
                r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
                self._page, 'dtsg token', default=''),
        })
class FacebookPluginsVideoIE(InfoExtractor):
    """Handles embedded-player URLs (facebook.com/plugins/video.php?href=...):
    the real video URL is percent-encoded in the 'href' query parameter and
    delegated to the main Facebook extractor."""

    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'

    _TESTS = [{
        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
        'md5': '5954e92cdfe51fe5782ae9bda7058a07',
        'info_dict': {
            'id': '10154383743583686',
            'ext': 'mp4',
            'title': 'What to do during the haze?',
            'uploader': 'Gov.sg',
            'upload_date': '20160826',
            'timestamp': 1472184808,
        },
        'add_ie': [FacebookIE.ie_key()],
    }, {
        'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Decode the percent-encoded target URL and hand it to FacebookIE.
        return self.url_result(
            compat_urllib_parse_unquote(self._match_id(url)),
            FacebookIE.ie_key())