From 79367a98208fbf01d6e04b6747a6e01d0b1f8b9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sat, 14 Jul 2018 18:05:06 +0700
Subject: [PATCH 1/8] [pornhub] Improve extraction and extract all formats
 (closes #12166, closes #15891, closes #16262, closes #16959)

---
 youtube_dl/extractor/pornhub.py | 125 ++++++++++++++++++++------------
 1 file changed, 79 insertions(+), 46 deletions(-)
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 23e24d216..97f988da4 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
 import functools
 import itertools
 import operator
-# import os
 import re
 
 from .common import InfoExtractor
 from ..compat import (
     compat_HTTPError,
-    # compat_urllib_parse_unquote,
-    # compat_urllib_parse_unquote_plus,
-    # compat_urllib_parse_urlparse,
+    compat_str,
 )
 from ..utils import (
     ExtractorError,
     int_or_none,
     js_to_json,
     orderedSet,
-    # sanitized_Request,
     remove_quotes,
     str_to_int,
 )
-# from ..aes import (
-#     aes_decrypt_text
-# )
 
 
 class PornHubIE(InfoExtractor):
@@ -62,7 +55,7 @@ class PornHubIE(InfoExtractor):
             'id': '1331683002',
             'ext': 'mp4',
             'title': '重庆婷婷女王足交',
-            'uploader': 'cj397186295',
+            'uploader': 'Unknown',
             'duration': 1753,
             'view_count': int,
             'like_count': int,
@@ -121,7 +114,7 @@ class PornHubIE(InfoExtractor):
             self._set_cookie('pornhub.com', 'platform', platform)
             return self._download_webpage(
                 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
-                video_id)
+                video_id, 'Downloading %s webpage' % platform)
 
         webpage = dl_webpage('pc')
 
@@ -134,48 +127,19 @@ class PornHubIE(InfoExtractor):
                 'PornHub said: %s' % error_msg,
                 expected=True, video_id=video_id)
 
-        tv_webpage = dl_webpage('tv')
-
-        assignments = self._search_regex(
-            r'(var.+?mediastring.+?)</script>', tv_webpage,
-            'encoded url').split(';')
-
-        js_vars = {}
-
-        def parse_js_value(inp):
-            inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
-            if '+' in inp:
-                inps = inp.split('+')
-                return functools.reduce(
-                    operator.concat, map(parse_js_value, inps))
-            inp = inp.strip()
-            if inp in js_vars:
-                return js_vars[inp]
-            return remove_quotes(inp)
-
-        for assn in assignments:
-            assn = assn.strip()
-            if not assn:
-                continue
-            assn = re.sub(r'var\s+', '', assn)
-            vname, value = assn.split('=', 1)
-            js_vars[vname] = parse_js_value(value)
-
-        video_url = js_vars['mediastring']
-
-        title = self._search_regex(
-            r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
-
         # video_title from flashvars contains whitespace instead of non-ASCII (see
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
-        title = title or self._html_search_meta(
+        title = self._html_search_meta(
             'twitter:title', webpage, default=None) or self._search_regex(
             (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
              r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
              r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
             webpage, 'title', group='title')
 
+        video_urls = []
+        video_urls_set = set()
+
         flashvars = self._parse_json(
             self._search_regex(
                 r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
@@ -183,8 +147,78 @@ class PornHubIE(InfoExtractor):
         if flashvars:
             thumbnail = flashvars.get('image_url')
             duration = int_or_none(flashvars.get('video_duration'))
+            media_definitions = flashvars.get('mediaDefinitions')
+            if isinstance(media_definitions, list):
+                for definition in media_definitions:
+                    if not isinstance(definition, dict):
+                        continue
+                    video_url = definition.get('videoUrl')
+                    if not video_url or not isinstance(video_url, compat_str):
+                        continue
+                    if video_url in video_urls_set:
+                        continue
+                    video_urls_set.add(video_url)
+                    video_urls.append(
+                        (video_url, int_or_none(definition.get('quality'))))
         else:
-            title, thumbnail, duration = [None] * 3
+            thumbnail, duration = [None] * 2
+
+        if not video_urls:
+            tv_webpage = dl_webpage('tv')
+
+            assignments = self._search_regex(
+                r'(var.+?mediastring.+?)</script>', tv_webpage,
+                'encoded url').split(';')
+
+            js_vars = {}
+
+            def parse_js_value(inp):
+                inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+                if '+' in inp:
+                    inps = inp.split('+')
+                    return functools.reduce(
+                        operator.concat, map(parse_js_value, inps))
+                inp = inp.strip()
+                if inp in js_vars:
+                    return js_vars[inp]
+                return remove_quotes(inp)
+
+            for assn in assignments:
+                assn = assn.strip()
+                if not assn:
+                    continue
+                assn = re.sub(r'var\s+', '', assn)
+                vname, value = assn.split('=', 1)
+                js_vars[vname] = parse_js_value(value)
+
+            video_url = js_vars['mediastring']
+            if video_url not in video_urls_set:
+                video_urls.append((video_url, None))
+                video_urls_set.add(video_url)
+
+        for mobj in re.finditer(
+                r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+                webpage):
+            video_url = mobj.group('url')
+            if video_url not in video_urls_set:
+                video_urls.append((video_url, None))
+                video_urls_set.add(video_url)
+
+        formats = []
+        for video_url, height in video_urls:
+            tbr = None
+            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                tbr = int(mobj.group('tbr'))
+            formats.append({
+                'url': video_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+                'tbr': tbr,
+            })
+        self._sort_formats(formats)
 
         video_uploader = self._html_search_regex(
             r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@@ -210,7 +244,6 @@ class PornHubIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
             'uploader': video_uploader,
             'title': title,
             'thumbnail': thumbnail,
@@ -219,7 +252,7 @@ class PornHubIE(InfoExtractor):
             'like_count': like_count,
             'dislike_count': dislike_count,
             'comment_count': comment_count,
-            # 'formats': formats,
+            'formats': formats,
             'age_limit': 18,
             'tags': tags,
             'categories': categories,

From 905eef2b06f1e890b1dfd228aa4fa1fa2308d687 Mon Sep 17 00:00:00 2001
From: Jakub Wilk <jwilk@jwilk.net>
Date: Wed, 18 Jul 2018 18:47:26 +0200
Subject: [PATCH 2/8] [imgur] Allow digits in filename extension

---
 youtube_dl/extractor/imgur.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index 2901960a5..ecc958a17 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class ImgurIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
+    _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
 
     _TESTS = [{
         'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
     }, {
         'url': 'http://imgur.com/r/aww/VQcQPhM',
         'only_matching': True,
+    }, {
+        'url': 'https://i.imgur.com/crGpqCV.mp4',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):

From bd21ead2a20ff16ec8cb10da72526103471069d6 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Wed, 18 Jul 2018 18:29:18 +0100
Subject: [PATCH 3/8] [extractor/common] add support for DASH and MSS formats
 extraction in SMIL manifests

---
 youtube_dl/extractor/common.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 5d4db54d5..b8bbaf81a 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1859,9 +1859,7 @@ class InfoExtractor(object):
                         'height': height,
                     })
                 formats.extend(m3u8_formats)
-                continue
-
-            if src_ext == 'f4m':
+            elif src_ext == 'f4m':
                 f4m_url = src_url
                 if not f4m_params:
                     f4m_params = {
@@ -1871,9 +1869,13 @@ class InfoExtractor(object):
                 f4m_url += '&' if '?' in f4m_url else '?'
                 f4m_url += compat_urllib_parse_urlencode(f4m_params)
                 formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
-                continue
-
-            if src_url.startswith('http') and self._is_valid_url(src, video_id):
+            elif src_ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    src_url, video_id, mpd_id='dash', fatal=False))
+            elif re.search(r'\.ism/[Mm]anifest', src_url):
+                formats.extend(self._extract_ism_formats(
+                    src_url, video_id, ism_id='mss', fatal=False))
+            elif src_url.startswith('http') and self._is_valid_url(src, video_id):
                 http_count += 1
                 formats.append({
                     'url': src_url,
@@ -1884,7 +1886,6 @@ class InfoExtractor(object):
                     'width': width,
                     'height': height,
                 })
-                continue
 
         return formats
 

From 371dcc1dd4b29001910005c1d3e416db204cc262 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Wed, 18 Jul 2018 18:31:40 +0100
Subject: [PATCH 4/8] [theplatform] add support for theplatform Top-level
 domain customization(#16977)

---
 youtube_dl/extractor/theplatform.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index b1a985ff6..e7dc6071c 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -32,13 +32,14 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
 
 
 class ThePlatformBaseIE(OnceIE):
+    _TP_TLD = 'com'
     def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
         meta = self._download_xml(
             smil_url, video_id, note=note, query={'format': 'SMIL'},
             headers=self.geo_verification_headers())
         error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
         if error_element is not None and error_element.attrib['src'].startswith(
-                'http://link.theplatform.com/s/errorFiles/Unavailable.'):
+                'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
             raise ExtractorError(error_element.attrib['abstract'], expected=True)
 
         smil_formats = self._parse_smil_formats(
@@ -66,7 +67,7 @@ class ThePlatformBaseIE(OnceIE):
         return formats, subtitles
 
     def _download_theplatform_metadata(self, path, video_id):
-        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
+        info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
         return self._download_json(info_url, video_id)
 
     def _parse_theplatform_metadata(self, info):

From 38f1eb0ac3be5d3b61e7722db0024e61cf98eb69 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Wed, 18 Jul 2018 18:33:33 +0100
Subject: [PATCH 5/8] [mediaset] fix extraction(closes #16977)

---
 youtube_dl/extractor/mediaset.py | 155 +++++++++++++++----------------
 1 file changed, 74 insertions(+), 81 deletions(-)

diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py
index 9f2b60dcc..57f97409d 100644
--- a/youtube_dl/extractor/mediaset.py
+++ b/youtube_dl/extractor/mediaset.py
@@ -3,75 +3,75 @@ from __future__ import unicode_literals
 
 import re
 
-from .common import InfoExtractor
-from ..compat import compat_str
+from .theplatform import ThePlatformBaseIE
 from ..utils import (
-    determine_ext,
-    parse_duration,
-    try_get,
-    unified_strdate,
+    ExtractorError,
+    int_or_none,
+    update_url_query,
 )
 
 
-class MediasetIE(InfoExtractor):
+class MediasetIE(ThePlatformBaseIE):
+    _TP_TLD = 'eu'
     _VALID_URL = r'''(?x)
                     (?:
                         mediaset:|
                         https?://
-                            (?:www\.)?video\.mediaset\.it/
+                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
                             (?:
                                 (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
-                                player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
+                                player/index\.html\?.*?\bprogramGuid=
                             )
-                    )(?P<id>[0-9]+)
+                    )(?P<id>[0-9A-Z]{16})
                     '''
     _TESTS = [{
         # full episode
-        'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
+        'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
         'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
         'info_dict': {
-            'id': '661824',
+            'id': 'FAFU000000661824',
             'ext': 'mp4',
             'title': 'Quarta puntata',
-            'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 1414,
-            'creator': 'mediaset',
+            'duration': 1414.26,
             'upload_date': '20161107',
             'series': 'Hello Goodbye',
-            'categories': ['reality'],
+            'timestamp': 1478532900,
+            'uploader': 'Rete 4',
+            'uploader_id': 'R4',
         },
-        'expected_warnings': ['is not a supported codec'],
     }, {
-        'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html',
-        'md5': '1276f966ac423d16ba255ce867de073e',
+        'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
+        'md5': '288532f0ad18307705b01e581304cd7b',
         'info_dict': {
-            'id': '846685',
+            'id': 'F309013801000501',
             'ext': 'mp4',
             'title': 'Puntata del 25 maggio',
-            'description': 'md5:ee2e456e3eb1dba5e814596655bb5296',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 6565,
-            'creator': 'mediaset',
-            'upload_date': '20180525',
+            'duration': 6565.007,
+            'upload_date': '20180526',
             'series': 'Matrix',
-            'categories': ['infotainment'],
+            'timestamp': 1527326245,
+            'uploader': 'Canale 5',
+            'uploader_id': 'C5',
         },
         'expected_warnings': ['HTTP Error 403: Forbidden'],
     }, {
         # clip
-        'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
+        'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
         'only_matching': True,
     }, {
         # iframe simple
-        'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
+        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
         'only_matching': True,
     }, {
         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
-        'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
+        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
         'only_matching': True,
     }, {
-        'url': 'mediaset:661824',
+        'url': 'mediaset:FAFU000000665924',
         'only_matching': True,
     }]
 
@@ -84,61 +84,54 @@ class MediasetIE(InfoExtractor):
                 webpage)]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        video = self._download_json(
-            'https://www.video.mediaset.it/html/metainfo.sjson',
-            video_id, 'Downloading media info', query={
-                'id': video_id
-            })['video']
-
-        title = video['title']
-        media_id = video.get('guid') or video_id
-
-        video_list = self._download_json(
-            'http://cdnsel01.mediaset.net/GetCdn2018.aspx',
-            video_id, 'Downloading video CDN JSON', query={
-                'streamid': media_id,
-                'format': 'json',
-            })['videoList']
+        guid = self._match_id(url)
+        tp_path = 'PR1GhC/media/guid/2702976343/' + guid
+        info = self._extract_theplatform_metadata(tp_path, guid)
 
         formats = []
-        for format_url in video_list:
-            ext = determine_ext(format_url)
-            if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-            elif ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    format_url, video_id, mpd_id='dash', fatal=False))
-            elif ext == 'ism' or '.ism' in format_url:
-                formats.extend(self._extract_ism_formats(
-                    format_url, video_id, ism_id='mss', fatal=False))
-            else:
-                formats.append({
-                    'url': format_url,
-                    'format_id': determine_ext(format_url),
-                })
+        subtitles = {}
+        first_e = None
+        for asset_type in ('SD', 'HD'):
+            for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
+                try:
+                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                        update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
+                            'mbr': 'true',
+                            'formats': f,
+                            'assetTypes': asset_type,
+                        }), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
+                except ExtractorError as e:
+                    if not first_e:
+                        first_e = e
+                    break
+                for tp_f in tp_formats:
+                    tp_f['quality'] = 1 if asset_type == 'HD' else 0
+                formats.extend(tp_formats)
+                subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+        if first_e and not formats:
+            raise first_e
         self._sort_formats(formats)
 
-        creator = try_get(
-            video, lambda x: x['brand-info']['publisher'], compat_str)
-        category = try_get(
-            video, lambda x: x['brand-info']['category'], compat_str)
-        categories = [category] if category else None
+        fields = []
+        for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
+            fields.extend(templ % repl for repl in repls)
+        feed_data = self._download_json(
+            'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
+            guid, fatal=False, query={'fields': ','.join(fields)})
+        if feed_data:
+            publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
+            info.update({
+                'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
+                'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
+                'series': feed_data.get('mediasetprogram$brandTitle'),
+                'uploader': publish_info.get('description'),
+                'uploader_id': publish_info.get('channel'),
+                'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
+            })
 
-        return {
-            'id': video_id,
-            'title': title,
-            'description': video.get('short-description'),
-            'thumbnail': video.get('thumbnail'),
-            'duration': parse_duration(video.get('duration')),
-            'creator': creator,
-            'upload_date': unified_strdate(video.get('production-date')),
-            'webpage_url': video.get('url'),
-            'series': video.get('brand-value'),
-            'season': video.get('season'),
-            'categories': categories,
+        info.update({
+            'id': guid,
             'formats': formats,
-        }
+            'subtitles': subtitles,
+        })
+        return info

From c63f5fb8633dda1b0a673c90d537e93497a8d62d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Thu, 19 Jul 2018 01:59:00 +0700
Subject: [PATCH 6/8] [slutload] Fix and improve extraction (closes #17001)

---
 youtube_dl/extractor/slutload.py | 57 +++++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py
index 6fc2ff60d..661f9e59d 100644
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -1,12 +1,10 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class SlutloadIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
+    _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
     _TESTS = [{
         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
         'md5': '868309628ba00fd488cf516a113fd717',
@@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
             'title': 'virginie baisee en cam',
             'age_limit': 18,
             'thumbnail': r're:https?://.*?\.jpg'
-        }
+        },
     }, {
         # mobile site
         'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
         'only_matching': True,
+    }, {
+        'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
-        webpage = self._download_webpage(desktop_url, video_id)
+        embed_page = self._download_webpage(
+            'http://www.slutload.com/embed_player/%s' % video_id, video_id,
+            'Downloading embed page', fatal=False)
 
-        video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
-                                              webpage, 'title').strip()
+        if embed_page:
+            def extract(what):
+                return self._html_search_regex(
+                    r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
+                    embed_page, 'video %s' % what, default=None, group='url')
 
-        video_url = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
-            webpage, 'video URL')
-        thumbnail = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+            video_url = extract('url')
+            if video_url:
+                title = self._html_search_regex(
+                    r'<title>([^<]+)', embed_page, 'title', default=video_id)
+                return {
+                    'id': video_id,
+                    'url': video_url,
+                    'title': title,
+                    'thumbnail': extract('preview'),
+                    'age_limit': 18
+                }
 
-        return {
+        webpage = self._download_webpage(
+            'http://www.slutload.com/video/_/%s/' % video_id, video_id)
+        title = self._html_search_regex(
+            r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
+        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+        info.update({
             'id': video_id,
-            'url': video_url,
-            'title': video_title,
-            'thumbnail': thumbnail,
-            'age_limit': 18
-        }
+            'title': title,
+            'age_limit': 18,
+        })
+        return info

From 8da17f96803faa35ab19352bdcd2777011d8812a Mon Sep 17 00:00:00 2001
From: bato3 <bato3@bandyci.org>
Date: Wed, 18 Jul 2018 21:04:05 +0200
Subject: [PATCH 7/8] [dailymotion] Improve description extraction (closes
 #16984)

---
 youtube_dl/extractor/dailymotion.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 9a74906cb..8f5f57b98 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -144,7 +144,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
 
         age_limit = self._rta_search(webpage)
 
-        description = self._og_search_description(webpage) or self._html_search_meta(
+        description = self._og_search_description(
+            webpage, default=None) or self._html_search_meta(
             'description', webpage, 'description')
 
         view_count_str = self._search_regex(

From 11330f5121732f80e3d6ba1c34102955427eb04b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Thu, 19 Jul 2018 02:25:19 +0700
Subject: [PATCH 8/8] [facebook] Extract view count and update tests (closes
 #16942)

---
 youtube_dl/extractor/facebook.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 8a9ed96c2..f78479b92 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -20,6 +20,7 @@ from ..utils import (
     int_or_none,
     js_to_json,
     limit_length,
+    parse_count,
     sanitized_Request,
     try_get,
     urlencode_postdata,
@@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '274175099429670',
             'ext': 'mp4',
-            'title': 'Asif Nawab Butt posted a video to his Timeline.',
+            'title': 're:^Asif Nawab Butt posted a video',
             'uploader': 'Asif Nawab Butt',
             'upload_date': '20140506',
             'timestamp': 1399398998,
@@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
     }, {
         # have 1080P, but only up to 720p in swf params
         'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
-        'md5': '0d9813160b146b3bc8744e006027fcc6',
+        'md5': '9571fae53d4165bbbadb17a94651dcdc',
         'info_dict': {
             'id': '10155529876156509',
             'ext': 'mp4',
@@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
             'upload_date': '20161030',
             'uploader': 'CNN',
             'thumbnail': r're:^https?://.*',
+            'view_count': int,
         },
     }, {
         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '1417995061575415',
             'ext': 'mp4',
-            'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
+            'title': 'md5:1db063d6a8c13faa8da727817339c857',
             'timestamp': 1486648217,
             'upload_date': '20170209',
             'uploader': 'Yaroslav Korpan',
@@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '1396382447100162',
             'ext': 'mp4',
-            'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
+            'title': 'md5:19a428bbde91364e3de815383b54a235',
             'timestamp': 1486035494,
             'upload_date': '20170202',
             'uploader': 'Elisabeth Ahtn',
@@ -426,6 +428,10 @@ class FacebookIE(InfoExtractor):
             'timestamp', default=None))
         thumbnail = self._og_search_thumbnail(webpage)
 
+        view_count = parse_count(self._search_regex(
+            r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
+            default=None))
+
         info_dict = {
             'id': video_id,
             'title': video_title,
@@ -433,6 +439,7 @@ class FacebookIE(InfoExtractor):
             'uploader': uploader,
             'timestamp': timestamp,
             'thumbnail': thumbnail,
+            'view_count': view_count,
         }
 
         return webpage, info_dict