Merge branch 'master' of https://github.com/rg3/youtube-dl

2025-01-24 11:32:51 +08:00 · 2017-02-10 10:17:10 +07:00 · 2017-02-10 10:17:10 +07:00 · 34b8970708
commit 34b8970708
parent c59d4abb0e e01bfc19c3
12 changed files with 143 additions and 19 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.07**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.10**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.02.07
+[debug] youtube-dl version 2017.02.10
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+version 2017.02.10
+
+Extractors
+* [xtube] Fix extraction (#12023)
+* [pornhub] Fix extraction (#12007, #12018)
+* [facebook] Improve JS data regular expression (#12042)
+* [kaltura] Improve embed partner id extraction (#12041)
++ [sprout] Add support for sproutonline.com
+* [6play] Improve extraction
++ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765)
++ [go] Add support for Adobe Pass authentication (#11468, #10831)
+* [6play] Fix extraction (#12011)
++ [nbc] Add support for Adobe Pass authentication (#12006)
+
+
 version 2017.02.07

 Core
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -11,6 +11,7 @@
 - **4tube**
 - **56.com**
 - **5min**
+ - **6play**
 - **8tracks**
 - **91porn**
 - **9c9media**
@@ -667,6 +668,7 @@
 - **screen.yahoo:search**: Yahoo screen search
 - **Screencast**
 - **ScreencastOMatic**
+ - **scrippsnetworks:watch**
 - **Seeker**
 - **SenateISVP**
 - **SendtoNews**
@@ -676,7 +678,6 @@
 - **Shared**: shared.sx
 - **ShowRoomLive**
 - **Sina**
- - **SixPlay**
 - **skynewsarabia:article**
 - **skynewsarabia:video**
 - **SkySports**
@@ -711,6 +712,7 @@
 - **SportBoxEmbed**
 - **SportDeutschland**
 - **Sportschau**
+ - **Sprout**
 - **sr:mediathek**: Saarländischer Rundfunk
 - **SRGSSR**
 - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -275,7 +275,7 @@ class FFmpegFD(ExternalFD):
                args += ['-f', 'mpegts']
            else:
                args += ['-f', 'mp4']
-                if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+                if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
                    args += ['-bsf:a', 'aac_adtstoasc']
        elif protocol == 'rtmp':
            args += ['-f', 'flv']
--- a/youtube_dl/extractor/commonmistakes.py
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -7,7 +7,7 @@ from ..utils import ExtractorError
 class CommonMistakesIE(InfoExtractor):
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
-        (?:url|URL)
+        (?:url|URL)$
    '''

    _TESTS = [{
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1019,6 +1019,7 @@ from .tvplay import (
    TVPlayIE,
    ViafreeIE,
 )
+from .tvplayer import TVPlayerIE
 from .tweakers import TweakersIE
 from .twentyfourvideo import TwentyFourVideoIE
 from .twentymin import TwentyMinutenIE
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -134,6 +134,20 @@ class FacebookIE(InfoExtractor):
            'upload_date': '20161030',
            'uploader': 'CNN',
        },
+    }, {
+        # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
+        'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
+        'info_dict': {
+            'id': '1417995061575415',
+            'ext': 'mp4',
+            'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
+            'timestamp': 1486648217,
+            'upload_date': '20170209',
+            'uploader': 'Yaroslav Korpan',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }, {
        'url': 'https://www.facebook.com/video.php?v=10204634152394104',
        'only_matching': True,
@@ -262,7 +276,7 @@ class FacebookIE(InfoExtractor):
        if not video_data:
            server_js_data = self._parse_json(
                self._search_regex(
-                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet',
+                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)',
                    webpage, 'js data', default='{}'),
                video_id, transform_source=js_to_json, fatal=False)
            if server_js_data:
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor):
                (?:
                    kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
                    https?://
-                        (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/
+                        (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
                        (?:
                            (?:
                                # flash player
-                                index\.php/kwidget|
+                                index\.php/(?:kwidget|extwidget/preview)|
                                # html5 player
                                html5/html5lib/[^/]+/mwEmbedFrame\.php
                            )
@@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor):
            'params': {
                'skip_download': True,
            },
+        },
+        {
+            'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+            'only_matching': True,
        }
    ]

@@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor):
            re.search(
                r'''(?xs)
                    (?P<q1>["\'])
-                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
+                        (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
                    (?P=q1).*?
                    (?:
                        entry_?[Ii]d|
@@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor):
                partner_id = params['wid'][0][1:]
            elif 'p' in params:
                partner_id = params['p'][0]
+            elif 'partner_id' in params:
+                partner_id = params['partner_id'][0]
            else:
                raise ExtractorError('Invalid URL', expected=True)
            if 'entry_id' in params:
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -156,11 +156,17 @@ class PornHubIE(InfoExtractor):
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

+        video_variables = {}
+        for video_variablename, quote, video_variable in re.findall(
+                r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage):
+            video_variables[video_variablename] = video_variable
+
        video_urls = []
-        for quote, video_url in re.findall(
-                r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage):
-            video_urls.append(compat_urllib_parse_unquote(re.sub(
-                r'{0}\s*\+\s*{0}'.format(quote), '', video_url)))
+        for encoded_video_url in re.findall(
+                r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage):
+            for varname, varval in video_variables.items():
+                encoded_video_url = encoded_video_url.replace(varname, varval)
+            video_urls.append(re.sub(r'[\s+]', '', encoded_video_url))

        if webpage.find('"encrypted":true') != -1:
            password = compat_urllib_parse_unquote_plus(
--- a/youtube_dl/extractor/tvplayer.py
+++ b/youtube_dl/extractor/tvplayer.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    extract_attributes,
+    urlencode_postdata,
+    ExtractorError,
+)
+
+
+class TVPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P<id>[^/?#]+)'
+    _TEST = {
+        'url': 'http://tvplayer.com/watch/bbcone',
+        'info_dict': {
+            'id': '89',
+            'ext': 'mp4',
+            'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        current_channel = extract_attributes(self._search_regex(
+            r'(<div[^>]+class="[^"]*current-channel[^"]*"[^>]*>)',
+            webpage, 'channel element'))
+        title = current_channel['data-name']
+
+        resource_id = self._search_regex(
+            r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
+        platform = self._search_regex(
+            r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
+        token = self._search_regex(
+            r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
+        validate = self._search_regex(
+            r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
+
+        try:
+            response = self._download_json(
+                'http://api.tvplayer.com/api/v2/stream/live',
+                resource_id, headers={
+                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                }, data=urlencode_postdata({
+                    'service': 1,
+                    'platform': platform,
+                    'id': resource_id,
+                    'token': token,
+                    'validate': validate,
+                }))['tvplayer']['response']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                response = self._parse_json(
+                    e.cause.read().decode(), resource_id)['tvplayer']['response']
+                raise ExtractorError(
+                    '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
+            raise
+
+        formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': resource_id,
+            'display_id': display_id,
+            'title': self._live_title(title),
+            'formats': formats,
+            'is_live': True,
+        }
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -53,14 +53,15 @@ class XTubeIE(InfoExtractor):

        if not display_id:
            display_id = video_id
-            url = 'http://www.xtube.com/watch.php?v=%s' % video_id
+            url = 'http://www.xtube.com/video-watch/-%s' % video_id

        req = sanitized_Request(url)
        req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1')
        webpage = self._download_webpage(req, display_id)

        sources = self._parse_json(self._search_regex(
-            r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id)
+            r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
+            webpage, 'sources', group='sources'), video_id)

        formats = []
        for format_id, format_url in sources.items():
@@ -81,10 +82,10 @@ class XTubeIE(InfoExtractor):
             r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
            webpage, 'uploader', fatal=False)
        duration = parse_duration(self._search_regex(
-            r'<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>',
+            r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
            webpage, 'duration', fatal=False))
        view_count = str_to_int(self._search_regex(
-            r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>',
+            r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
            webpage, 'view count', fatal=False))
        comment_count = str_to_int(self._html_search_regex(
            r'>Comments? \(([\d,\.]+)\)<',
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2017.02.07'
+__version__ = '2017.02.10'