diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 20a726fc4..0c1569fd4 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.02.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.02.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2017.02.07 +[debug] youtube-dl version 2017.02.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7e2afaacf..d4c8081f7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version 2017.02.10 + +Extractors +* [xtube] Fix extraction (#12023) +* [pornhub] Fix extraction (#12007, #12018) +* [facebook] Improve JS data regular expression (#12042) +* [kaltura] Improve embed 
partner id extraction (#12041) ++ [sprout] Add support for sproutonline.com +* [6play] Improve extraction ++ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765) ++ [go] Add support for Adobe Pass authentication (#11468, #10831) +* [6play] Fix extraction (#12011) ++ [nbc] Add support for Adobe Pass authentication (#12006) + + version 2017.02.07 Core diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2d82cc321..76882f3b5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -11,6 +11,7 @@ - **4tube** - **56.com** - **5min** + - **6play** - **8tracks** - **91porn** - **9c9media** @@ -667,6 +668,7 @@ - **screen.yahoo:search**: Yahoo screen search - **Screencast** - **ScreencastOMatic** + - **scrippsnetworks:watch** - **Seeker** - **SenateISVP** - **SendtoNews** @@ -676,7 +678,6 @@ - **Shared**: shared.sx - **ShowRoomLive** - **Sina** - - **SixPlay** - **skynewsarabia:article** - **skynewsarabia:video** - **SkySports** @@ -711,6 +712,7 @@ - **SportBoxEmbed** - **SportDeutschland** - **Sportschau** + - **Sprout** - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 41e37261d..bdd3545a2 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -275,7 +275,7 @@ class FFmpegFD(ExternalFD): args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] - if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] diff --git 
a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py index 2f86e2381..d3ed4a9a4 100644 --- a/youtube_dl/extractor/commonmistakes.py +++ b/youtube_dl/extractor/commonmistakes.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class CommonMistakesIE(InfoExtractor): IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:url|URL) + (?:url|URL)$ ''' _TESTS = [{ diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3445e7d40..5115e1a0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1019,6 +1019,7 @@ from .tvplay import ( TVPlayIE, ViafreeIE, ) +from .tvplayer import TVPlayerIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b325c8200..4a3c839f4 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -134,6 +134,20 @@ class FacebookIE(InfoExtractor): 'upload_date': '20161030', 'uploader': 'CNN', }, + }, { + # bigPipe.onPageletArrive ... 
onPageletArrive pagelet_group_mall + 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', + 'info_dict': { + 'id': '1417995061575415', + 'ext': 'mp4', + 'title': 'md5:a7b86ca673f51800cd54687b7f4012fe', + 'timestamp': 1486648217, + 'upload_date': '20170209', + 'uploader': 'Yaroslav Korpan', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -262,7 +276,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json( self._search_regex( - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet', + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)', webpage, 'js data', default='{}'), video_id, transform_source=js_to_json, fatal=False) if server_js_data: diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 5ef382f9f..54374ea76 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -23,11 +23,11 @@ class KalturaIE(InfoExtractor): (?: kaltura:(?P\d+):(?P[0-9a-z_]+)| https?:// - (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/ + (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?: (?: # flash player - index\.php/kwidget| + index\.php/(?:kwidget|extwidget/preview)| # html5 player html5/html5lib/[^/]+/mwEmbedFrame\.php ) @@ -94,6 +94,14 @@ class KalturaIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, + { + 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', + 'only_matching': True, + }, + { + 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', + 'only_matching': True, } ] @@ -112,7 +120,7 @@ class KalturaIE(InfoExtractor): re.search( 
r'''(?xs) (?P["\']) - (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* + (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* (?P=q1).*? (?: entry_?[Ii]d| @@ -209,6 +217,8 @@ class KalturaIE(InfoExtractor): partner_id = params['wid'][0][1:] elif 'p' in params: partner_id = params['p'][0] + elif 'partner_id' in params: + partner_id = params['partner_id'][0] else: raise ExtractorError('Invalid URL', expected=True) if 'entry_id' in params: diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 017f6c552..818d99c1f 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -156,11 +156,17 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') + video_variables = {} + for video_variablename, quote, video_variable in re.findall( + r'(player_quality_[0-9]{3,4}p\w+)\s*=\s*(["\'])(.+?)\2;', webpage): + video_variables[video_variablename] = video_variable + video_urls = [] - for quote, video_url in re.findall( - r'player_quality_[0-9]{3,4}p\s*=\s*(["\'])(.+?)\1;', webpage): - video_urls.append(compat_urllib_parse_unquote(re.sub( - r'{0}\s*\+\s*{0}'.format(quote), '', video_url))) + for encoded_video_url in re.findall( + r'player_quality_[0-9]{3,4}p\s*=(.+?);', webpage): + for varname, varval in video_variables.items(): + encoded_video_url = encoded_video_url.replace(varname, varval) + video_urls.append(re.sub(r'[\s+]', '', encoded_video_url)) if webpage.find('"encrypted":true') != -1: password = compat_urllib_parse_unquote_plus( diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dl/extractor/tvplayer.py new file mode 100644 index 000000000..b6537141a --- /dev/null +++ b/youtube_dl/extractor/tvplayer.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import 
compat_HTTPError +from ..utils import ( + extract_attributes, + urlencode_postdata, + ExtractorError, +) + + +class TVPlayerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvplayer\.com/watch/(?P[^/?#]+)' + _TEST = { + 'url': 'http://tvplayer.com/watch/bbcone', + 'info_dict': { + 'id': '89', + 'ext': 'mp4', + 'title': r're:^BBC One [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + current_channel = extract_attributes(self._search_regex( + r'(]+class="[^"]*current-channel[^"]*"[^>]*>)', + webpage, 'channel element')) + title = current_channel['data-name'] + + resource_id = self._search_regex( + r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id') + platform = self._search_regex( + r'platform\s*=\s*"([^"]+)"', webpage, 'platform') + token = self._search_regex( + r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null') + validate = self._search_regex( + r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null') + + try: + response = self._download_json( + 'http://api.tvplayer.com/api/v2/stream/live', + resource_id, headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + }, data=urlencode_postdata({ + 'service': 1, + 'platform': platform, + 'id': resource_id, + 'token': token, + 'validate': validate, + }))['tvplayer']['response'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + response = self._parse_json( + e.cause.read().decode(), resource_id)['tvplayer']['response'] + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']), expected=True) + raise + + formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': resource_id, + 'display_id': display_id, + 'title': self._live_title(title), + 'formats': formats, + 'is_live': 
True, + } diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 83bc1fef2..11717fe98 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -53,14 +53,15 @@ class XTubeIE(InfoExtractor): if not display_id: display_id = video_id - url = 'http://www.xtube.com/watch.php?v=%s' % video_id + url = 'http://www.xtube.com/video-watch/-%s' % video_id req = sanitized_Request(url) req.add_header('Cookie', 'age_verified=1; cookiesAccepted=1') webpage = self._download_webpage(req, display_id) sources = self._parse_json(self._search_regex( - r'sources\s*:\s*({.+?}),', webpage, 'sources'), video_id) + r'(["\'])sources\1\s*:\s*(?P{.+?}),', + webpage, 'sources', group='sources'), video_id) formats = [] for format_id, format_url in sources.items(): @@ -81,10 +82,10 @@ class XTubeIE(InfoExtractor): r']+class="nickname"[^>]*>([^<]+)'), webpage, 'uploader', fatal=False) duration = parse_duration(self._search_regex( - r'
<dt>Runtime:</dt>\s*<dd>([^<]+)</dd>', + r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>', webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( - r'<dt>Views:</dt>\s*<dd>([\d,\.]+)</dd>', + r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>', webpage, 'view count', fatal=False)) comment_count = str_to_int(self._html_search_regex( r'>Comments? \(([\d,\.]+)\)<', diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a73e9d89c..a8395ce04 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2017.02.07' +__version__ = '2017.02.10'