From 3bb33568121126809e965dfacf542828d3606c10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Mar 2016 15:43:29 +0800 Subject: [PATCH 0001/3599] [douyutv] Extend _VALID_URL --- youtube_dl/extractor/douyutv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index bcb670945..3915cb182 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring) class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'https?://(?:www\.)?douyutv\.com/(?P[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { @@ -60,6 +60,9 @@ class DouyuTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.douyu.com/xiaocang', + 'only_matching': True, }] def _real_extract(self, url): From d7f62b049a07d59265f679d13d736f5f8b096ce4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Mar 2016 15:45:40 +0800 Subject: [PATCH 0002/3599] [iqiyi] Update enc_key --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 1a4c64713..ffcea30ad 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -501,7 +501,7 @@ class IqiyiIE(InfoExtractor): def get_enc_key(self, video_id): # TODO: automatic key extraction # last update at 2016-01-22 for Zombie::bite - enc_key = '8ed797d224d043e7ac23d95b70227d32' + enc_key = '4a1caba4b4465345366f28da7c117d20' return enc_key def _extract_playlist(self, webpage): From 0be8314dc86a2630863ea89c94ee827a4b97b846 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 25 Mar 2016 09:27:18 +0100 Subject: [PATCH 0003/3599] release 2016.03.25 --- CONTRIBUTING.md | 2 +- README.md | 6 ++++-- docs/supportedsites.md | 8 
++++++++ youtube_dl/version.py | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c996f03ab..0df6193fb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make +* make (both GNU make and BSD make are supported) * pandoc * zip * nosetests diff --git a/README.md b/README.md index fcc12d2b3..52b2a85a3 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,8 @@ which means you can modify it, redistribute it or use it however you like. (e.g. 50K or 4.2M) -R, --retries RETRIES Number of retries (default is 10), or "infinite". + --fragment-retries RETRIES Number of retries for a fragment (default + is 10), or "infinite" (DASH only) --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024) --no-resize-buffer Do not automatically adjust the buffer @@ -376,8 +378,8 @@ which means you can modify it, redistribute it or use it however you like. 
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default - --embed-subs Embed subtitles in the video (only for mkv - and mp4 videos) + --embed-subs Embed subtitles in the video (only for mp4, + webm and mkv videos) --embed-thumbnail Embed thumbnail in the audio as cover art --add-metadata Write metadata to the video file --metadata-from-title FORMAT Parse additional metadata like song title / diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 3415efc45..00b8c247c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -74,6 +74,7 @@ - **Bigflix** - **Bild**: Bild.de - **BiliBili** + - **BioBioChileTV** - **BleacherReport** - **BleacherReportCMS** - **blinkx** @@ -100,6 +101,7 @@ - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** + - **CDA** - **CeskaTelevize** - **channel9**: Channel 9 - **Chaturbate** @@ -244,6 +246,7 @@ - **GPUTechConf** - **Groupon** - **Hark** + - **HBO** - **HearThisAt** - **Heise** - **HellPorno** @@ -344,6 +347,7 @@ - **MiTele**: mitele.es - **mixcloud** - **MLB** + - **Mnet** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** - **Mojvideo** @@ -440,6 +444,7 @@ - **OnionStudios** - **Ooyala** - **OoyalaExternal** + - **Openload** - **OraTV** - **orf:fm4**: radio FM4 - **orf:iptv**: iptv.ORF.at @@ -525,6 +530,7 @@ - **RUTV**: RUTV.RU - **Ruutu** - **safari**: safaribooksonline.com online video + - **safari:api** - **safari:course**: safaribooksonline.com online courses - **Sandia**: Sandia National Laboratories - **Sapo**: SAPO Vídeos @@ -618,6 +624,7 @@ - **TheOnion** - **ThePlatform** - **ThePlatformFeed** + - **TheScene** - **TheSixtyOne** - **TheStar** - **ThisAmericanLife** @@ -786,6 +793,7 @@ - **youtube:channel**: YouTube.com channels - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication) - **youtube:history**: Youtube watch 
history, ":ythistory" for short (requires authentication) + - **youtube:live**: YouTube.com live streams - **youtube:playlist**: YouTube.com playlists - **youtube:playlists**: YouTube.com user/channel playlists - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6b2c5fac9..2291ed783 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.03.18' +__version__ = '2016.03.25' From 4db441de72415f8262279e48d0b15ebc9e1da369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Mar 2016 19:51:28 +0600 Subject: [PATCH 0004/3599] [once] Relax _VALID_URL (Closes #8976) --- youtube_dl/extractor/once.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py index 5db949b17..1bf96ea56 100644 --- a/youtube_dl/extractor/once.py +++ b/youtube_dl/extractor/once.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class OnceIE(InfoExtractor): - _VALID_URL = r'https?://once\.unicornmedia\.com/now/[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' + _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/[^/]+/[^/]+/(?P[^/]+)/(?P[^/]+)/(?:[^/]+/)?(?P[^/]+)/content\.(?:once|m3u8|mp4)' ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8' PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4' From 2156f16ca7babde4c5fa813dbe4e7ac1a2f758d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Mar 2016 20:14:34 +0600 Subject: [PATCH 0005/3599] [thescene] Fix extraction and improve style (Closes #8978) --- youtube_dl/extractor/thescene.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/thescene.py 
b/youtube_dl/extractor/thescene.py index 08d666eaf..3e4e14031 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urlparse from ..utils import qualities @@ -21,17 +21,21 @@ class TheSceneIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - player_url = compat_urllib_parse.urljoin( + + player_url = compat_urlparse.urljoin( url, self._html_search_regex( r'id=\'js-player-script\'[^>]+src=\'(.+?)\'', webpage, 'player url')) - self.to_screen(player_url) - player = self._download_webpage(player_url, player_url) - info = self._parse_json(self._search_regex(r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), display_id) + player = self._download_webpage(player_url, display_id) + info = self._parse_json( + self._search_regex( + r'(?m)var\s+video\s+=\s+({.+?});$', player, 'info json'), + display_id) - qualities_order = qualities(['low', 'high']) + qualities_order = qualities(('low', 'high')) formats = [{ 'format_id': '{0}-{1}'.format(f['type'].split('/')[0], f['quality']), 'url': f['src'], @@ -41,8 +45,8 @@ class TheSceneIE(InfoExtractor): return { 'id': info['id'], + 'display_id': display_id, 'title': info['title'], 'formats': formats, 'thumbnail': info.get('poster_frame'), - 'display_id': display_id, } From 15707c7e024f1f29e7abd8ddaa362196ef2d4af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 01:46:57 +0600 Subject: [PATCH 0006/3599] [compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 --- 
youtube_dl/compat.py | 26 ++++++++++++++++++++ youtube_dl/extractor/addanime.py | 4 +-- youtube_dl/extractor/animeondemand.py | 3 +-- youtube_dl/extractor/atresplayer.py | 4 +-- youtube_dl/extractor/bambuser.py | 4 +-- youtube_dl/extractor/camdemy.py | 4 +-- youtube_dl/extractor/ceskatelevize.py | 4 +-- youtube_dl/extractor/cloudy.py | 4 +-- youtube_dl/extractor/comedycentral.py | 4 +-- youtube_dl/extractor/common.py | 4 +-- youtube_dl/extractor/condenast.py | 4 +-- youtube_dl/extractor/crunchyroll.py | 8 +++--- youtube_dl/extractor/daum.py | 6 ++--- youtube_dl/extractor/dcn.py | 8 +++--- youtube_dl/extractor/dramafever.py | 4 +-- youtube_dl/extractor/eroprofile.py | 4 +-- youtube_dl/extractor/fc2.py | 5 ++-- youtube_dl/extractor/fivemin.py | 4 +-- youtube_dl/extractor/flickr.py | 4 +-- youtube_dl/extractor/funimation.py | 5 ++-- youtube_dl/extractor/gdcvault.py | 4 +-- youtube_dl/extractor/hotnewhiphop.py | 4 +-- youtube_dl/extractor/hypem.py | 4 +-- youtube_dl/extractor/internetvideoarchive.py | 4 +-- youtube_dl/extractor/iqiyi.py | 8 +++--- youtube_dl/extractor/ivideon.py | 4 +-- youtube_dl/extractor/kaltura.py | 4 +-- youtube_dl/extractor/laola1tv.py | 6 ++--- youtube_dl/extractor/leeco.py | 8 +++--- youtube_dl/extractor/lynda.py | 6 ++--- youtube_dl/extractor/matchtv.py | 4 +-- youtube_dl/extractor/metacafe.py | 4 +-- youtube_dl/extractor/minhateca.py | 4 +-- youtube_dl/extractor/mitele.py | 5 ++-- youtube_dl/extractor/moevideo.py | 4 +-- youtube_dl/extractor/moniker.py | 4 +-- youtube_dl/extractor/mooshare.py | 4 +-- youtube_dl/extractor/mtv.py | 4 +-- youtube_dl/extractor/muzu.py | 8 +++--- youtube_dl/extractor/myvideo.py | 4 +-- youtube_dl/extractor/naver.py | 6 ++--- youtube_dl/extractor/nba.py | 4 +-- youtube_dl/extractor/neteasemusic.py | 4 +-- youtube_dl/extractor/nextmovie.py | 4 +-- youtube_dl/extractor/nfb.py | 4 +-- youtube_dl/extractor/nhl.py | 6 ++--- youtube_dl/extractor/nick.py | 4 +-- youtube_dl/extractor/niconico.py | 7 +++--- 
youtube_dl/extractor/noco.py | 4 +-- youtube_dl/extractor/novamov.py | 3 +-- youtube_dl/extractor/npr.py | 4 +-- youtube_dl/extractor/ooyala.py | 4 +-- youtube_dl/extractor/patreon.py | 2 +- youtube_dl/extractor/played.py | 4 +-- youtube_dl/extractor/playtvak.py | 4 +-- youtube_dl/extractor/pluralsight.py | 4 +-- youtube_dl/extractor/porn91.py | 4 +-- youtube_dl/extractor/primesharetv.py | 4 +-- youtube_dl/extractor/promptfile.py | 4 +-- youtube_dl/extractor/prosiebensat1.py | 10 +++----- youtube_dl/extractor/shahid.py | 4 +-- youtube_dl/extractor/shared.py | 4 +-- youtube_dl/extractor/sharesix.py | 4 +-- youtube_dl/extractor/sina.py | 4 +-- youtube_dl/extractor/smotri.py | 6 ++--- youtube_dl/extractor/sohu.py | 4 +-- youtube_dl/extractor/soundcloud.py | 12 ++++----- youtube_dl/extractor/streamcloud.py | 4 +-- youtube_dl/extractor/telecinco.py | 4 +-- youtube_dl/extractor/tubitv.py | 4 +-- youtube_dl/extractor/twitch.py | 9 +++---- youtube_dl/extractor/udemy.py | 6 ++--- youtube_dl/extractor/vbox7.py | 4 +-- youtube_dl/extractor/viddler.py | 4 +-- youtube_dl/extractor/vimeo.py | 13 +++++----- youtube_dl/extractor/vk.py | 4 +-- youtube_dl/extractor/vlive.py | 4 +-- youtube_dl/extractor/vodlocker.py | 4 +-- youtube_dl/extractor/xfileshare.py | 5 ++-- youtube_dl/extractor/yahoo.py | 3 ++- youtube_dl/extractor/yandexmusic.py | 4 +-- youtube_dl/extractor/youku.py | 4 +-- youtube_dl/extractor/youtube.py | 23 +++++++++-------- youtube_dl/utils.py | 14 +++-------- 84 files changed, 229 insertions(+), 222 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index dbb91a6ef..76b6b0e38 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -169,6 +169,31 @@ except ImportError: # Python 2 string = string.replace('+', ' ') return compat_urllib_parse_unquote(string, encoding, errors) +try: + from urllib.parse import urlencode as compat_urllib_parse_urlencode +except ImportError: # Python 2 + # Python 2 will choke in urlencode on mixture of byte and 
unicode strings. + # Possible solutions are to either port it from python 3 with all + # the friends or manually ensure input query contains only byte strings. + # We will stick with latter thus recursively encoding the whole query. + def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'): + def encode_elem(e): + if isinstance(e, dict): + e = encode_dict(e) + elif isinstance(e, (list, tuple,)): + e = encode_list(e) + elif isinstance(e, compat_str): + e = e.encode(encoding) + return e + + def encode_dict(d): + return dict((encode_elem(k), encode_elem(v)) for k, v in d.items()) + + def encode_list(l): + return [encode_elem(e) for e in l] + + return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq) + try: from urllib.request import DataHandler as compat_urllib_request_DataHandler except ImportError: # Python < 3.4 @@ -588,6 +613,7 @@ __all__ = [ 'compat_urllib_parse_unquote', 'compat_urllib_parse_unquote_plus', 'compat_urllib_parse_unquote_to_bytes', + 'compat_urllib_parse_urlencode', 'compat_urllib_parse_urlparse', 'compat_urllib_request', 'compat_urllib_request_DataHandler', diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index fb1cc02e1..55a9322a7 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -6,7 +6,7 @@ from .common import InfoExtractor from ..compat import ( compat_HTTPError, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -60,7 +60,7 @@ class AddAnimeIE(InfoExtractor): confirm_url = ( parsed_url.scheme + '://' + parsed_url.netloc + action + '?' 
+ - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) self._download_webpage( confirm_url, video_id, diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py index 2cede55a7..9b01e38f5 100644 --- a/youtube_dl/extractor/animeondemand.py +++ b/youtube_dl/extractor/animeondemand.py @@ -9,7 +9,6 @@ from ..compat import ( ) from ..utils import ( determine_ext, - encode_dict, extract_attributes, ExtractorError, sanitized_Request, @@ -71,7 +70,7 @@ class AnimeOnDemandIE(InfoExtractor): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) request = sanitized_Request( - post_url, urlencode_postdata(encode_dict(login_form))) + post_url, urlencode_postdata(login_form)) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index b8f9ae005..f9568cb5b 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -8,7 +8,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( int_or_none, @@ -86,7 +86,7 @@ class AtresPlayerIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index da986e063..1a2eef48d 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -5,7 +5,7 @@ import itertools from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, 
) from ..utils import ( @@ -58,7 +58,7 @@ class BambuserIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index dd4d96cec..6ffbeabd3 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -139,7 +139,7 @@ class CamdemyFolderIE(InfoExtractor): parsed_url = list(compat_urlparse.urlparse(url)) query = dict(compat_urlparse.parse_qsl(parsed_url[4])) query.update({'displayMode': 'list'}) - parsed_url[4] = compat_urllib_parse.urlencode(query) + parsed_url[4] = compat_urllib_parse_urlencode(query) final_url = compat_urlparse.urlunparse(parsed_url) page = self._download_webpage(final_url, folder_id) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index b355111cb..d93108df5 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -5,8 +5,8 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -102,7 +102,7 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request( 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - data=compat_urllib_parse.urlencode(data)) + data=compat_urllib_parse_urlencode(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') diff --git a/youtube_dl/extractor/cloudy.py 
b/youtube_dl/extractor/cloudy.py index 0fa720ee8..9e267e6c0 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_HTTPError, ) from ..utils import ( @@ -64,7 +64,7 @@ class CloudyIE(InfoExtractor): 'errorUrl': error_url, }) - data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form)) + data_url = self._API_URL % (video_host, compat_urllib_parse_urlencode(form)) player_data = self._download_webpage( data_url, video_id, 'Downloading player data') data = compat_parse_qs(player_data) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 5b1b99675..0c59102e0 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -5,7 +5,7 @@ import re from .mtv import MTVServicesInfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): # Correct cc.com in uri uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri) - index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri})) + index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse_urlencode({'uri': uri})) idoc = self._download_xml( index_url, epTitle, 'Downloading show index', 'Unable to download episode index') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 770105a5b..b412fd030 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,7 +21,7 @@ from ..compat import ( compat_os_name, compat_str, compat_urllib_error, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -1300,7 +1300,7 @@ class 
InfoExtractor(object): 'plugin': 'flowplayer-3.2.0.1', } f4m_url += '&' if '?' in f4m_url else '?' - f4m_url += compat_urllib_parse.urlencode(f4m_params) + f4m_url += compat_urllib_parse_urlencode(f4m_params) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) continue diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 054978ff2..e8f2b5a07 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, ) @@ -97,7 +97,7 @@ class CondeNastIE(InfoExtractor): video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id') player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id') target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target') - data = compat_urllib_parse.urlencode({'videoId': video_id, + data = compat_urllib_parse_urlencode({'videoId': video_id, 'playerId': player_id, 'target': target, }) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 85fa7a725..7746f1be3 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -11,8 +11,8 @@ from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) @@ -78,7 +78,7 @@ class CrunchyrollBaseIE(InfoExtractor): # See https://github.com/rg3/youtube-dl/issues/7202. 
qs['skip_wall'] = ['1'] return compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) class CrunchyrollIE(CrunchyrollBaseIE): @@ -308,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_req = sanitized_Request(playerdata_url) - playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) + playerdata_req.data = compat_urllib_parse_urlencode({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') @@ -322,7 +322,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text streamdata_req = sanitized_Request( 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' % (stream_id, stream_format, stream_quality), - compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8')) + compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') streamdata = self._download_xml( streamdata_req, video_id, diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index c84c51058..86024a745 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -8,8 +8,8 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -70,7 +70,7 @@ class DaumIE(InfoExtractor): def _real_extract(self, url): video_id = compat_urllib_parse_unquote(self._match_id(url)) - query = 
compat_urllib_parse.urlencode({'vid': video_id}) + query = compat_urllib_parse_urlencode({'vid': video_id}) movie_data = self._download_json( 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query, video_id, 'Downloading video formats info') @@ -86,7 +86,7 @@ class DaumIE(InfoExtractor): formats = [] for format_el in movie_data['output_list']['output_list']: profile = format_el['profile'] - format_query = compat_urllib_parse.urlencode({ + format_query = compat_urllib_parse_urlencode({ 'vid': video_id, 'profile': profile, }) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 15a1c40f7..982ed94ea 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -6,7 +6,7 @@ import base64 from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, ) from ..utils import ( @@ -106,7 +106,7 @@ class DCNVideoIE(DCNBaseIE): webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'id': video_data['id'], 'user_id': video_data['user_id'], 'signature': video_data['signature'], @@ -133,7 +133,7 @@ class DCNLiveIE(DCNBaseIE): webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'signature': channel_data['signature'], @@ -174,7 +174,7 @@ class DCNSeasonIE(InfoExtractor): data['show_id'] = show_id request = sanitized_Request( 'http://admin.mangomolo.com/analytics/index.php/plus/show', - compat_urllib_parse.urlencode(data), + compat_urllib_parse_urlencode(data), { 'Origin': 'http://www.dcndigital.ae', 'Content-Type': 'application/x-www-form-urlencoded' diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index d35e88881..2101acaaf 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -6,7 +6,7 @@ import itertools from .amp import AMPIE from ..compat import ( compat_HTTPError, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -50,7 +50,7 @@ class DramaFeverBaseIE(AMPIE): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py index 7fcd0151d..297f8a6f5 100644 --- a/youtube_dl/extractor/eroprofile.py +++ b/youtube_dl/extractor/eroprofile.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, unescapeHTML @@ -43,7 +43,7 @@ class EroProfileIE(InfoExtractor): if username is None: return - query = compat_urllib_parse.urlencode({ + query = compat_urllib_parse_urlencode({ 'username': username, 'password': password, 'url': 'http://www.eroprofile.com/', diff --git 
a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 508684d2e..cacf61973 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -5,12 +5,11 @@ import hashlib from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) from ..utils import ( - encode_dict, ExtractorError, sanitized_Request, ) @@ -57,7 +56,7 @@ class FC2IE(InfoExtractor): 'Submit': ' Login ', } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') + login_data = compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') request = sanitized_Request( 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dl/extractor/fivemin.py index 67d50a386..6b8345416 100644 --- a/youtube_dl/extractor/fivemin.py +++ b/youtube_dl/extractor/fivemin.py @@ -4,8 +4,8 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, compat_parse_qs, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, ) @@ -109,7 +109,7 @@ class FiveMinIE(InfoExtractor): response = self._download_json( 'https://syn.5min.com/handlers/SenseHandler.ashx?' 
+ - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'func': 'GetResults', 'playlist': video_id, 'sid': sid, diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 18f439df9..0a3de1498 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -42,7 +42,7 @@ class FlickrIE(InfoExtractor): } if secret: query['secret'] = secret - data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note) + data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note) if data['stat'] != 'ok': raise ExtractorError(data['message']) return data diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 0f37ed786..1eb528f31 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from ..utils import ( clean_html, determine_ext, - encode_dict, int_or_none, sanitized_Request, ExtractorError, @@ -54,10 +53,10 @@ class FunimationIE(InfoExtractor): (username, password) = self._get_login_info() if username is None: return - data = urlencode_postdata(encode_dict({ + data = urlencode_postdata({ 'email_field': username, 'password_field': password, - })) + }) login_request = sanitized_Request('http://www.funimation.com/login', data, headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0', 'Content-Type': 'application/x-www-form-urlencoded' diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 3befd3e7b..cc8fa45d2 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,7 +3,7 @@ from __future__ 
import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( remove_end, HEADRequest, @@ -123,7 +123,7 @@ class GDCVaultIE(InfoExtractor): 'password': password, } - request = sanitized_Request(login_url, compat_urllib_parse.urlencode(login_form)) + request = sanitized_Request(login_url, compat_urllib_parse_urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._download_webpage(request, display_id, 'Logging in') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index efc3e8429..152d2a98a 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, HEADRequest, @@ -35,7 +35,7 @@ class HotNewHipHopIE(InfoExtractor): r'"contentUrl" content="(.*?)"', webpage, 'content URL') return self.url_result(video_url, ie='Youtube') - reqdata = compat_urllib_parse.urlencode([ + reqdata = compat_urllib_parse_urlencode([ ('mediaType', 's'), ('mediaId', video_id), ]) diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index e0ab31802..f7c913054 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -4,7 +4,7 @@ import json import time from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -28,7 +28,7 @@ class HypemIE(InfoExtractor): track_id = self._match_id(url) data = {'ax': 1, 'ts': time.time()} - request = sanitized_Request(url + '?' 
+ compat_urllib_parse.urlencode(data)) + request = sanitized_Request(url + '?' + compat_urllib_parse_urlencode(data)) response, urlh = self._download_webpage_handle( request, track_id, 'Downloading webpage with the url') diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 483cc6f9e..e60145b3d 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( xpath_with_ns, @@ -38,7 +38,7 @@ class InternetVideoArchiveIE(InfoExtractor): # Other player ids return m3u8 urls cleaned_dic['playerid'] = '247' cleaned_dic['videokbrate'] = '100000' - return compat_urllib_parse.urlencode(cleaned_dic) + return compat_urllib_parse_urlencode(cleaned_dic) def _real_extract(self, url): query = compat_urlparse.urlparse(url).query diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index ffcea30ad..9e8c9432a 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -14,7 +14,7 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -322,7 +322,7 @@ class IqiyiIE(InfoExtractor): 'bird_t': timestamp, } validation_result = self._download_json( - 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse.urlencode(validation_params), None, + 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None, note='Validate credentials', errnote='Unable to validate credentials') MSG_MAP = { @@ -456,7 +456,7 @@ class IqiyiIE(InfoExtractor): 'QY00001': auth_result['data']['u'], }) api_video_url += '?' if '?' 
not in api_video_url else '&' - api_video_url += compat_urllib_parse.urlencode(param) + api_video_url += compat_urllib_parse_urlencode(param) js = self._download_json( api_video_url, video_id, note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) @@ -494,7 +494,7 @@ class IqiyiIE(InfoExtractor): } api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ - compat_urllib_parse.urlencode(param) + compat_urllib_parse_urlencode(param) raw_data = self._download_json(api_url, video_id) return raw_data diff --git a/youtube_dl/extractor/ivideon.py b/youtube_dl/extractor/ivideon.py index 617dc8c07..3ca824f79 100644 --- a/youtube_dl/extractor/ivideon.py +++ b/youtube_dl/extractor/ivideon.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import qualities @@ -62,7 +62,7 @@ class IvideonIE(InfoExtractor): quality = qualities(self._QUALITIES) formats = [{ - 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse.urlencode({ + 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse_urlencode({ 'server': server_id, 'camera': camera_id, 'sessionId': 'demo', diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 44d7c84a1..a65697ff5 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -6,7 +6,7 @@ import base64 from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, compat_parse_qs, ) @@ -71,7 +71,7 @@ class KalturaIE(InfoExtractor): for k, v in a.items(): params['%d:%s' % (i, k)] = v - query = compat_urllib_parse.urlencode(params) + query = compat_urllib_parse_urlencode(params) url = self._API_BASE + query data = self._download_json(url, video_id, *args, **kwargs) diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index 
41d80bc12..d9dc067d2 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -90,7 +90,7 @@ class Laola1TvIE(InfoExtractor): hd_doc = self._download_xml( 'http://www.laola1.tv/server/hd_video.php?%s' - % compat_urllib_parse.urlencode({ + % compat_urllib_parse_urlencode({ 'play': video_id, 'partner': partner_id, 'portal': portal, @@ -108,7 +108,7 @@ class Laola1TvIE(InfoExtractor): req = sanitized_Request( 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' % - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'videoId': video_id, 'target': VS_TARGETS.get(kind, '2'), 'label': _v('label'), diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 462b752dd..375fdaed1 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -11,7 +11,7 @@ from .common import InfoExtractor from ..compat import ( compat_ord, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -122,7 +122,7 @@ class LeIE(InfoExtractor): 'domain': 'www.le.com' } play_json_req = sanitized_Request( - 'http://api.le.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) + 'http://api.le.com/mms/out/video/playJson?' 
+ compat_urllib_parse_urlencode(params) ) cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') if cn_verification_proxy: @@ -151,7 +151,7 @@ class LeIE(InfoExtractor): for format_id in formats: if format_id in dispatch: media_url = playurl['domain'][0] + dispatch[format_id][0] - media_url += '&' + compat_urllib_parse.urlencode({ + media_url += '&' + compat_urllib_parse_urlencode({ 'm3v': 1, 'format': 1, 'expect': 3, @@ -305,7 +305,7 @@ class LetvCloudIE(InfoExtractor): } self.sign_data(data) return self._download_json( - 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse.urlencode(data), + 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse_urlencode(data), media_id, 'Downloading playJson data for type %s' % cf) play_json = get_play_json(cf, time.time()) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index d4e1ae99d..df50cb655 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -6,7 +6,7 @@ import json from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -36,7 +36,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false' } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) login_page = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -65,7 +65,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false', } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(confirm_form).encode('utf-8')) login_page = self._download_webpage( request, None, 'Confirming log in and log out from another device') diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index 28e0dfe63..e33bfde3b 100644 
--- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import random from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( sanitized_Request, xpath_text, @@ -29,7 +29,7 @@ class MatchTVIE(InfoExtractor): def _real_extract(self, url): video_id = 'matchtv-live' request = sanitized_Request( - 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse.urlencode({ + 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse_urlencode({ 'ts': '', 'quality': 'SD', 'contentId': '561d2c0df7159b37178b4567', diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index c31e8798a..0e4865446 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -5,8 +5,8 @@ import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, @@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = sanitized_Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) + request = sanitized_Request(self._FILTER_POST, compat_urllib_parse_urlencode(disclaimer_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self.report_age_confirmation() self._download_webpage(request, None, False, 'Unable to confirm age') diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py index e46b23a6f..6ec53c303 100644 --- a/youtube_dl/extractor/minhateca.py +++ b/youtube_dl/extractor/minhateca.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( 
int_or_none, parse_duration, @@ -39,7 +39,7 @@ class MinhatecaIE(InfoExtractor): ] req = sanitized_Request( 'http://minhateca.com.br/action/License/Download', - data=compat_urllib_parse.urlencode(token_data)) + data=compat_urllib_parse_urlencode(token_data)) req.add_header('Content-Type', 'application/x-www-form-urlencoded') data = self._download_json( req, video_id, note='Downloading metadata') diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 9e584860a..76ced7928 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -2,11 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - encode_dict, get_element_by_attribute, int_or_none, ) @@ -60,7 +59,7 @@ class MiTeleIE(InfoExtractor): 'sta': '0', } media = self._download_json( - '%s/?%s' % (gat, compat_urllib_parse.urlencode(encode_dict(token_data))), + '%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)), display_id, 'Downloading %s JSON' % location['loc']) file_ = media.get('file') if not file_: diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index d930b9634..89cdd4600 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,7 +5,7 @@ import json import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -77,7 +77,7 @@ class MoeVideoIE(InfoExtractor): ], ] r_json = json.dumps(r) - post = compat_urllib_parse.urlencode({'r': r_json}) + post = compat_urllib_parse_urlencode({'r': r_json}) req = sanitized_Request(self._API_URL, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index f6bf94f2f..c5ce693f1 100644 
--- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,7 +5,7 @@ import os.path import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, remove_start, @@ -88,7 +88,7 @@ class MonikerIE(InfoExtractor): fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) - post = compat_urllib_parse.urlencode(data) + post = compat_urllib_parse_urlencode(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index f010f52d5..ee3947f43 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -58,7 +58,7 @@ class MooshareIE(InfoExtractor): } request = sanitized_Request( - 'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form)) + 'http://mooshare.biz/%s' % video_id, compat_urllib_parse_urlencode(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._sleep(5, video_id) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 824bbcb4e..640ee3d93 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -4,7 +4,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, ) from ..utils import ( @@ -171,7 +171,7 @@ class MTVServicesInfoExtractor(InfoExtractor): data = {'uri': uri} if self._LANG: data['lang'] = self._LANG - return compat_urllib_parse.urlencode(data) + return compat_urllib_parse_urlencode(data) def _get_videos_info(self, 
uri): video_id = self._id_from_uri(uri) diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py index 1e9cf8de9..cbc800481 100644 --- a/youtube_dl/extractor/muzu.py +++ b/youtube_dl/extractor/muzu.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_urlencode class MuzuTVIE(InfoExtractor): @@ -25,7 +23,7 @@ class MuzuTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - info_data = compat_urllib_parse.urlencode({ + info_data = compat_urllib_parse_urlencode({ 'format': 'json', 'url': url, }) @@ -41,7 +39,7 @@ class MuzuTVIE(InfoExtractor): if video_info.get('v%s' % quality): break - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'ai': video_id, # Even if each time you watch a video the hash changes, # it seems to work for different videos, and it will work diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index c83a1eab5..6d447a493 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -9,8 +9,8 @@ import json from .common import InfoExtractor from ..compat import ( compat_ord, - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -112,7 +112,7 @@ class MyVideoIE(InfoExtractor): encxml = compat_urllib_parse_unquote(b) if not params.get('domain'): params['domain'] = 'www.myvideo.de' - xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) + xmldata_url = '%s?%s' % (encxml, compat_urllib_parse_urlencode(params)) if 'flash_playertype=MTV' in xmldata_url: self._downloader.report_warning('avoiding MTV player') xmldata_url = ( diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 1f5fc2145..6d6f69b44 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -5,7 
+5,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -53,8 +53,8 @@ class NaverIE(InfoExtractor): raise ExtractorError('couldn\'t extract vid and key') vid = m_id.group(1) key = m_id.group(2) - query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key, }) - query_urls = compat_urllib_parse.urlencode({ + query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, }) + query_urls = compat_urllib_parse_urlencode({ 'masterVid': vid, 'protocol': 'p2p', 'inKey': key, diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 3e2b3e599..d896b0d04 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -97,7 +97,7 @@ class NBAIE(InfoExtractor): _PAGE_SIZE = 30 def _fetch_page(self, team, video_id, page): - search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse.urlencode({ + search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({ 'type': 'teamvideo', 'start': page * self._PAGE_SIZE + 1, 'npp': (page + 1) * self._PAGE_SIZE + 1, diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dl/extractor/neteasemusic.py index 7830616f8..0d36474fa 100644 --- a/youtube_dl/extractor/neteasemusic.py +++ b/youtube_dl/extractor/neteasemusic.py @@ -8,7 +8,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_str, compat_itertools_count, ) @@ -153,7 +153,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'ids': '[%s]' % song_id } info = self.query_api( - 'song/detail?' + compat_urllib_parse.urlencode(params), + 'song/detail?' 
+ compat_urllib_parse_urlencode(params), song_id, 'Downloading song info')['songs'][0] formats = self.extract_formats(info) diff --git a/youtube_dl/extractor/nextmovie.py b/youtube_dl/extractor/nextmovie.py index 657ae77a0..9ccd7d774 100644 --- a/youtube_dl/extractor/nextmovie.py +++ b/youtube_dl/extractor/nextmovie.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class NextMovieIE(MTVServicesInfoExtractor): @@ -20,7 +20,7 @@ class NextMovieIE(MTVServicesInfoExtractor): }] def _get_feed_query(self, uri): - return compat_urllib_parse.urlencode({ + return compat_urllib_parse_urlencode({ 'feed': '1505', 'mgid': uri, }) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 5bd15f7a7..ba1eefafc 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import sanitized_Request @@ -40,7 +40,7 @@ class NFBIE(InfoExtractor): request = sanitized_Request( 'https://www.nfb.ca/film/%s/player_config' % video_id, - compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii')) + compat_urllib_parse_urlencode({'getConfig': 'true'}).encode('ascii')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 8d5ce46ad..c1dea8b6c 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -7,7 +7,7 @@ import os from .common import InfoExtractor from ..compat import ( compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse ) from ..utils import ( @@ -38,7 +38,7 @@ 
class NHLBaseInfoExtractor(InfoExtractor): parsed_url = compat_urllib_parse_urlparse(initial_video_url) filename, ext = os.path.splitext(parsed_url.path) path = '%s_sd%s' % (filename, ext) - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'type': 'fvod', 'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:]) }) @@ -211,7 +211,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor): r'tab0"[^>]*?>(.*?)', webpage, 'playlist title', flags=re.DOTALL).lower().capitalize() - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'cid': cat_id, # This is the default value 'count': 12, diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index b62819ae5..ce065f2b0 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class NickIE(MTVServicesInfoExtractor): @@ -54,7 +54,7 @@ class NickIE(MTVServicesInfoExtractor): }] def _get_feed_query(self, uri): - return compat_urllib_parse.urlencode({ + return compat_urllib_parse_urlencode({ 'feed': 'nick_arc_player_prime', 'mgid': uri, }) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 586e52a4a..688f0a124 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -7,11 +7,10 @@ import datetime from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - encode_dict, ExtractorError, int_or_none, parse_duration, @@ -101,7 +100,7 @@ class NiconicoIE(InfoExtractor): 'mail': username, 'password': password, } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('utf-8') + login_data = compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') 
request = sanitized_Request( 'https://secure.nicovideo.jp/secure/login', login_data) login_results = self._download_webpage( @@ -141,7 +140,7 @@ class NiconicoIE(InfoExtractor): r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') # Get flv info - flv_info_data = compat_urllib_parse.urlencode({ + flv_info_data = compat_urllib_parse_urlencode({ 'k': thumb_play_key, 'v': video_id }) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index ec7317a2f..8f4b69a6f 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -8,7 +8,7 @@ import hashlib from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -75,7 +75,7 @@ class NocoIE(InfoExtractor): 'username': username, 'password': password, } - request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse_urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') login = self._download_json(request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index d68c1ad79..a131f7dbd 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -7,7 +7,6 @@ from ..compat import compat_urlparse from ..utils import ( ExtractorError, NO_DEFAULT, - encode_dict, sanitized_Request, urlencode_postdata, ) @@ -73,7 +72,7 @@ class NovaMovIE(InfoExtractor): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(url, post_url) request = sanitized_Request( - post_url, urlencode_postdata(encode_dict(fields))) + post_url, urlencode_postdata(fields)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Referer', post_url) webpage = self._download_webpage( diff --git 
a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py index a3f0abb4e..1777aa10b 100644 --- a/youtube_dl/extractor/npr.py +++ b/youtube_dl/extractor/npr.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( int_or_none, qualities, @@ -38,7 +38,7 @@ class NprIE(InfoExtractor): playlist_id = self._match_id(url) config = self._download_json( - 'http://api.npr.org/query?%s' % compat_urllib_parse.urlencode({ + 'http://api.npr.org/query?%s' % compat_urllib_parse_urlencode({ 'id': playlist_id, 'fields': 'titles,audio,show', 'format': 'json', diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 20b984288..16f040191 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -9,7 +9,7 @@ from ..utils import ( ExtractorError, unsmuggle_url, ) -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class OoyalaBaseIE(InfoExtractor): @@ -35,7 +35,7 @@ class OoyalaBaseIE(InfoExtractor): for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): auth_data = self._download_json( self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'domain': domain, 'supportedFormats': supported_format }), diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index ec8876c28..229750665 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -65,7 +65,7 @@ class PatreonIE(InfoExtractor): request = sanitized_Request( 'https://www.patreon.com/processLogin', - compat_urllib_parse.urlencode(login_form).encode('utf-8') + compat_urllib_parse_urlencode(login_form).encode('utf-8') ) login_page = self._download_webpage(request, None, note='Logging in as %s' % username) diff --git a/youtube_dl/extractor/played.py 
b/youtube_dl/extractor/played.py index 2856af96f..63065622b 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,7 +5,7 @@ import re import os.path from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -40,7 +40,7 @@ class PlayedIE(InfoExtractor): self._sleep(2, video_id) - post = compat_urllib_parse.urlencode(data) + post = compat_urllib_parse_urlencode(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index e360404f7..1e8096a25 100644 --- a/youtube_dl/extractor/playtvak.py +++ b/youtube_dl/extractor/playtvak.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -106,7 +106,7 @@ class PlaytvakIE(InfoExtractor): }) info_url = compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) json_info = self._download_json( info_url, video_id, diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 12e1c2862..575775f09 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -8,7 +8,7 @@ import collections from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -76,7 +76,7 @@ class PluralsightIE(PluralsightBaseIE): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) request = sanitized_Request( - post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + post_url, 
compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 63ce87ee3..9894f3262 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -2,8 +2,8 @@ from __future__ import unicode_literals from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, ) from .common import InfoExtractor from ..utils import ( @@ -50,7 +50,7 @@ class Porn91IE(InfoExtractor): r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code') max_vid = self._search_regex( r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid') - url_params = compat_urllib_parse.urlencode({ + url_params = compat_urllib_parse_urlencode({ 'VID': file_id, 'mp4': '1', 'seccode': sec_code, diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 85aae9576..188f08826 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, @@ -42,7 +42,7 @@ class PrimeShareTVIE(InfoExtractor): self._sleep(wait_time, video_id) req = sanitized_Request( - url, compat_urllib_parse.urlencode(fields), headers) + url, compat_urllib_parse_urlencode(fields), headers) video_page = self._download_webpage( req, video_id, 'Downloading video page') diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index d5357283a..67312016c 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import 
compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( determine_ext, ExtractorError, @@ -34,7 +34,7 @@ class PromptFileIE(InfoExtractor): expected=True) fields = self._hidden_inputs(webpage) - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 670e6950f..07d49d489 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -5,9 +5,7 @@ import re from hashlib import sha1 from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, determine_ext, @@ -235,7 +233,7 @@ class ProSiebenSat1IE(InfoExtractor): client_name = 'kolibri-2.0.19-splec4' client_location = url - videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({ + videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse_urlencode({ 'access_token': access_token, 'client_location': client_location, 'client_name': client_name, @@ -256,7 +254,7 @@ class ProSiebenSat1IE(InfoExtractor): client_id = g[:2] + sha1(''.join([clip_id, g, access_token, client_location, g, client_name]) .encode('utf-8')).hexdigest() - sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse.urlencode({ + sources_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources?%s' % (clip_id, compat_urllib_parse_urlencode({ 'access_token': access_token, 'client_id': client_id, 'client_location': client_location, @@ -270,7 +268,7 @@ class ProSiebenSat1IE(InfoExtractor): client_location, source_ids_str, g, client_name]) .encode('utf-8')).hexdigest() - 
url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse.urlencode({ + url_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url?%s' % (clip_id, compat_urllib_parse_urlencode({ 'access_token': access_token, 'client_id': client_id, 'client_location': client_location, diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 1178b7a27..b4433a689 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -81,7 +81,7 @@ class ShahidIE(InfoExtractor): video = self._download_json( '%s/%s/%s?%s' % ( api_vars['url'], api_vars['playerType'], api_vars['id'], - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'apiKey': 'sh@hid0nlin3', 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', })), diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 96fe0b90d..e66441997 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -45,7 +45,7 @@ class SharedIE(InfoExtractor): download_form = self._hidden_inputs(webpage) request = sanitized_Request( - url, compat_urllib_parse.urlencode(download_form)) + url, compat_urllib_parse_urlencode(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video_page = self._download_webpage( diff --git a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py index f1ea9bdb2..61dc1c235 100644 --- a/youtube_dl/extractor/sharesix.py +++ 
b/youtube_dl/extractor/sharesix.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( parse_duration, sanitized_Request, @@ -47,7 +47,7 @@ class ShareSixIE(InfoExtractor): fields = { 'method_free': 'Free' } - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index b2258a0f6..d03f1b1d4 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import sanitized_Request @@ -39,7 +39,7 @@ class SinaIE(InfoExtractor): ] def _extract_video(self, video_id): - data = compat_urllib_parse.urlencode({'vid': video_id}) + data = compat_urllib_parse_urlencode({'vid': video_id}) url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, video_id, 'Downloading video url') image_page = self._download_webpage( diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 015ef75f3..b4c6d5bbf 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -7,7 +7,7 @@ import hashlib import uuid from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -175,7 +175,7 @@ class SmotriIE(InfoExtractor): video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() request = sanitized_Request( - 'http://smotri.com/video/view/url/bot/', compat_urllib_parse.urlencode(video_form)) + 
'http://smotri.com/video/view/url/bot/', compat_urllib_parse_urlencode(video_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video = self._download_json(request, video_id, 'Downloading video JSON') @@ -338,7 +338,7 @@ class SmotriBroadcastIE(InfoExtractor): } request = sanitized_Request( - broadcast_url + '/?no_redirect=1', compat_urllib_parse.urlencode(login_form)) + broadcast_url + '/?no_redirect=1', compat_urllib_parse_urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') broadcast_page = self._download_webpage( request, broadcast_id, 'Logging in and confirming age') diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index ea8fc258d..49e5d09ae 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -170,7 +170,7 @@ class SohuIE(InfoExtractor): if retries > 0: download_note += ' (retry #%d)' % retries part_info = self._parse_json(self._download_webpage( - 'http://%s/?%s' % (allot, compat_urllib_parse.urlencode(params)), + 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)), video_id, download_note), video_id) video_url = part_info['url'] diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1efb2b980..2bca8fa3a 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -11,10 +11,9 @@ from .common import ( from ..compat import ( compat_str, compat_urlparse, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( - encode_dict, ExtractorError, int_or_none, unified_strdate, @@ -393,7 +392,7 @@ class SoundcloudUserIE(SoundcloudIE): query = COMMON_QUERY.copy() query['offset'] = 0 - next_href = base_url + '?' 
+ compat_urllib_parse.urlencode(query) + next_href = base_url + '?' + compat_urllib_parse_urlencode(query) entries = [] for i in itertools.count(): @@ -424,7 +423,7 @@ class SoundcloudUserIE(SoundcloudIE): qs = compat_urlparse.parse_qs(parsed_next_href.query) qs.update(COMMON_QUERY) next_href = compat_urlparse.urlunparse( - parsed_next_href._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) return { '_type': 'playlist', @@ -460,7 +459,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): if token: data_dict['secret_token'] = token - data = compat_urllib_parse.urlencode(data_dict) + data = compat_urllib_parse_urlencode(data_dict) data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') @@ -500,7 +499,8 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): query['client_id'] = self._CLIENT_ID query['linked_partitioning'] = '1' query['offset'] = 0 - data = compat_urllib_parse.urlencode(encode_dict(query)) + data = compat_urllib_parse_urlencode(query) + data = compat_urllib_parse_urlencode(query) next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) collected_results = 0 diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 77841b946..b17779e4b 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import sanitized_Request @@ -35,7 +35,7 @@ class StreamcloudIE(InfoExtractor): (?:id="[^"]+"\s+)? 
value="([^"]*)" ''', orig_webpage) - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) self._sleep(12, video_id) headers = { diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 2c8e9b941..d6b2560f8 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -5,8 +5,8 @@ import json from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -74,7 +74,7 @@ class TelecincoIE(InfoExtractor): info_el = self._download_xml(info_url, episode).find('./video/info') video_link = info_el.find('videoUrl/link').text - token_query = compat_urllib_parse.urlencode({'id': video_link}) + token_query = compat_urllib_parse_urlencode({'id': video_link}) token_info = self._download_json( embed_data['flashvars']['ov_tk'] + '?' + token_query, episode, diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 6d78b5dfe..50ed15163 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -5,7 +5,7 @@ import codecs import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, @@ -41,7 +41,7 @@ class TubiTvIE(InfoExtractor): 'username': username, 'password': password, } - payload = compat_urllib_parse.urlencode(form_data).encode('utf-8') + payload = compat_urllib_parse_urlencode(form_data).encode('utf-8') request = sanitized_Request(self._LOGIN_URL, payload) request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_page = self._download_webpage( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index d4169ec6d..c92dcc7b9 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -9,12 +9,11 @@ from .common import 
InfoExtractor from ..compat import ( compat_parse_qs, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, ) from ..utils import ( - encode_dict, ExtractorError, int_or_none, orderedSet, @@ -82,7 +81,7 @@ class TwitchBaseIE(InfoExtractor): post_url = compat_urlparse.urljoin(redirect_url, post_url) request = sanitized_Request( - post_url, compat_urllib_parse.urlencode(encode_dict(login_form)).encode('utf-8')) + post_url, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Referer', redirect_url) response = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -250,7 +249,7 @@ class TwitchVodIE(TwitchItemBaseIE): formats = self._extract_m3u8_formats( '%s/vod/%s?%s' % ( self._USHER_BASE, item_id, - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'allow_source': 'true', 'allow_audio_only': 'true', 'allow_spectre': 'true', @@ -442,7 +441,7 @@ class TwitchStreamIE(TwitchBaseIE): } formats = self._extract_m3u8_formats( '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)), + % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)), channel_id, 'mp4') self._prefer_source(formats) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index a9046b865..6adfb2cee 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) @@ -71,7 +71,7 @@ class UdemyIE(InfoExtractor): def _download_lecture(self, course_id, lecture_id): return self._download_json( 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % ( - course_id, lecture_id, compat_urllib_parse.urlencode({ + course_id, lecture_id, 
compat_urllib_parse_urlencode({ 'video_only': '', 'auto_play': '', 'fields[lecture]': 'title,description,asset', @@ -139,7 +139,7 @@ class UdemyIE(InfoExtractor): }) request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) request.add_header('Referer', self._ORIGIN_URL) request.add_header('Origin', self._ORIGIN_URL) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index b755dda90..77bb200e9 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -48,7 +48,7 @@ class Vbox7IE(InfoExtractor): webpage, 'title').split('/')[0].strip() info_url = 'http://vbox7.com/play/magare.do' - data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id}) + data = compat_urllib_parse_urlencode({'as3': '1', 'vid': video_id}) info_request = sanitized_Request(info_url, data) info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage') diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py index 6bfbd4d85..8d92aee87 100644 --- a/youtube_dl/extractor/viddler.py +++ b/youtube_dl/extractor/viddler.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -93,7 +93,7 @@ class ViddlerIE(InfoExtractor): headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'} request = sanitized_Request( 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s' - % 
compat_urllib_parse.urlencode(query), None, headers) + % compat_urllib_parse_urlencode(query), None, headers) data = self._download_json(request, video_id)['video'] formats = [] diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 71c30d2cd..707a5735a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -12,7 +12,6 @@ from ..compat import ( ) from ..utils import ( determine_ext, - encode_dict, ExtractorError, InAdvancePagedList, int_or_none, @@ -42,13 +41,13 @@ class VimeoBaseInfoExtractor(InfoExtractor): self.report_login() webpage = self._download_webpage(self._LOGIN_URL, None, False) token, vuid = self._extract_xsrft_and_vuid(webpage) - data = urlencode_postdata(encode_dict({ + data = urlencode_postdata({ 'action': 'login', 'email': username, 'password': password, 'service': 'vimeo', 'token': token, - })) + }) login_request = sanitized_Request(self._LOGIN_URL, data) login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_request.add_header('Referer', self._LOGIN_URL) @@ -255,10 +254,10 @@ class VimeoIE(VimeoBaseInfoExtractor): if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) token, vuid = self._extract_xsrft_and_vuid(webpage) - data = urlencode_postdata(encode_dict({ + data = urlencode_postdata({ 'password': password, 'token': token, - })) + }) if url.startswith('http://'): # vimeo only supports https now, but the user can give an http url url = url.replace('http://', 'https://') @@ -274,7 +273,7 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword') if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option') - data = urlencode_postdata(encode_dict({'password': password})) + data = urlencode_postdata({'password': password}) pass_url = url + '/check-password' password_request = 
sanitized_Request(pass_url, data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') @@ -575,7 +574,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): token, vuid = self._extract_xsrft_and_vuid(webpage) fields['token'] = token fields['password'] = password - post = urlencode_postdata(encode_dict(fields)) + post = urlencode_postdata(fields) password_path = self._search_regex( r'action="([^"]+)"', login_form, 'password URL') password_url = compat_urlparse.urljoin(page_url, password_path) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index d560a4b5e..458099a4a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -7,7 +7,7 @@ import json from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -204,7 +204,7 @@ class VKIE(InfoExtractor): request = sanitized_Request( 'https://login.vk.com/?act=login', - compat_urllib_parse.urlencode(login_form).encode('utf-8')) + compat_urllib_parse_urlencode(login_form).encode('utf-8')) login_page = self._download_webpage( request, None, note='Logging in as %s' % username) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index bd5545173..baf39bb2c 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -7,7 +7,7 @@ from ..utils import ( float_or_none, int_or_none, ) -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): @@ -43,7 +43,7 @@ class VLiveIE(InfoExtractor): playinfo = self._download_json( 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s' - % compat_urllib_parse.urlencode({ + % compat_urllib_parse_urlencode({ 'videoId': long_video_id, 'key': key, 'ptc': 'http', diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index a97995a6d..f1abca4d9 100644 --- 
a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, NO_DEFAULT, @@ -38,7 +38,7 @@ class VodlockerIE(InfoExtractor): if fields['op'] == 'download1': self._sleep(3, video_id) # they do detect when requests happen too fast! - post = compat_urllib_parse.urlencode(fields) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 94abdb4f3..4e35e1f44 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -4,10 +4,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, - encode_dict, int_or_none, sanitized_Request, ) @@ -109,7 +108,7 @@ class XFileShareIE(InfoExtractor): if countdown: self._sleep(countdown, video_id) - post = compat_urllib_parse.urlencode(encode_dict(fields)) + post = compat_urllib_parse_urlencode(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 4c6142927..b2d8f4b48 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -8,6 +8,7 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -303,7 +304,7 @@ class YahooIE(InfoExtractor): region = self._search_regex( r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', 
webpage, 'region', fatal=False, default='US') - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'protocol': 'http', 'region': region, }) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index e699e663f..158f3ea68 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -7,7 +7,7 @@ import hashlib from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, ) from ..utils import ( ExtractorError, @@ -170,7 +170,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids) request = sanitized_Request( 'https://music.yandex.ru/handlers/track-entries.jsx', - compat_urllib_parse.urlencode({ + compat_urllib_parse_urlencode({ 'entries': ','.join(missing_track_ids), 'lang': mu.get('settings', {}).get('lang', 'en'), 'external-domain': 'music.yandex.ru', diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 900eb2aba..fd7eb5a6d 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -8,7 +8,7 @@ import time from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_ord, ) from ..utils import ( @@ -138,7 +138,7 @@ class YoukuIE(InfoExtractor): '_00' + \ '/st/' + self.parse_ext_l(format) + \ '/fileid/' + get_fileid(format, n) + '?' 
+ \ - compat_urllib_parse.urlencode(param) + compat_urllib_parse_urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 96fa3b5aa..83b5840f7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -17,16 +17,15 @@ from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, compat_str, ) from ..utils import ( clean_html, - encode_dict, error_to_compat_str, ExtractorError, float_or_none, @@ -116,7 +115,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'hl': 'en_US', } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii') + login_data = compat_urllib_parse_urlencode(login_form_strs).encode('ascii') req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( @@ -149,7 +148,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'TrustDevice': 'on', }) - tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii') + tfa_data = compat_urllib_parse_urlencode(tfa_form_strs).encode('ascii') tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( @@ -1007,7 +1006,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': lang, 'v': video_id, 'fmt': ext, @@ -1056,7 +1055,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if caption_url: timestamp = args['timestamp'] # We get the available subtitles - list_params = compat_urllib_parse.urlencode({ + list_params = compat_urllib_parse_urlencode({ 'type': 'list', 'tlangs': 1, 'asrs': 1, @@ -1075,7 +1074,7 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): sub_lang = lang_node.attrib['lang_code'] sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': original_lang, 'tlang': sub_lang, 'fmt': ext, @@ -1094,7 +1093,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): caption_tracks = args['caption_tracks'] caption_translation_languages = args['caption_translation_languages'] caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] - parsed_caption_url = compat_urlparse.urlparse(caption_url) + parsed_caption_url = compat_urllib_parse_urlparse(caption_url) caption_qs = compat_parse_qs(parsed_caption_url.query) sub_lang_list = {} @@ -1110,7 +1109,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fmt': [ext], }) sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace( - query=compat_urllib_parse.urlencode(caption_qs, True))) + query=compat_urllib_parse_urlencode(caption_qs, True))) sub_formats.append({ 'url': sub_url, 'ext': ext, @@ -1140,7 +1139,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'cpn': [cpn], }) playback_url = compat_urlparse.urlunparse( - parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) self._download_webpage( playback_url, video_id, 'Marking watched', @@ -1225,7 +1224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # this can be viewed without login into Youtube url = proto + '://www.youtube.com/embed/%s' % video_id embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage') - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'sts': self._search_regex( @@ -2085,7 +2084,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): 'spf': 'navigate', } url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 
'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query) + result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) data = self._download_json( result_url, video_id='query "%s"' % query, note='Downloading page %s' % pagenum, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b6e1dc809..eacd81bf9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_str, compat_urllib_error, compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, @@ -1315,7 +1316,7 @@ def shell_quote(args): def smuggle_url(url, data): """ Pass additional data in a URL for internal use. """ - sdata = compat_urllib_parse.urlencode( + sdata = compat_urllib_parse_urlencode( {'__youtubedl_smuggle': json.dumps(data)}) return url + '#' + sdata @@ -1789,22 +1790,15 @@ def read_batch_urls(batch_fd): def urlencode_postdata(*args, **kargs): - return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii') + return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii') def update_url_query(url, query): parsed_url = compat_urlparse.urlparse(url) qs = compat_parse_qs(parsed_url.query) qs.update(query) - qs = encode_dict(qs) return compat_urlparse.urlunparse(parsed_url._replace( - query=compat_urllib_parse.urlencode(qs, True))) - - -def encode_dict(d, encoding='utf-8'): - def encode(v): - return v.encode(encoding) if isinstance(v, compat_basestring) else v - return dict((encode(k), encode(v)) for k, v in d.items()) + query=compat_urllib_parse_urlencode(qs, True))) def dict_get(d, key_or_keys, default=None, skip_false_values=True): From 6e6bc8dae577c29c072ffc5c25078b5668435435 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 02:19:24 +0600 Subject: [PATCH 0007/3599] Use urlencode_postdata across the codebase --- youtube_dl/extractor/atresplayer.py | 16 +++++++--------- 
youtube_dl/extractor/bambuser.py | 10 ++++------ youtube_dl/extractor/ceskatelevize.py | 4 ++-- youtube_dl/extractor/crunchyroll.py | 2 +- youtube_dl/extractor/dcn.py | 3 ++- youtube_dl/extractor/dramafever.py | 4 ++-- youtube_dl/extractor/fc2.py | 4 ++-- youtube_dl/extractor/gdcvault.py | 4 ++-- youtube_dl/extractor/hotnewhiphop.py | 4 ++-- youtube_dl/extractor/lynda.py | 10 ++++------ youtube_dl/extractor/metacafe.py | 4 ++-- youtube_dl/extractor/minhateca.py | 4 ++-- youtube_dl/extractor/moevideo.py | 4 ++-- youtube_dl/extractor/moniker.py | 4 ++-- youtube_dl/extractor/mooshare.py | 4 ++-- youtube_dl/extractor/nfb.py | 8 +++++--- youtube_dl/extractor/niconico.py | 3 ++- youtube_dl/extractor/noco.py | 4 ++-- youtube_dl/extractor/played.py | 4 ++-- youtube_dl/extractor/pluralsight.py | 4 ++-- youtube_dl/extractor/primesharetv.py | 4 ++-- youtube_dl/extractor/promptfile.py | 4 ++-- youtube_dl/extractor/shared.py | 4 ++-- youtube_dl/extractor/sharesix.py | 4 ++-- youtube_dl/extractor/smotri.py | 6 +++--- youtube_dl/extractor/soundcloud.py | 1 - youtube_dl/extractor/streamcloud.py | 8 +++++--- youtube_dl/extractor/tubitv.py | 4 ++-- youtube_dl/extractor/twitch.py | 3 ++- youtube_dl/extractor/udemy.py | 3 ++- youtube_dl/extractor/vbox7.py | 8 +++----- youtube_dl/extractor/vk.py | 8 +++----- youtube_dl/extractor/vodlocker.py | 4 ++-- youtube_dl/extractor/xfileshare.py | 4 ++-- youtube_dl/extractor/yandexmusic.py | 10 ++++------ youtube_dl/extractor/youtube.py | 5 +++-- 36 files changed, 90 insertions(+), 94 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index f9568cb5b..d2f388964 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -6,16 +6,14 @@ import hashlib import re from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( - int_or_none, - float_or_none, - 
sanitized_Request, - xpath_text, ExtractorError, + float_or_none, + int_or_none, + sanitized_Request, + urlencode_postdata, + xpath_text, ) @@ -86,7 +84,7 @@ class AtresPlayerIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 1a2eef48d..0eb1930c2 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -4,15 +4,13 @@ import re import itertools from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_str, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -58,7 +56,7 @@ class BambuserIE(InfoExtractor): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index d93108df5..6652c8e42 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -6,13 +6,13 @@ import re from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( ExtractorError, float_or_none, sanitized_Request, + urlencode_postdata, ) @@ -102,7 +102,7 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request( 'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', - 
data=compat_urllib_parse_urlencode(data)) + data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') req.add_header('x-addr', '127.0.0.1') diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 7746f1be3..8ae3f2890 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -308,7 +308,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_req = sanitized_Request(playerdata_url) - playerdata_req.data = compat_urllib_parse_urlencode({'current_page': webpage_url}) + playerdata_req.data = urlencode_postdata({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 982ed94ea..5deff5f30 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -15,6 +15,7 @@ from ..utils import ( sanitized_Request, smuggle_url, unsmuggle_url, + urlencode_postdata, ) @@ -174,7 +175,7 @@ class DCNSeasonIE(InfoExtractor): data['show_id'] = show_id request = sanitized_Request( 'http://admin.mangomolo.com/analytics/index.php/plus/show', - compat_urllib_parse_urlencode(data), + urlencode_postdata(data), { 'Origin': 'http://www.dcndigital.ae', 'Content-Type': 'application/x-www-form-urlencoded' diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 2101acaaf..3b6529f4b 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -6,7 +6,6 @@ import itertools from .amp import AMPIE from ..compat import ( compat_HTTPError, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -14,6 +13,7 @@ from 
..utils import ( clean_html, int_or_none, sanitized_Request, + urlencode_postdata ) @@ -50,7 +50,7 @@ class DramaFeverBaseIE(AMPIE): } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index cacf61973..c7d69ff1f 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -5,13 +5,13 @@ import hashlib from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -56,7 +56,7 @@ class FC2IE(InfoExtractor): 'Submit': ' Login ', } - login_data = compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') + login_data = urlencode_postdata(login_form_strs) request = sanitized_Request( 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index cc8fa45d2..59ed4c38f 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( remove_end, HEADRequest, sanitized_Request, + urlencode_postdata, ) @@ -123,7 +123,7 @@ class GDCVaultIE(InfoExtractor): 'password': password, } - request = sanitized_Request(login_url, compat_urllib_parse_urlencode(login_form)) + request = sanitized_Request(login_url, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._download_webpage(request, display_id, 'Logging in') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') 
diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 152d2a98a..9db565209 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, HEADRequest, sanitized_Request, + urlencode_postdata, ) @@ -35,7 +35,7 @@ class HotNewHipHopIE(InfoExtractor): r'"contentUrl" content="(.*?)"', webpage, 'content URL') return self.url_result(video_url, ie='Youtube') - reqdata = compat_urllib_parse_urlencode([ + reqdata = urlencode_postdata([ ('mediaType', 's'), ('mediaId', video_id), ]) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index df50cb655..71fd55ade 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -4,15 +4,13 @@ import re import json from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( ExtractorError, clean_html, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -36,7 +34,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false' } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) login_page = self._download_webpage( request, None, 'Logging in as %s' % username) @@ -65,7 +63,7 @@ class LyndaBaseIE(InfoExtractor): 'stayPut': 'false', } request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse_urlencode(confirm_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(confirm_form)) login_page = self._download_webpage( request, None, 'Confirming log in and log out from another device') diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 0e4865446..61dadb7a7 100644 --- 
a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -6,13 +6,13 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, ) from ..utils import ( determine_ext, ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -117,7 +117,7 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = sanitized_Request(self._FILTER_POST, compat_urllib_parse_urlencode(disclaimer_form)) + request = sanitized_Request(self._FILTER_POST, urlencode_postdata(disclaimer_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self.report_age_confirmation() self._download_webpage(request, None, False, 'Unable to confirm age') diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py index 6ec53c303..e6730b75a 100644 --- a/youtube_dl/extractor/minhateca.py +++ b/youtube_dl/extractor/minhateca.py @@ -2,12 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( int_or_none, parse_duration, parse_filesize, sanitized_Request, + urlencode_postdata, ) @@ -39,7 +39,7 @@ class MinhatecaIE(InfoExtractor): ] req = sanitized_Request( 'http://minhateca.com.br/action/License/Download', - data=compat_urllib_parse_urlencode(token_data)) + data=urlencode_postdata(token_data)) req.add_header('Content-Type', 'application/x-www-form-urlencoded') data = self._download_json( req, video_id, note='Downloading metadata') diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py index 89cdd4600..978d5d5bf 100644 --- a/youtube_dl/extractor/moevideo.py +++ b/youtube_dl/extractor/moevideo.py @@ -5,11 +5,11 @@ import json import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, 
sanitized_Request, + urlencode_postdata, ) @@ -77,7 +77,7 @@ class MoeVideoIE(InfoExtractor): ], ] r_json = json.dumps(r) - post = compat_urllib_parse_urlencode({'r': r_json}) + post = urlencode_postdata({'r': r_json}) req = sanitized_Request(self._API_URL, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index c5ce693f1..b208820fe 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -5,11 +5,11 @@ import os.path import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, remove_start, sanitized_Request, + urlencode_postdata, ) @@ -88,7 +88,7 @@ class MonikerIE(InfoExtractor): fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) - post = compat_urllib_parse_urlencode(data) + post = urlencode_postdata(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index ee3947f43..a85109a89 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -3,10 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -58,7 +58,7 @@ class MooshareIE(InfoExtractor): } request = sanitized_Request( - 'http://mooshare.biz/%s' % video_id, compat_urllib_parse_urlencode(download_form)) + 'http://mooshare.biz/%s' % video_id, urlencode_postdata(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._sleep(5, video_id) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index ba1eefafc..51e4a34f7 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -1,8 +1,10 
@@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import sanitized_Request +from ..utils import ( + sanitized_Request, + urlencode_postdata, +) class NFBIE(InfoExtractor): @@ -40,7 +42,7 @@ class NFBIE(InfoExtractor): request = sanitized_Request( 'https://www.nfb.ca/film/%s/player_config' % video_id, - compat_urllib_parse_urlencode({'getConfig': 'true'}).encode('ascii')) + urlencode_postdata({'getConfig': 'true'})) request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 688f0a124..dd75a48af 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -18,6 +18,7 @@ from ..utils import ( sanitized_Request, xpath_text, determine_ext, + urlencode_postdata, ) @@ -100,7 +101,7 @@ class NiconicoIE(InfoExtractor): 'mail': username, 'password': password, } - login_data = compat_urllib_parse_urlencode(login_form_strs).encode('utf-8') + login_data = urlencode_postdata(login_form_strs) request = sanitized_Request( 'https://secure.nicovideo.jp/secure/login', login_data) login_results = self._download_webpage( diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 8f4b69a6f..06f2bda07 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -8,7 +8,6 @@ import hashlib from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -18,6 +17,7 @@ from ..utils import ( float_or_none, parse_iso8601, sanitized_Request, + urlencode_postdata, ) @@ -75,7 +75,7 @@ class NocoIE(InfoExtractor): 'username': username, 'password': password, } - request = sanitized_Request(self._LOGIN_URL, compat_urllib_parse_urlencode(login_form)) + request = 
sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') login = self._download_json(request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 63065622b..57c875ef0 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -5,10 +5,10 @@ import re import os.path from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -40,7 +40,7 @@ class PlayedIE(InfoExtractor): self._sleep(2, video_id) - post = compat_urllib_parse_urlencode(data) + post = urlencode_postdata(data) headers = { b'Content-Type': b'application/x-www-form-urlencoded', } diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 575775f09..bc66f7a9d 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -8,7 +8,6 @@ import collections from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -17,6 +16,7 @@ from ..utils import ( parse_duration, qualities, sanitized_Request, + urlencode_postdata, ) @@ -76,7 +76,7 @@ class PluralsightIE(PluralsightBaseIE): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) request = sanitized_Request( - post_url, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + post_url, urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') response = self._download_webpage( diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 188f08826..0c1024772 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals from .common import 
InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -42,7 +42,7 @@ class PrimeShareTVIE(InfoExtractor): self._sleep(wait_time, video_id) req = sanitized_Request( - url, compat_urllib_parse_urlencode(fields), headers) + url, urlencode_postdata(fields), headers) video_page = self._download_webpage( req, video_id, 'Downloading video page') diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 67312016c..f93bd19ff 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( determine_ext, ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -34,7 +34,7 @@ class PromptFileIE(InfoExtractor): expected=True) fields = self._hidden_inputs(webpage) - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index e66441997..e7e5f653e 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -3,11 +3,11 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -45,7 +45,7 @@ class SharedIE(InfoExtractor): download_form = self._hidden_inputs(webpage) request = sanitized_Request( - url, compat_urllib_parse_urlencode(download_form)) + url, urlencode_postdata(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video_page = self._download_webpage( diff --git 
a/youtube_dl/extractor/sharesix.py b/youtube_dl/extractor/sharesix.py index 61dc1c235..9cce5ceb4 100644 --- a/youtube_dl/extractor/sharesix.py +++ b/youtube_dl/extractor/sharesix.py @@ -4,10 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( parse_duration, sanitized_Request, + urlencode_postdata, ) @@ -47,7 +47,7 @@ class ShareSixIE(InfoExtractor): fields = { 'method_free': 'Free' } - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index b4c6d5bbf..5c3fd0fec 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -7,12 +7,12 @@ import hashlib import uuid from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, unified_strdate, + urlencode_postdata, ) @@ -175,7 +175,7 @@ class SmotriIE(InfoExtractor): video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() request = sanitized_Request( - 'http://smotri.com/video/view/url/bot/', compat_urllib_parse_urlencode(video_form)) + 'http://smotri.com/video/view/url/bot/', urlencode_postdata(video_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') video = self._download_json(request, video_id, 'Downloading video JSON') @@ -338,7 +338,7 @@ class SmotriBroadcastIE(InfoExtractor): } request = sanitized_Request( - broadcast_url + '/?no_redirect=1', compat_urllib_parse_urlencode(login_form)) + broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') broadcast_page = self._download_webpage( request, broadcast_id, 'Logging in and confirming age') diff --git 
a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 2bca8fa3a..194dabc71 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -500,7 +500,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): query['linked_partitioning'] = '1' query['offset'] = 0 data = compat_urllib_parse_urlencode(query) - data = compat_urllib_parse_urlencode(query) next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) collected_results = 0 diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index b17779e4b..712359885 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import sanitized_Request +from ..utils import ( + sanitized_Request, + urlencode_postdata, +) class StreamcloudIE(InfoExtractor): @@ -35,7 +37,7 @@ class StreamcloudIE(InfoExtractor): (?:id="[^"]+"\s+)? 
value="([^"]*)" ''', orig_webpage) - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) self._sleep(12, video_id) headers = { diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 50ed15163..306ee4e15 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -5,11 +5,11 @@ import codecs import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -41,7 +41,7 @@ class TubiTvIE(InfoExtractor): 'username': username, 'password': password, } - payload = compat_urllib_parse_urlencode(form_data).encode('utf-8') + payload = urlencode_postdata(form_data) request = sanitized_Request(self._LOGIN_URL, payload) request.add_header('Content-Type', 'application/x-www-form-urlencoded') login_page = self._download_webpage( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index c92dcc7b9..36ee1adff 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -20,6 +20,7 @@ from ..utils import ( parse_duration, parse_iso8601, sanitized_Request, + urlencode_postdata, ) @@ -81,7 +82,7 @@ class TwitchBaseIE(InfoExtractor): post_url = compat_urlparse.urljoin(redirect_url, post_url) request = sanitized_Request( - post_url, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + post_url, urlencode_postdata(login_form)) request.add_header('Referer', redirect_url) response = self._download_webpage( request, None, 'Logging in as %s' % username) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 6adfb2cee..be6f3be5e 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -13,6 +13,7 @@ from ..utils import ( int_or_none, sanitized_Request, unescapeHTML, + urlencode_postdata, ) @@ -139,7 +140,7 @@ class UdemyIE(InfoExtractor): }) request = sanitized_Request( - 
self._LOGIN_URL, compat_urllib_parse_urlencode(login_form).encode('utf-8')) + self._LOGIN_URL, urlencode_postdata(login_form)) request.add_header('Referer', self._ORIGIN_URL) request.add_header('Origin', self._ORIGIN_URL) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 77bb200e9..dff1bb702 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -2,13 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) +from ..compat import compat_urlparse from ..utils import ( ExtractorError, sanitized_Request, + urlencode_postdata, ) @@ -48,7 +46,7 @@ class Vbox7IE(InfoExtractor): webpage, 'title').split('/')[0].strip() info_url = 'http://vbox7.com/play/magare.do' - data = compat_urllib_parse_urlencode({'as3': '1', 'vid': video_id}) + data = urlencode_postdata({'as3': '1', 'vid': video_id}) info_request = sanitized_Request(info_url, data) info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 458099a4a..67220f1b7 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -5,10 +5,7 @@ import re import json from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -17,6 +14,7 @@ from ..utils import ( str_to_int, unescapeHTML, unified_strdate, + urlencode_postdata, ) from .vimeo import VimeoIE from .pladform import PladformIE @@ -204,7 +202,7 @@ class VKIE(InfoExtractor): request = sanitized_Request( 'https://login.vk.com/?act=login', - compat_urllib_parse_urlencode(login_form).encode('utf-8')) + urlencode_postdata(login_form)) login_page = self._download_webpage( request, None, 
note='Logging in as %s' % username) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index f1abca4d9..a938a4007 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, NO_DEFAULT, sanitized_Request, + urlencode_postdata, ) @@ -38,7 +38,7 @@ class VodlockerIE(InfoExtractor): if fields['op'] == 'download1': self._sleep(3, video_id) # they do detect when requests happen too fast! - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 4e35e1f44..2d1504eaa 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, int_or_none, sanitized_Request, + urlencode_postdata, ) @@ -108,7 +108,7 @@ class XFileShareIE(InfoExtractor): if countdown: self._sleep(countdown, video_id) - post = compat_urllib_parse_urlencode(fields) + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 158f3ea68..025716958 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -5,15 +5,13 @@ import re import hashlib from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_str from ..utils import ( 
ExtractorError, int_or_none, float_or_none, sanitized_Request, + urlencode_postdata, ) @@ -170,14 +168,14 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids) request = sanitized_Request( 'https://music.yandex.ru/handlers/track-entries.jsx', - compat_urllib_parse_urlencode({ + urlencode_postdata({ 'entries': ','.join(missing_track_ids), 'lang': mu.get('settings', {}).get('lang', 'en'), 'external-domain': 'music.yandex.ru', 'overembed': 'false', 'sign': mu.get('authData', {}).get('user', {}).get('sign'), 'strict': 'true', - }).encode('utf-8')) + })) request.add_header('Referer', url) request.add_header('X-Requested-With', 'XMLHttpRequest') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 83b5840f7..8c321f1fc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -44,6 +44,7 @@ from ..utils import ( unified_strdate, unsmuggle_url, uppercase_escape, + urlencode_postdata, ISO3166Utils, ) @@ -115,7 +116,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'hl': 'en_US', } - login_data = compat_urllib_parse_urlencode(login_form_strs).encode('ascii') + login_data = urlencode_postdata(login_form_strs) req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( @@ -148,7 +149,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'TrustDevice': 'on', }) - tfa_data = compat_urllib_parse_urlencode(tfa_form_strs).encode('ascii') + tfa_data = urlencode_postdata(tfa_form_strs) tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( From e289d6d62cae85ded46ad6e92b33385f221b8370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 02:38:33 +0600 Subject: [PATCH 0008/3599] [test_compat] Add tests for compat_urllib_parse_urlencode --- test/test_compat.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/test_compat.py 
b/test/test_compat.py index b6bfad05e..cc105807a 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -19,6 +19,7 @@ from youtube_dl.compat import ( compat_str, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, ) @@ -70,6 +71,12 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def') self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def') + def test_compat_urllib_parse_urlencode(self): + self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') + def test_compat_shlex_split(self): self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) From 62cdb96f51eca4226b4d499e292d1ea1f9babb72 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 26 Mar 2016 08:58:03 +0100 Subject: [PATCH 0009/3599] release 2016.03.26 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2291ed783..d4cf099f5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.03.25' +__version__ = '2016.03.26' From 5964b598ff536c32198181e5027610f3d9a474bb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Mar 2016 16:17:54 +0800 Subject: [PATCH 0010/3599] [brightcove] Support alternative BrightcoveExperience layout The full URL lays in the `data` attribute of (#8862) --- youtube_dl/extractor/brightcove.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 59e8008f9..afe081d82 100644 --- 
a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -136,13 +136,16 @@ class BrightcoveLegacyIE(InfoExtractor): else: flashvars = {} + data_url = object_doc.attrib.get('data', '') + data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query) + def find_param(name): if name in flashvars: return flashvars[name] node = find_xpath_attr(object_doc, './param', 'name', name) if node is not None: return node.attrib['value'] - return None + return data_url_params.get(name) params = {} From d6c340cae5c1e5704d6e709eefb7009fcda6e213 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 26 Mar 2016 18:21:07 +0800 Subject: [PATCH 0011/3599] [brightcove] Extract more formats (#8862) --- youtube_dl/extractor/brightcove.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index afe081d82..c9e43a275 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -297,7 +297,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'uploader': video_info.get('publisherName'), } - renditions = video_info.get('renditions') + renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', []) if renditions: formats = [] for rend in renditions: @@ -319,13 +319,23 @@ class BrightcoveLegacyIE(InfoExtractor): if ext is None: ext = determine_ext(url) size = rend.get('size') - formats.append({ + a_format = { 'url': url, 'ext': ext, 'height': rend.get('frameHeight'), 'width': rend.get('frameWidth'), 'filesize': size if size != 0 else None, - }) + } + + # m3u8 manifests with remote == false are media playlists + # Not calling _extract_m3u8_formats here to save network traffic + if ext == 'm3u8': + a_format.update({ + 'ext': 'mp4', + 'protocol': 'm3u8', + }) + + formats.append(a_format) self._sort_formats(formats) info['formats'] = formats elif video_info.get('FLVFullLengthURL') is not None: From 
d10fe8358c064325349469a20be952ba794566d4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 26 Mar 2016 18:30:43 +0800 Subject: [PATCH 0012/3599] [generic] Add a test case for brightcove embed Closes #8862 --- youtube_dl/extractor/generic.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 12f2309fc..ea4009b41 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1124,7 +1124,23 @@ class GenericIE(InfoExtractor): # m3u8 downloads 'skip_download': True, } - } + }, + # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions' + # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm + { + 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html', + 'info_dict': { + 'id': '4785848093001', + 'ext': 'mp4', + 'title': 'The Cardinal Pell Interview', + 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ', + 'uploader': 'GlobeCast Australia - GlobeStream', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + }, ] def report_following_redirect(self, new_url): From e68d3a010fcf34455c7922b28a05ccc012381729 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 26 Mar 2016 18:34:51 +0800 Subject: [PATCH 0013/3599] [twitter] Fix extraction (closes #8966) HLS and DASH formats are no longer appeared in test cases. I keep them for fear of triggering new errors. 
--- youtube_dl/extractor/twitter.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index e70b2ab3c..602538e5c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -110,10 +110,9 @@ class TwitterCardIE(TwitterBaseIE): 'height': int(m.group('height')), }) - playlist = config.get('playlist') - if playlist: - video_url = playlist[0]['source'] + video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') + if video_url: f = { 'url': video_url, } @@ -185,7 +184,6 @@ class TwitterIE(InfoExtractor): 'ext': 'mp4', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'thumbnail': 're:^https?://.*\.jpg', - 'duration': 12.922, 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"', 'uploader': 'FREE THE NIPPLE', 'uploader_id': 'freethenipple', From b5a5bbf3764a3912a1d07816b6e91560fe1d8a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 19:15:32 +0600 Subject: [PATCH 0014/3599] [mailru] Extend _VALID_URL (Closes #8990) --- youtube_dl/extractor/mailru.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 46eb00492..9a7098c43 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -13,7 +13,7 @@ from ..utils import ( class MailRuIE(InfoExtractor): IE_NAME = 'mailru' IE_DESC = 'Видео@Mail.Ru' - _VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P(?:[^/]+/){3}\d+)|(?:(?P(?:[^/]+/){2})video/(?P[^/]+/\d+))\.html)' + _VALID_URL = r'https?://(?:(?:www|m)\.)?my\.mail\.ru/(?:video/.*#video=/?(?P(?:[^/]+/){3}\d+)|(?:(?P(?:[^/]+/){2})video/(?P[^/]+/\d+))\.html)' _TESTS = [ { @@ -61,6 +61,10 @@ class MailRuIE(InfoExtractor): 'duration': 6001, }, 'skip': 'Not accessible from Travis CI server', + }, + { + 
'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html', + 'only_matching': True, } ] From 17bcc626bf67453cc5ab67e56684b6c6e33f4cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 19:33:57 +0600 Subject: [PATCH 0015/3599] [utils] Extract sanitize_url routine --- youtube_dl/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index eacd81bf9..6d27b80c0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -417,9 +417,12 @@ def sanitize_path(s): # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of # unwanted failures due to missing protocol +def sanitize_url(url): + return 'http:%s' % url if url.startswith('//') else url + + def sanitized_Request(url, *args, **kwargs): - return compat_urllib_request.Request( - 'http:%s' % url if url.startswith('//') else url, *args, **kwargs) + return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs) def orderedSet(iterable): From dcf77cf1a74ebcf7def71aecf55b8641e4645835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 19:37:41 +0600 Subject: [PATCH 0016/3599] [YoutubeDL] Sanitize final URLs (Closes #8991) --- youtube_dl/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 29d7a3106..33c269f9c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -65,6 +65,7 @@ from .utils import ( SameFileError, sanitize_filename, sanitize_path, + sanitize_url, sanitized_Request, std_headers, subtitles_filename, @@ -1229,6 +1230,7 @@ class YoutubeDL(object): t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) for i, t in enumerate(thumbnails): + t['url'] = sanitize_url(t['url']) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: @@ -1263,6 +1265,7 @@ class 
YoutubeDL(object): if subtitles: for _, subtitle in subtitles.items(): for subtitle_format in subtitle: + subtitle_format['url'] = sanitize_url(subtitle_format['url']) if 'ext' not in subtitle_format: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() @@ -1292,6 +1295,8 @@ class YoutubeDL(object): if 'url' not in format: raise ExtractorError('Missing "url" key in result (index %d)' % i) + format['url'] = sanitize_url(format['url']) + if format.get('format_id') is None: format['format_id'] = compat_str(i) else: From eedb7ba5364213b5f9dc773f70403ea028a44ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 19:40:33 +0600 Subject: [PATCH 0017/3599] [YoutubeDL] Sort imports --- youtube_dl/YoutubeDL.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 33c269f9c..53a36c145 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -39,6 +39,8 @@ from .compat import ( compat_urllib_request_DataHandler, ) from .utils import ( + age_restricted, + args_to_str, ContentTooShortError, date_from_str, DateRange, @@ -58,10 +60,12 @@ from .utils import ( PagedList, parse_filesize, PerRequestProxyHandler, - PostProcessingError, platform_name, + PostProcessingError, preferredencoding, + prepend_extension, render_table, + replace_extension, SameFileError, sanitize_filename, sanitize_path, @@ -76,10 +80,6 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, - prepend_extension, - replace_extension, - args_to_str, - age_restricted, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractors From 6dee688e6d8992913bbdbcc65a413cd9897dd489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 20:42:18 +0600 Subject: [PATCH 0018/3599] [youtube:playlistsbase] Restrict playlist regex (Closes #8986) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8c321f1fc..28355bf46 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -234,7 +234,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): def _process_page(self, content): - for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)): + for playlist_id in orderedSet(re.findall( + r']+class="[^"]*yt-lockup-title[^"]*"[^>]*>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', + content)): yield self.url_result( 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') From efcba804f646dfc4eda2f9df2baf3ebed0f1bbe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 23:42:34 +0600 Subject: [PATCH 0019/3599] [udemy] Extract formats from view_html (Closes #8979) --- youtube_dl/extractor/udemy.py | 41 +++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index be6f3be5e..da2d542ec 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_HTTPError, @@ -8,6 +10,8 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, + extract_attributes, ExtractorError, float_or_none, int_or_none, @@ -73,11 +77,8 @@ class UdemyIE(InfoExtractor): return self._download_json( 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % ( course_id, lecture_id, compat_urllib_parse_urlencode({ - 'video_only': '', - 'auto_play': '', - 'fields[lecture]': 'title,description,asset', + 'fields[lecture]': 'title,description,view_html,asset', 'fields[asset]': 
'asset_type,stream_url,thumbnail_url,download_urls,data', - 'instructorPreviewMode': 'False', })), lecture_id, 'Downloading lecture JSON') @@ -246,6 +247,38 @@ class UdemyIE(InfoExtractor): f['format_id'] = '%sp' % format_id formats.append(f) + view_html = lecture.get('view_html') + if view_html: + view_html_urls = set() + for source in re.findall(r']+>', view_html): + attributes = extract_attributes(source) + src = attributes.get('src') + if not src: + continue + res = attributes.get('data-res') + height = int_or_none(res) + if src in view_html_urls: + continue + view_html_urls.add(src) + if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False) + for f in m3u8_formats: + m = re.search(r'/hls_(?P\d{3,4})_(?P\d{2,})/', f['url']) + if m: + if not f.get('height'): + f['height'] = int(m.group('height')) + if not f.get('tbr'): + f['tbr'] = int(m.group('tbr')) + formats.extend(m3u8_formats) + else: + formats.append({ + 'url': src, + 'format_id': '%dp' % height if height else None, + 'height': height, + }) + self._sort_formats(formats) return { From 48dce58ca907921f5013367a7b22235b3a3a05df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 23:42:46 +0600 Subject: [PATCH 0020/3599] [udemy] Use custom sorting --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index da2d542ec..100db4dd0 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -279,7 +279,7 @@ class UdemyIE(InfoExtractor): 'height': height, }) - self._sort_formats(formats) + self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) return { 'id': video_id, From 02d7634d24b704a099e17224e3dc71906ccc92a9 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 23:43:25 +0600 Subject: [PATCH 0021/3599] [udemy] Fix outputs' formats format_id --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 100db4dd0..89e713285 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -201,7 +201,7 @@ class UdemyIE(InfoExtractor): def extract_output_format(src): return { 'url': src['url'], - 'format_id': '%sp' % (src.get('label') or format_id), + 'format_id': '%sp' % (src.get('height') or format_id), 'width': int_or_none(src.get('width')), 'height': int_or_none(src.get('height')), 'vbr': int_or_none(src.get('video_bitrate_in_kbps')), From 62f55aa68a5409c25457a14289cc859fdd73cc1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 23:54:12 +0600 Subject: [PATCH 0022/3599] [udemy] Add outputs metadata to view_html formats --- youtube_dl/extractor/udemy.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89e713285..5a6de9982 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -218,6 +218,16 @@ class UdemyIE(InfoExtractor): if not isinstance(outputs, dict): outputs = {} + def add_output_format_meta(f, key, format_id): + output = outputs.get(key) + if isinstance(output, dict): + output_format = extract_output_format(output) + output_format.update(f) + return output_format + else: + f['format_id'] = format_id + return f + for format_id, output in outputs.items(): if isinstance(output, dict) and output.get('url'): formats.append(extract_output_format(output)) @@ -238,13 +248,7 @@ class UdemyIE(InfoExtractor): if format_id: # Some videos contain additional metadata (e.g. 
# https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) - output = outputs.get(format_id) - if isinstance(output, dict): - output_format = extract_output_format(output) - output_format.update(f) - f = output_format - else: - f['format_id'] = '%sp' % format_id + f = add_output_format_meta(f, format_id, '%sp' % format_id) formats.append(f) view_html = lecture.get('view_html') @@ -273,11 +277,10 @@ class UdemyIE(InfoExtractor): f['tbr'] = int(m.group('tbr')) formats.extend(m3u8_formats) else: - formats.append({ + formats.append(add_output_format_meta({ 'url': src, - 'format_id': '%dp' % height if height else None, 'height': height, - }) + }, res, '%dp' % height if height else None)) self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) From f973e5d54e233c139d0b407b5772ff4966c8fa30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Mar 2016 23:55:07 +0600 Subject: [PATCH 0023/3599] [udemy] Drop outputs' formats Always results in 403 --- youtube_dl/extractor/udemy.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 5a6de9982..6a3dcb8d6 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -228,10 +228,6 @@ class UdemyIE(InfoExtractor): f['format_id'] = format_id return f - for format_id, output in outputs.items(): - if isinstance(output, dict) and output.get('url'): - formats.append(extract_output_format(output)) - download_urls = asset.get('download_urls') if isinstance(download_urls, dict): video = download_urls.get('Video') From af4116f4f04a3fc8150fdb4a220ef31a0a2dd044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Mar 2016 00:02:52 +0600 Subject: [PATCH 0024/3599] [udemy] Improve format_id --- youtube_dl/extractor/udemy.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 
6a3dcb8d6..5a5e9fa9e 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -218,15 +218,13 @@ class UdemyIE(InfoExtractor): if not isinstance(outputs, dict): outputs = {} - def add_output_format_meta(f, key, format_id): + def add_output_format_meta(f, key): output = outputs.get(key) if isinstance(output, dict): output_format = extract_output_format(output) output_format.update(f) return output_format - else: - f['format_id'] = format_id - return f + return f download_urls = asset.get('download_urls') if isinstance(download_urls, dict): @@ -239,12 +237,13 @@ class UdemyIE(InfoExtractor): format_id = format_.get('label') f = { 'url': format_['file'], + 'format_id': '%sp' % format_id, 'height': int_or_none(format_id), } if format_id: # Some videos contain additional metadata (e.g. # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) - f = add_output_format_meta(f, format_id, '%sp' % format_id) + f = add_output_format_meta(f, format_id) formats.append(f) view_html = lecture.get('view_html') @@ -275,8 +274,9 @@ class UdemyIE(InfoExtractor): else: formats.append(add_output_format_meta({ 'url': src, + 'format_id': '%dp' % height if height else None, 'height': height, - }, res, '%dp' % height if height else None)) + }, res)) self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) From b24ab3e341b9082774785332a1aa6405764f7202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Mar 2016 00:09:12 +0600 Subject: [PATCH 0025/3599] [udemy] Improve paid course detection --- youtube_dl/extractor/udemy.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 5a5e9fa9e..71bea5363 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -55,21 +55,26 @@ class UdemyIE(InfoExtractor): }] def _enroll_course(self, base_url, webpage, course_id): + def combine_url(base_url, url): + 
return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url + checkout_url = unescapeHTML(self._search_regex( - r'href=(["\'])(?Phttps?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1', + r'href=(["\'])(?P(?:https?://(?:www\.)?udemy\.com)?/payment/checkout/.+?)\1', webpage, 'checkout url', group='url', default=None)) if checkout_url: raise ExtractorError( 'Course %s is not free. You have to pay for it before you can download. ' - 'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True) + 'Use this URL to confirm purchase: %s' + % (course_id, combine_url(base_url, checkout_url)), + expected=True) enroll_url = unescapeHTML(self._search_regex( r'href=(["\'])(?P(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1', webpage, 'enroll url', group='url', default=None)) if enroll_url: - if not enroll_url.startswith('http'): - enroll_url = compat_urlparse.urljoin(base_url, enroll_url) - webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') + webpage = self._download_webpage( + combine_url(base_url, enroll_url), + course_id, 'Enrolling in the course') if '>You have enrolled in' in webpage: self.to_screen('%s: Successfully enrolled in the course' % course_id) From 4cf3489c6e548aebe29534e496e7ccd638be6873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Mar 2016 01:11:11 +0600 Subject: [PATCH 0026/3599] [vevo] Update videoservice API URL (Closes #8900) --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 152fef42e..147480f64 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -152,7 +152,7 @@ class VevoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + json_url = 
'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id response = self._download_json( json_url, video_id, 'Downloading video info', 'Unable to download info') video_info = response.get('video') or {} From 00322ad4fda31864f249bce410ac3ba520e865ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Mar 2016 02:00:36 +0600 Subject: [PATCH 0027/3599] [lynda] Extract chapter metadata (#8993) --- youtube_dl/extractor/lynda.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 71fd55ade..655627479 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -219,7 +219,7 @@ class LyndaCourseIE(LyndaBaseIE): 'Course %s does not exist' % course_id, expected=True) unaccessible_videos = 0 - videos = [] + entries = [] # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided # by single video API anymore @@ -229,20 +229,22 @@ class LyndaCourseIE(LyndaBaseIE): if video.get('HasAccess') is False: unaccessible_videos += 1 continue - if video.get('ID'): - videos.append(video['ID']) + video_id = video.get('ID') + if video_id: + entries.append({ + '_type': 'url_transparent', + 'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), + 'ie_key': LyndaIE.ie_key(), + 'chapter': chapter.get('Title'), + 'chapter_number': int_or_none(chapter.get('ChapterIndex')), + 'chapter_id': compat_str(chapter.get('ID')), + }) if unaccessible_videos > 0: self._downloader.report_warning( '%s videos are only available for members (or paid members) and will not be downloaded. 
' % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) - entries = [ - self.url_result( - 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), - 'Lynda') - for video_id in videos] - course_title = course.get('Title') return self.playlist_result(entries, course_id, course_title) From 8018028d0fabb00c32b19b04984c482c6b54d2fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Mar 2016 02:10:52 +0600 Subject: [PATCH 0028/3599] [pluralsight] Extract chapter metadata (Closes #8993) --- youtube_dl/extractor/pluralsight.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index bc66f7a9d..df03dd419 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -279,13 +279,18 @@ class PluralsightCourseIE(PluralsightBaseIE): course_id, 'Downloading course data JSON') entries = [] - for module in course_data: + for num, module in enumerate(course_data, 1): for clip in module.get('clips', []): player_parameters = clip.get('playerParameters') if not player_parameters: continue - entries.append(self.url_result( - '%s/training/player?%s' % (self._API_BASE, player_parameters), - 'Pluralsight')) + entries.append({ + '_type': 'url_transparent', + 'url': '%s/training/player?%s' % (self._API_BASE, player_parameters), + 'ie_key': PluralsightIE.ie_key(), + 'chapter': module.get('title'), + 'chapter_number': num, + 'chapter_id': module.get('moduleRef'), + }) return self.playlist_result(entries, course_id, title, description) From 395fd4b08a4639f7e84754527e9facd83c8f782d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 27 Mar 2016 04:36:02 +0800 Subject: [PATCH 0029/3599] [twitter] Handle another form of embedded Vine Fixes #8996 --- youtube_dl/extractor/twitter.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 
602538e5c..1f32ea2eb 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -102,6 +102,9 @@ class TwitterCardIE(TwitterBaseIE): r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'), video_id) + if config.get('source_type') == 'vine': + return self.url_result(config['player_url'], 'Vine') + def _search_dimensions_in_video_url(a_format, video_url): m = re.search(r'/(?P\d+)x(?P\d+)/', video_url) if m: @@ -245,6 +248,18 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609', + 'md5': '89a15ed345d13b86e9a5a5e051fa308a', + 'info_dict': { + 'id': 'MIOxnrUteUd', + 'ext': 'mp4', + 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン', + 'uploader': 'TAKUMA', + 'uploader_id': '1004126642786242560', + 'upload_date': '20140615', + }, + 'add_ie': ['Vine'], }] def _real_extract(self, url): From 19dbaeece321c51fa336ef142507adf440e22e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Mar 2016 07:03:08 +0600 Subject: [PATCH 0030/3599] Remove _sort_formats from _extract_*_formats methods Now _sort_formats should be called explicitly. _sort_formats has been added to all the necessary places in code. 
Closes #8051 --- youtube_dl/extractor/abc7news.py | 1 + youtube_dl/extractor/azubu.py | 1 + youtube_dl/extractor/bet.py | 1 + youtube_dl/extractor/cbsnews.py | 1 + youtube_dl/extractor/chaturbate.py | 1 + youtube_dl/extractor/common.py | 6 ------ youtube_dl/extractor/cwtv.py | 1 + youtube_dl/extractor/dfb.py | 1 + youtube_dl/extractor/discovery.py | 29 +++++++++++++++++------------ youtube_dl/extractor/dplay.py | 2 ++ youtube_dl/extractor/dw.py | 2 +- youtube_dl/extractor/generic.py | 11 ++++++++++- youtube_dl/extractor/laola1tv.py | 1 + youtube_dl/extractor/lrt.py | 1 + youtube_dl/extractor/matchtv.py | 1 + youtube_dl/extractor/mitele.py | 1 + youtube_dl/extractor/nrk.py | 1 + youtube_dl/extractor/restudy.py | 1 + youtube_dl/extractor/rte.py | 1 + youtube_dl/extractor/rtve.py | 1 + youtube_dl/extractor/rtvnh.py | 1 + youtube_dl/extractor/shahid.py | 1 + youtube_dl/extractor/sportbox.py | 1 + youtube_dl/extractor/telecinco.py | 1 + youtube_dl/extractor/tubitv.py | 1 + youtube_dl/extractor/videomore.py | 1 + youtube_dl/extractor/vier.py | 1 + youtube_dl/extractor/viidea.py | 1 + youtube_dl/extractor/ynet.py | 4 +++- 29 files changed, 56 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py index 122dc9099..c04949c21 100644 --- a/youtube_dl/extractor/abc7news.py +++ b/youtube_dl/extractor/abc7news.py @@ -44,6 +44,7 @@ class Abc7NewsIE(InfoExtractor): 'contentURL', webpage, 'm3u8 url', fatal=True) formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') + self._sort_formats(formats) title = self._og_search_title(webpage).strip() description = self._og_search_description(webpage).strip() diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 1805b7312..efa624de1 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -120,6 +120,7 @@ class AzubuLiveIE(InfoExtractor): bc_info = self._download_json(req, user) m3u8_url = next(source['src'] for source in 
bc_info['sources'] if source['container'] == 'M2TS') formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4') + self._sort_formats(formats) return { 'id': info['id'], diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 03dad4636..986245bf0 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -94,6 +94,7 @@ class BetIE(InfoExtractor): xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') formats = self._extract_smil_formats(smil_url, display_id) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index f23bac9a1..e6b7f3584 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -122,6 +122,7 @@ class CBSNewsLiveVideoIE(InfoExtractor): for entry in f4m_formats: # URLs without the extra param induce an 404 error entry.update({'extra_param_to_segment_url': hdcore_sign}) + self._sort_formats(f4m_formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 242fba311..b2234549e 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -48,6 +48,7 @@ class ChaturbateIE(InfoExtractor): raise ExtractorError('Unable to find stream URL') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b412fd030..40ddf175c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1021,8 +1021,6 @@ class InfoExtractor(object): 'height': int_or_none(media_el.attrib.get('height')), 'preference': preference, }) - self._sort_formats(formats) - return formats def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, @@ -1143,7 +1141,6 @@ class InfoExtractor(object): last_media = None formats.append(f) last_info = {} - self._sort_formats(formats) return 
formats @staticmethod @@ -1317,8 +1314,6 @@ class InfoExtractor(object): }) continue - self._sort_formats(formats) - return formats def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): @@ -1536,7 +1531,6 @@ class InfoExtractor(object): existing_format.update(f) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) - self._sort_formats(formats) return formats def _live_title(self, name): diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index 36af67013..f5cefd966 100644 --- a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -57,6 +57,7 @@ class CWTVIE(InfoExtractor): formats = self._extract_m3u8_formats( video_data['videos']['variantplaylist']['uri'], video_id, 'mp4') + self._sort_formats(formats) thumbnails = [{ 'url': image['uri'], diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py index 263532cc6..cdfeccacb 100644 --- a/youtube_dl/extractor/dfb.py +++ b/youtube_dl/extractor/dfb.py @@ -38,6 +38,7 @@ class DFBIE(InfoExtractor): token_el = f4m_info.find('token') manifest_url = token_el.attrib['url'] + '?' 
+ 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' formats = self._extract_f4m_formats(manifest_url, display_id) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index fdce1429a..5f1275b39 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -63,18 +63,23 @@ class DiscoveryIE(InfoExtractor): video_title = info.get('playlist_title') or info.get('video_title') - entries = [{ - 'id': compat_str(video_info['id']), - 'formats': self._extract_m3u8_formats( + entries = [] + + for idx, video_info in enumerate(info['playlist']): + formats = self._extract_m3u8_formats( video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls', - note='Download m3u8 information for video %d' % (idx + 1)), - 'title': video_info['title'], - 'description': video_info.get('description'), - 'duration': parse_duration(video_info.get('video_length')), - 'webpage_url': video_info.get('href') or video_info.get('url'), - 'thumbnail': video_info.get('thumbnailURL'), - 'alt_title': video_info.get('secondary_title'), - 'timestamp': parse_iso8601(video_info.get('publishedDate')), - } for idx, video_info in enumerate(info['playlist'])] + note='Download m3u8 information for video %d' % (idx + 1)) + self._sort_formats(formats) + entries.append({ + 'id': compat_str(video_info['id']), + 'formats': formats, + 'title': video_info['title'], + 'description': video_info.get('description'), + 'duration': parse_duration(video_info.get('video_length')), + 'webpage_url': video_info.get('href') or video_info.get('url'), + 'thumbnail': video_info.get('thumbnailURL'), + 'alt_title': video_info.get('secondary_title'), + 'timestamp': parse_iso8601(video_info.get('publishedDate')), + }) return self.playlist_result(entries, display_id, video_title) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 1e7dcada6..66bbfc6ca 100644 --- a/youtube_dl/extractor/dplay.py 
+++ b/youtube_dl/extractor/dplay.py @@ -118,6 +118,8 @@ class DPlayIE(InfoExtractor): if info.get(protocol): extract_formats(protocol, info[protocol]) + self._sort_formats(formats) + return { 'id': video_id, 'display_id': display_id, diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index b6c985547..ae7c571bd 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -39,13 +39,13 @@ class DWIE(InfoExtractor): hidden_inputs = self._hidden_inputs(webpage) title = hidden_inputs['media_title'] - formats = [] if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': formats = self._extract_smil_formats( 'http://www.dw.com/smil/v-%s' % media_id, media_id, transform_source=lambda s: s.replace( 'rtmp://tv-od.dw.de/flash/', 'http://tv-download.dw.de/dwtv_video/flv/')) + self._sort_formats(formats) else: formats = [{'url': hidden_inputs['file_name']}] diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ea4009b41..f3de738f7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1310,6 +1310,7 @@ class GenericIE(InfoExtractor): 'vcodec': 'none' if m.group('type') == 'audio' else None }] info_dict['direct'] = True + self._sort_formats(formats) info_dict['formats'] = formats return info_dict @@ -1336,6 +1337,7 @@ class GenericIE(InfoExtractor): # Is it an M3U playlist? if first_bytes.startswith(b'#EXTM3U'): info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4') + self._sort_formats(info_dict['formats']) return info_dict # Maybe it's a direct link to a video? 
@@ -1360,15 +1362,19 @@ class GenericIE(InfoExtractor): if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): - return self._parse_smil(doc, url, video_id) + smil = self._parse_smil(doc, url, video_id) + self._sort_formats(smil['formats']) + return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': return self.playlist_result(self._parse_xspf(doc, video_id), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, video_id, mpd_base_url=url.rpartition('/')[0]) + self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id) + self._sort_formats(info_dict['formats']) return info_dict except compat_xml_parse_error: pass @@ -2053,6 +2059,9 @@ class GenericIE(InfoExtractor): else: entry_info_dict['url'] = video_url + if entry_info_dict.get('formats'): + self._sort_formats(entry_info_dict['formats']) + entries.append(entry_info_dict) if len(entries) == 1: diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py index d9dc067d2..d4fbafece 100644 --- a/youtube_dl/extractor/laola1tv.py +++ b/youtube_dl/extractor/laola1tv.py @@ -130,6 +130,7 @@ class Laola1TvIE(InfoExtractor): formats = self._extract_f4m_formats( '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth), video_id, f4m_id='hds') + self._sort_formats(formats) categories_str = _v('meta_sports') categories = categories_str.split(',') if categories_str else [] diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 863efd896..1072405b3 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -37,6 +37,7 @@ class LRTIE(InfoExtractor): r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', webpage, 'm3u8 url', group='url') formats = self._extract_m3u8_formats(m3u8_url, video_id, 
'mp4') + self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index e33bfde3b..80a0d7013 100644 --- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -47,6 +47,7 @@ class MatchTVIE(InfoExtractor): video_url = self._download_json(request, video_id)['data']['videoUrl'] f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(formats) return { 'id': video_id, 'title': self._live_title('Матч ТВ - Прямой эфир'), diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 76ced7928..7b4581dc5 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -67,6 +67,7 @@ class MiTeleIE(InfoExtractor): formats.extend(self._extract_f4m_formats( file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', display_id, f4m_id=loc)) + self._sort_formats(formats) title = self._search_regex( r'class="Destacado-text"[^>]*>\s*([^<]+)', webpage, 'title') diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3b21fbd4d..9df200822 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -63,6 +63,7 @@ class NRKIE(InfoExtractor): if determine_ext(media_url) == 'f4m': formats = self._extract_f4m_formats( media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds') + self._sort_formats(formats) else: formats = [{ 'url': media_url, diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py index b17c2bfc0..fd50065d4 100644 --- a/youtube_dl/extractor/restudy.py +++ b/youtube_dl/extractor/restudy.py @@ -31,6 +31,7 @@ class RestudyIE(InfoExtractor): formats = self._extract_smil_formats( 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, video_id) + self._sort_formats(formats) return { 'id': video_id, 
diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index 042bc8dab..9c89974e7 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -49,6 +49,7 @@ class RteIE(InfoExtractor): # f4m_url = server + relative_url f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url'] f4m_formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(f4m_formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 08cd1ae6c..79af47715 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -209,6 +209,7 @@ class RTVELiveIE(InfoExtractor): png = self._download_webpage(png_url, video_id, 'Downloading url information') m3u8_url = _decrypt_url(png) formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index 7c9d4b0cd..4896d09d6 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -38,6 +38,7 @@ class RTVNHIE(InfoExtractor): item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) elif item.get('type') == '': formats.append({'url': item['file']}) + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index b4433a689..d95ea06be 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -77,6 +77,7 @@ class ShahidIE(InfoExtractor): raise ExtractorError('This video is DRM protected.', expected=True) formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') + self._sort_formats(formats) video = self._download_json( '%s/%s/%s?%s' % ( diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 86d509ae5..4f0c66213 100644 --- a/youtube_dl/extractor/sportbox.py +++ 
b/youtube_dl/extractor/sportbox.py @@ -99,6 +99,7 @@ class SportBoxEmbedIE(InfoExtractor): webpage, 'hls file') formats = self._extract_m3u8_formats(hls, video_id, 'mp4') + self._sort_formats(formats) title = self._search_regex( r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title') diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index d6b2560f8..4b4b740b4 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -82,6 +82,7 @@ class TelecincoIE(InfoExtractor): ) formats = self._extract_m3u8_formats( token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native') + self._sort_formats(formats) return { 'id': embed_data['videoId'], diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 306ee4e15..7af233cd6 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -69,6 +69,7 @@ class TubiTvIE(InfoExtractor): apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu') m3u8_url = codecs.decode(apu, 'rot_13')[::-1] formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) return { 'id': video_id, diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 0bd1e1eec..04e95c66e 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -111,6 +111,7 @@ class VideomoreIE(InfoExtractor): video_url = xpath_text(video, './/video_url', 'video url', fatal=True) formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds') + self._sort_formats(formats) data = self._download_json( 'http://videomore.ru/video/tracks/%s.json' % video_id, diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index c76c20614..6645c6186 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -50,6 +50,7 @@ class VierIE(InfoExtractor): playlist_url = 
'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') + self._sort_formats(formats) title = self._og_search_title(webpage, default=display_id) description = self._og_search_description(webpage, default=None) diff --git a/youtube_dl/extractor/viidea.py b/youtube_dl/extractor/viidea.py index 03b9f1353..a4f914d14 100644 --- a/youtube_dl/extractor/viidea.py +++ b/youtube_dl/extractor/viidea.py @@ -151,6 +151,7 @@ class ViideaIE(InfoExtractor): smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id) smil = self._download_smil(smil_url, lecture_id) info = self._parse_smil(smil, smil_url, lecture_id) + self._sort_formats(info['formats']) info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id) info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id) if multipart: diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 2522551dc..0d943c343 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -41,10 +41,12 @@ class YnetIE(InfoExtractor): m = re.search(r'ynet - HOT -- (["\']+)(?P.+?)\1', title) if m: title = m.group('title') + formats = self._extract_f4m_formats(f4m_url, video_id) + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'formats': self._extract_f4m_formats(f4m_url, video_id), + 'formats': formats, 'thumbnail': self._og_search_thumbnail(webpage), } From f7df343b4a0223698f0a5320b850410d7a42be6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 07:41:19 +0600 Subject: [PATCH 0031/3599] [downloader/f4m] Extract routine for removing unsupported encrypted media --- youtube_dl/downloader/f4m.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index fc9642905..664d87543 100644 
--- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -223,6 +223,12 @@ def write_metadata_tag(stream, metadata): write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) +def remove_encrypted_media(media): + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and + 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + + def _add_ns(prop): return '{http://ns.adobe.com/f4m/1.0}%s' % prop @@ -244,9 +250,7 @@ class F4mFD(FragmentFD): # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute if 'id' not in e.attrib: self.report_error('Missing ID in f4m DRM') - media = list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and - 'drmAdditionalHeaderSetId' not in e.attrib, - media)) + media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media From b22ca76204e1a05e1c4b07d24cb6a0dbbc09d18e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 07:42:38 +0600 Subject: [PATCH 0032/3599] [extractor/common] Filter out unsupported encrypted media for f4m formats (Closes #8573) --- youtube_dl/extractor/common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 40ddf175c..9b7ab8924 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -24,6 +24,7 @@ from ..compat import ( compat_urllib_parse_urlencode, compat_urlparse, ) +from ..downloader.f4m import remove_encrypted_media from ..utils import ( NO_DEFAULT, age_restricted, @@ -989,6 +990,11 @@ class InfoExtractor(object): if not media_nodes: manifest_version = '2.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') + # Remove unsupported DRM protected media from final formats + # rendition (see https://github.com/rg3/youtube-dl/issues/8573). 
+ media_nodes = remove_encrypted_media(media_nodes) + if not media_nodes: + return formats base_url = xpath_text( manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'], 'base URL', default=None) From a122e7080bbcc505d638eaef8ab4d1e4f5bd91ee Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 27 Mar 2016 16:56:33 +0200 Subject: [PATCH 0033/3599] release 2016.03.27 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d4cf099f5..5daa7f4e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.03.26' +__version__ = '2016.03.27' From a1cf3e38a34caa333fd9703333ef55e0b3ac5a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 27 Mar 2016 23:22:51 +0600 Subject: [PATCH 0034/3599] [bbc] Extend vpid regex (Closes #9003) --- youtube_dl/extractor/bbc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 2dfcee98d..dedf721bd 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -688,6 +688,10 @@ class BBCIE(BBCCoUkIE): # custom redirection to www.bbc.com 'url': 'http://www.bbc.co.uk/news/science-environment-33661876', 'only_matching': True, + }, { + # single video article embedded with data-media-vpid + 'url': 'http://www.bbc.co.uk/sport/rowing/35908187', + 'only_matching': True, }] @classmethod @@ -817,7 +821,7 @@ class BBCIE(BBCCoUkIE): # single video story (e.g. 
http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-video-player-vpid="(%s)"' % self._ID_REGEX, + [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], webpage, 'vpid', default=None) From 8f9a477e7f260d60836843fbe8f75629e3ae8892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 00:21:08 +0600 Subject: [PATCH 0035/3599] [pornhub:playlistbase] Use orderedSet --- youtube_dl/extractor/pornhub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 5a55c25e7..670e9294a 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -12,6 +12,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + orderedSet, sanitized_Request, str_to_int, ) @@ -150,7 +151,7 @@ class PornHubPlaylistBaseIE(InfoExtractor): def _extract_entries(self, webpage): return [ self.url_result('http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key()) - for video_url in set(re.findall( + for video_url in orderedSet(re.findall( r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage)) ] From 3a23bae9ccf11c9c114d2d27e4fbc09fb0bbeafe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 00:32:57 +0600 Subject: [PATCH 0036/3599] [pornhub:playlistbase] Do not include videos not from playlist --- youtube_dl/extractor/pornhub.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 670e9294a..b3bf81a13 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -150,9 +150,12 @@ class PornHubIE(InfoExtractor): class PornHubPlaylistBaseIE(InfoExtractor): 
def _extract_entries(self, webpage): return [ - self.url_result('http://www.pornhub.com/%s' % video_url, PornHubIE.ie_key()) - for video_url in orderedSet(re.findall( - r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage)) + self.url_result( + 'http://www.pornhub.com/%s' % video_url, + PornHubIE.ie_key(), video_title=title) + for video_url, title in orderedSet(re.findall( + r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', + webpage)) ] def _real_extract(self, url): From 3454139576ad98b62162ba0a9bca4b342c5d07ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 00:50:46 +0600 Subject: [PATCH 0037/3599] [pornhub:uservideos] Add support for multipage videos (Closes #9006) --- youtube_dl/extractor/pornhub.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index b3bf81a13..ac298d0ce 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -1,10 +1,12 @@ from __future__ import unicode_literals +import itertools import os import re from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlparse, @@ -189,16 +191,31 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): class PornHubUserVideosIE(PornHubPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos' _TESTS = [{ - 'url': 'http://www.pornhub.com/users/rushandlia/videos', + 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'info_dict': { - 'id': 'rushandlia', + 'id': 'zoe_ph', }, - 'playlist_mincount': 13, + 'playlist_mincount': 171, + }, { + 'url': 'http://www.pornhub.com/users/rushandlia/videos', + 'only_matching': True, }] def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id) + 
entries = [] + for page_num in itertools.count(1): + try: + webpage = self._download_webpage( + url, user_id, 'Downloading page %d' % page_num, + query={'page': page_num}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + page_entries = self._extract_entries(webpage) + if not page_entries: + break + entries.extend(page_entries) - return self.playlist_result(self._extract_entries(webpage), user_id) + return self.playlist_result(entries, user_id) From 87d105ac6c90b4dad519de7d013623923d74d570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 01:13:47 +0600 Subject: [PATCH 0038/3599] [amp] Fix upload timestamp extraction (Closes #9007) --- youtube_dl/extractor/amp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index 69e6baff7..138fa0808 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -69,12 +69,14 @@ class AMPIE(InfoExtractor): self._sort_formats(formats) + timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date')) + return { 'id': video_id, 'title': get_media_node('title'), 'description': get_media_node('description'), 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(item.get('pubDate'), ' '), + 'timestamp': timestamp, 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')), 'subtitles': subtitles, 'formats': formats, From c8b13fec025bcb7402656095df369ad3f3225ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 01:14:12 +0600 Subject: [PATCH 0039/3599] [foxnews] Restore upload time fields in test --- youtube_dl/extractor/foxnews.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 1dc50318c..b04da2415 100644 --- 
a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -18,8 +18,8 @@ class FoxNewsIE(AMPIE): 'title': 'Frozen in Time', 'description': '16-year-old girl is size of toddler', 'duration': 265, - # 'timestamp': 1304411491, - # 'upload_date': '20110503', + 'timestamp': 1304411491, + 'upload_date': '20110503', 'thumbnail': 're:^https?://.*\.jpg$', }, }, @@ -32,8 +32,8 @@ class FoxNewsIE(AMPIE): 'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal", 'description': "Congressman discusses president's plan", 'duration': 292, - # 'timestamp': 1417662047, - # 'upload_date': '20141204', + 'timestamp': 1417662047, + 'upload_date': '20141204', 'thumbnail': 're:^https?://.*\.jpg$', }, 'params': { From 03442072c0890f10043d1de25dc3c3fcaf10f4eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 01:21:44 +0600 Subject: [PATCH 0040/3599] [pornhub] Fix typo (Closes #9008) --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index ac298d0ce..407ea08d4 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -78,7 +78,7 @@ class PornHubIE(InfoExtractor): flashvars = self._parse_json( self._search_regex( - r'var\s+flashv1ars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), + r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), video_id) if flashvars: video_title = flashvars.get('video_title') From 33f3040a3e611f45ad920bd06030691910ddf815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 28 Mar 2016 03:13:39 +0600 Subject: [PATCH 0041/3599] [YoutubeDL] Fix sanitizing subtitles' url --- youtube_dl/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 53a36c145..d7aa951ff 100755 --- 
a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1265,7 +1265,8 @@ class YoutubeDL(object): if subtitles: for _, subtitle in subtitles.items(): for subtitle_format in subtitle: - subtitle_format['url'] = sanitize_url(subtitle_format['url']) + if subtitle_format.get('url'): + subtitle_format['url'] = sanitize_url(subtitle_format['url']) if 'ext' not in subtitle_format: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() From 8d9dd3c34bd38b2545af95f8ef670b07ae1fb6ff Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Mon, 28 Mar 2016 03:08:34 +0500 Subject: [PATCH 0042/3599] [README.md] Add format_id to the list of string meta fields available for use in format selection --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 52b2a85a3..e972bf69f 100644 --- a/README.md +++ b/README.md @@ -600,6 +600,7 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin - `vcodec`: Name of the video codec in use - `container`: Name of the container format - `protocol`: The protocol that will be used for the actual download, lower-case. `http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `m3u8`, or `m3u8_native` + - `format_id`: A short description of the format Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by video hoster. From 7710bdf4e813879dbd8c5857e13a2c64e0ce8837 Mon Sep 17 00:00:00 2001 From: Sander van den Oever <sandervdo@gmail.com> Date: Sun, 6 Mar 2016 23:16:13 +0100 Subject: [PATCH 0043/3599] Add initial ISSUE_TEMPLATE Add auto-updating of youtube-dl version in ISSUE_TEMPLATE Move parts of template text and adopt makefile to new format Moved the 'kind-of-issue' section and rephrased a bit Rephrased and moved Example URL section upwards Moved ISSUE_TEMPLATE inside .github folder. 
Update makefile to match new folderstructure --- .github/ISSUE_TEMPLATE.md | 37 +++++++++++++++++++++++++++++++ Makefile | 5 ++++- devscripts/make_issue_template.py | 32 ++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 devscripts/make_issue_template.py diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 000000000..c34cbe743 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,37 @@ +Make sure you are using the **latest** version of youtube-dl. Latest youtube-dl version at this moment is **2016.03.06**. Read [our FAQ](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) if you have troubles updating. +- [ ] I've verified that I'm running youtube-dl version **2016.03.06** + +**What is the purpose of this issue? Put an `x` to the relevant box** +- [ ] Site support request (add support for a new website) +- [ ] Feature request (request new functionality) +- [ ] Bug report (encountered problems with youtube-dl) +- [ ] Other, namely ... + +**If the purpose of this issues is a site support request please provide _at least_ one example URL of a video or a playlist you are trying to download.** + +- http://some.example.url/to-video + +*If the purpose of this issue is a bug report or you are unsure about its relevance please include a log as described below.* + +**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it below wrapped in ``` for better formatting. 
It should look similar to this: +``` +$ youtube-dl -v <your command line> +[debug] System config: [] +[debug] User config: [] +[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 +[debug] youtube-dl version 2015.12.06 +[debug] Git HEAD: 135392e +[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2 +[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +[debug] Proxy map: {} +... +``` + +**Brief description of the problem/request** + +*I am having a problem with ... I have tried to do ... and ... I expected that ... would happen, but instead ... happened. Example: I tried to download a file but the site was not supported. Please add support for site xyz. Another example: I encountered a bug when downloading a video from xyz. I have tried to do a and b.* + +**Suggested solution or other information** + +*In case you have suggestions for a solution or any other relevant information you can write it here* diff --git a/Makefile b/Makefile index 6689ec06f..bfbe5e6cb 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites +all: youtube-dl README.md CONTRIBUTING.md issue_template README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe @@ -59,6 +59,9 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md +issue_template: .github/ISSUE_TEMPLATE.md youtube_dl/version.py + $(PYTHON) 
devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.md + supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py new file mode 100644 index 000000000..2fdd05035 --- /dev/null +++ b/devscripts/make_issue_template.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import io +import optparse +import re + + +def main(): + parser = optparse.OptionParser(usage='%prog FILE') + options, args = parser.parse_args() + if len(args) != 1: + parser.error('Expected an filename') + + with io.open(args[0], encoding='utf-8') as inf: + issue_template_text = inf.read() + + # Get the version from youtube_dl/version.py without importing the package + exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) + + issue_template_text = re.sub( + r'(?<=\*\*)(?P<version>[0-9\.]+)(?=\*\*)', + __version__, + issue_template_text + ) + + with io.open(args[0], 'w', encoding='utf-8') as outf: + outf.write(issue_template_text) + +if __name__ == '__main__': + main() From 3842a3e6524c8704d4295f65e6c0bce578d69e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:15:26 +0600 Subject: [PATCH 0044/3599] Add ISSUE_TEMPLATE.tmpl as template for ISSUE_TEMPLATE.md --- .github/ISSUE_TEMPLATE.tmpl | 43 +++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.tmpl diff --git a/.github/ISSUE_TEMPLATE.tmpl b/.github/ISSUE_TEMPLATE.tmpl new file mode 100644 index 000000000..9bca5b531 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.tmpl @@ -0,0 +1,43 @@ +### Please follow the guide below, provide requested information and put an `x` into all the boxes [ ] relevant to your *issue*. 
+ +#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** + +#### Before submitting an *issue* make sure you have: +- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones + +#### What is the purpose of your *issue*? +- [ ] Bug report (encountered problems with youtube-dl) +- [ ] Site support request (request for adding support for a new site) +- [ ] Feature request (request for a new functionality) +- [ ] Question +- [ ] Other + +#### The following sections concretize particular purposed issues, you can erase any section not applicable to your *issue*. + +#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: + +Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. 
It should look similar to one below (replace it with **your** log inserted between triple ```): +``` +$ youtube-dl -v <your command line> +[debug] System config: [] +[debug] User config: [] +[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 +[debug] youtube-dl version %(version)s +[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 +[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +[debug] Proxy map: {} +... +<end of log> +``` + +#### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + +#### Description of your *issue*, suggested solution and other information + +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. 
From 3bf1df51fda4189eaa9164134b56393e2c4a7f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:16:38 +0600 Subject: [PATCH 0045/3599] [devscripts/make_issue_template] Rework to use ISSUE_TEMPLATE.tmpl (Closes #8785) --- devscripts/make_issue_template.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 2fdd05035..e5564bac1 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -3,30 +3,27 @@ from __future__ import unicode_literals import io import optparse -import re def main(): - parser = optparse.OptionParser(usage='%prog FILE') + parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') options, args = parser.parse_args() - if len(args) != 1: - parser.error('Expected an filename') + if len(args) != 2: + parser.error('Expected an input and an output filename') - with io.open(args[0], encoding='utf-8') as inf: - issue_template_text = inf.read() + infile, outfile = args + + with io.open(infile, encoding='utf-8') as inf: + issue_template_tmpl = inf.read() # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), - 'youtube_dl/version.py', 'exec')) + 'youtube_dl/version.py', 'exec')) - issue_template_text = re.sub( - r'(?<=\*\*)(?P<version>[0-9\.]+)(?=\*\*)', - __version__, - issue_template_text - ) + out = issue_template_tmpl % {'version': __version__} - with io.open(args[0], 'w', encoding='utf-8') as outf: - outf.write(issue_template_text) + with io.open(outfile, 'w', encoding='utf-8') as outf: + outf.write(out) if __name__ == '__main__': main() From 8751da85a7d21702132091986bc6224d3a3af319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:17:57 +0600 Subject: [PATCH 0046/3599] [Makefile] Fix ISSUE_TEMPLATE.md target --- 
Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index bfbe5e6cb..9e3ce78c8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ -all: youtube-dl README.md CONTRIBUTING.md issue_template README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites +all: youtube-dl README.md CONTRIBUTING.md ISSUE_TEMPLATE.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete @@ -59,8 +59,8 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md -issue_template: .github/ISSUE_TEMPLATE.md youtube_dl/version.py - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.md +ISSUE_TEMPLATE.md: + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.tmpl .github/ISSUE_TEMPLATE.md supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md From 4a5a67ca2503165fc4b7c2ca9c881b79101245b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:18:52 +0600 Subject: [PATCH 0047/3599] [devscripts/release.sh] Make ISSUE_TEMPLATE.md and commit it --- devscripts/release.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 61806961c..6718ce39b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -45,9 +45,9 @@ fi /bin/echo -e "\n### Changing version in version.py..." sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py -/bin/echo -e "\n### Committing documentation and youtube_dl/version.py..." -make README.md CONTRIBUTING.md supportedsites -git add README.md CONTRIBUTING.md docs/supportedsites.md youtube_dl/version.py +/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." +make README.md CONTRIBUTING.md ISSUE_TEMPLATE.md supportedsites +git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py git commit -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." 
From a39c68f7e5a83d6bb3b1c4eb7f856b764a5488bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:19:24 +0600 Subject: [PATCH 0048/3599] Exclude make_issue_template.py from flake8 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 26857750c..5760112d4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,5 +2,5 @@ universal = True [flake8] -exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git +exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/make_issue_template.py,setup.py,build,.git ignore = E402,E501,E731 From 89924f8230987f9d6405d6487a7914197d42b12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:34:12 +0600 Subject: [PATCH 0049/3599] [devscripts/make_issue_template] Fix NameError under python3 --- devscripts/make_issue_template.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index e5564bac1..eed4b3a00 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -16,6 +16,8 @@ def main(): with io.open(infile, encoding='utf-8') as inf: issue_template_tmpl = inf.read() + __version__ = None + # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) From 3c0de33ad758c2f16f0abb1bb594c79f4cb40593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:43:48 +0600 Subject: [PATCH 0050/3599] Remove ISSUE_TEMPLATE.md --- .github/ISSUE_TEMPLATE.md | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index c34cbe743..000000000 --- 
a/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,37 +0,0 @@ -Make sure you are using the **latest** version of youtube-dl. Latest youtube-dl version at this moment is **2016.03.06**. Read [our FAQ](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) if you have troubles updating. -- [ ] I've verified that I'm running youtube-dl version **2016.03.06** - -**What is the purpose of this issue? Put an `x` to the relevant box** -- [ ] Site support request (add support for a new website) -- [ ] Feature request (request new functionality) -- [ ] Bug report (encountered problems with youtube-dl) -- [ ] Other, namely ... - -**If the purpose of this issues is a site support request please provide _at least_ one example URL of a video or a playlist you are trying to download.** - -- http://some.example.url/to-video - -*If the purpose of this issue is a bug report or you are unsure about its relevance please include a log as described below.* - -**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it below wrapped in ``` for better formatting. It should look similar to this: -``` -$ youtube-dl -v <your command line> -[debug] System config: [] -[debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] -[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2015.12.06 -[debug] Git HEAD: 135392e -[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2 -[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 -[debug] Proxy map: {} -... -``` - -**Brief description of the problem/request** - -*I am having a problem with ... I have tried to do ... and ... I expected that ... would happen, but instead ... happened. Example: I tried to download a file but the site was not supported. Please add support for site xyz. 
Another example: I encountered a bug when downloading a video from xyz. I have tried to do a and b.* - -**Suggested solution or other information** - -*In case you have suggestions for a solution or any other relevant information you can write it here* From 034947dd1eed9e7d61671c48844b3f77a4683e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 02:48:04 +0600 Subject: [PATCH 0051/3599] Rename ISSUE_TEMPLATE.tmpl in order not to be picked up by github --- .github/{ISSUE_TEMPLATE.tmpl => ISSUE_TEMPLATE_tmpl.md} | 0 Makefile | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/{ISSUE_TEMPLATE.tmpl => ISSUE_TEMPLATE_tmpl.md} (100%) diff --git a/.github/ISSUE_TEMPLATE.tmpl b/.github/ISSUE_TEMPLATE_tmpl.md similarity index 100% rename from .github/ISSUE_TEMPLATE.tmpl rename to .github/ISSUE_TEMPLATE_tmpl.md diff --git a/Makefile b/Makefile index 9e3ce78c8..3a6c37944 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md ISSUE_TEMPLATE.md: - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE.tmpl .github/ISSUE_TEMPLATE.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md From 7168a6c874c1c0e4cffb6c1e29eeda322051def7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 03:05:15 +0600 Subject: [PATCH 0052/3599] [devscripts/make_issue_template] Fix __version__ again --- devscripts/make_issue_template.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index eed4b3a00..b7ad23d83 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -16,13 +16,11 @@ def main(): with io.open(infile, 
encoding='utf-8') as inf: issue_template_tmpl = inf.read() - __version__ = None - # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) - out = issue_template_tmpl % {'version': __version__} + out = issue_template_tmpl % {'version': locals()['__version__']} with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) From 0b7bfc94221bbdb79fd4602643891c8c9c59292f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 22:02:42 +0600 Subject: [PATCH 0053/3599] Improve ISSUE_TEMPLATE_tmpl.md --- .github/ISSUE_TEMPLATE_tmpl.md | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 9bca5b531..a5e6a4233 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -1,22 +1,32 @@ -### Please follow the guide below, provide requested information and put an `x` into all the boxes [ ] relevant to your *issue*. +## Please follow the guide below -#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly +- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) +- Use *Preview* tab to see how your issue will actually look like + +--- + +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. - [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** -#### Before submitting an *issue* make sure you have: +### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones -#### What is the purpose of your *issue*? +### What is the purpose of your *issue*? - [ ] Bug report (encountered problems with youtube-dl) - [ ] Site support request (request for adding support for a new site) - [ ] Feature request (request for a new functionality) - [ ] Question - [ ] Other -#### The following sections concretize particular purposed issues, you can erase any section not applicable to your *issue*. +--- -#### :heavy_exclamation_mark: IMPORTANT :heavy_exclamation_mark: If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: +### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue* + +--- + +### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. 
It should look similar to one below (replace it with **your** log inserted between triple ```): ``` @@ -33,11 +43,16 @@ $ youtube-dl -v <your command line> <end of log> ``` -#### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): +--- + +### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc -#### Description of your *issue*, suggested solution and other information +--- + +### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. +If work on your *issue* required an account credentials please provide them or explain how one can obtain them. 
From 607619bc90e202b09028053d5eab9c03dc7cee4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 29 Mar 2016 22:04:29 +0600 Subject: [PATCH 0054/3599] Add manually generated ISSUE_TEMPLATE.md In order not to wait for the next release --- .github/ISSUE_TEMPLATE.md | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 000000000..5b1f573e7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,58 @@ +## Please follow the guide below + +- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly +- Put an `x` into all the boxes [ ] relevant to your *issue* (like that [x]) +- Use *Preview* tab to see how your issue will actually look like + +--- + +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.03.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.03.27** + +### Before submitting an *issue* make sure you have: +- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections +- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones + +### What is the purpose of your *issue*? 
+- [ ] Bug report (encountered problems with youtube-dl) +- [ ] Site support request (request for adding support for a new site) +- [ ] Feature request (request for a new functionality) +- [ ] Question +- [ ] Other + +--- + +### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue* + +--- + +### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: + +Add `-v` flag to **your command line** you run youtube-dl with, copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): +``` +$ youtube-dl -v <your command line> +[debug] System config: [] +[debug] User config: [] +[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 +[debug] youtube-dl version 2016.03.27 +[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 +[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +[debug] Proxy map: {} +... +<end of log> +``` + +--- + +### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + +--- + +### Description of your *issue*, suggested solution and other information + +Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. 
+If work on your *issue* required an account credentials please provide them or explain how one can obtain them. From 6cbb20bb090845898fcc368beed45708f05bf908 Mon Sep 17 00:00:00 2001 From: DarkstaIkers <DarkstaIkers@users.noreply.github.com> Date: Tue, 29 Mar 2016 14:26:24 -0300 Subject: [PATCH 0055/3599] Update crunchyroll.py --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8ae3f2890..44c720aaa 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -184,7 +184,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: yes + output += """ScaledBorderAndShadow: no [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding From 6e359a1534e6a20acb53e1268ec77b6b92765e22 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 30 Mar 2016 12:27:00 +0100 Subject: [PATCH 0056/3599] [comcarcoff] don not depend on crackle extractor(closes #8995) previously extraction has been delegated to crackle to extract more info and subtitles #6106 but some of the episodes can't be extracted using crackle #8995. 
--- youtube_dl/extractor/comcarcoff.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index e697d1410..747c245c8 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -41,7 +41,13 @@ class ComCarCoffIE(InfoExtractor): display_id = full_data['activeVideo']['video'] video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id] + video_id = compat_str(video_data['mediaId']) + title = video_data['title'] + formats = self._extract_m3u8_formats( + video_data['mediaUrl'], video_id, 'mp4') + self._sort_formats(formats) + thumbnails = [{ 'url': video_data['images']['thumb'], }, { @@ -54,15 +60,14 @@ class ComCarCoffIE(InfoExtractor): video_data.get('duration')) return { - '_type': 'url_transparent', - 'url': 'crackle:%s' % video_id, 'id': video_id, 'display_id': display_id, - 'title': video_data['title'], + 'title': title, 'description': video_data.get('description'), 'timestamp': timestamp, 'duration': duration, 'thumbnails': thumbnails, + 'formats': formats, 'season_number': int_or_none(video_data.get('season')), 'episode_number': int_or_none(video_data.get('episode')), 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), From afca767d196dee68b254720706a2ba191455e99b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 30 Mar 2016 22:26:43 +0600 Subject: [PATCH 0057/3599] [tumblr] Improve _VALID_URL (Closes #9027) --- youtube_dl/extractor/tumblr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index e5bcf7798..4d8b57111 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -8,7 +8,7 @@ from ..utils import int_or_none class TumblrIE(InfoExtractor): - _VALID_URL = 
r'https?://(?P<blog_name>.*?)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' + _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])' _TESTS = [{ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', 'md5': '479bb068e5b16462f5176a6828829767', From 597d52fadbf32af4f2dcc7b9e236c318145f536b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 31 Mar 2016 01:54:08 +0800 Subject: [PATCH 0058/3599] [kuwo:song] Correct song ID extraction (fixes #9033) Bug introduced in daef04a4e75ccd2ff5e2d2495baa0ac9bcf75724. --- youtube_dl/extractor/kuwo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index a586308b2..84c0f363d 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -55,7 +55,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)' + _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { From 3ae6f8fec1381df41dc05272bfe3ab03654ac4af Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 31 Mar 2016 02:11:21 +0800 Subject: [PATCH 0059/3599] [kwuo] Remove _sort_formats() from KuwoBaseIE._get_formats() Following the idea proposed in 19dbaeece321c51fa336ef142507adf440e22e22 --- youtube_dl/extractor/kuwo.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 84c0f363d..f09436332 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -44,11 +44,6 @@ class KuwoBaseIE(InfoExtractor): 'abr': file_format.get('abr'), }) - # XXX _sort_formats fails if there are not formats, while it's not the - # desired behavior if 'IPDeny' is ignored - # 
This check can be removed if https://github.com/rg3/youtube-dl/pull/8051 is merged - if not tolerate_ip_deny: - self._sort_formats(formats) return formats @@ -103,6 +98,7 @@ class KuwoIE(KuwoBaseIE): lrc_content = None formats = self._get_formats(song_id) + self._sort_formats(formats) album_id = self._html_search_regex( r'<p[^>]+class="album"[^<]+<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"', From e621a344e6ec3518420f0b13577726615c2f4485 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 31 Mar 2016 02:27:52 +0800 Subject: [PATCH 0060/3599] [kwuo] Port to new API and enable --cn-verification-proxy --- youtube_dl/extractor/kuwo.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index f09436332..86c17c931 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -26,10 +26,23 @@ class KuwoBaseIE(InfoExtractor): def _get_formats(self, song_id, tolerate_ip_deny=False): formats = [] for file_format in self._FORMATS: + headers = {} + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + headers['Ytdl-request-proxy'] = cn_verification_proxy + + query = { + 'format': file_format['ext'], + 'br': file_format.get('br', ''), + 'rid': 'MUSIC_%s' % song_id, + 'type': 'convert_url', + 'response': 'url' + } + song_url = self._download_webpage( - 'http://antiserver.kuwo.cn/anti.s?format=%s&br=%s&rid=MUSIC_%s&type=convert_url&response=url' % - (file_format['ext'], file_format.get('br', ''), song_id), + 'http://antiserver.kuwo.cn/anti.s', song_id, note='Download %s url info' % file_format['format'], + query=query, headers=headers, ) if song_url == 'IPDeny' and not tolerate_ip_deny: From 6b820a2376a953657578f9a477ff7768d3633512 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 30 Mar 2016 21:18:07 +0100 Subject: [PATCH 0061/3599] [myspace] improve extraction --- 
youtube_dl/extractor/myspace.py | 80 ++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index 83414a232..0d5238d77 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..compat import ( - compat_str, +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, ) -from ..utils import ExtractorError class MySpaceIE(InfoExtractor): @@ -24,6 +24,8 @@ class MySpaceIE(InfoExtractor): 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.', 'uploader': 'Five Minutes to the Stage', 'uploader_id': 'fiveminutestothestage', + 'timestamp': 1414108751, + 'upload_date': '20141023', }, 'params': { # rtmp download @@ -64,7 +66,7 @@ class MySpaceIE(InfoExtractor): 'ext': 'mp4', 'title': 'Starset - First Light', 'description': 'md5:2d5db6c9d11d527683bcda818d332414', - 'uploader': 'Jacob Soren', + 'uploader': 'Yumi K', 'uploader_id': 'SorenPromotions', 'upload_date': '20140725', } @@ -78,6 +80,19 @@ class MySpaceIE(InfoExtractor): player_url = self._search_regex( r'playerSwf":"([^"?]*)', webpage, 'player URL') + def rtmp_format_from_stream_url(stream_url, width=None, height=None): + rtmp_url, play_path = stream_url.split(';', 1) + return { + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': play_path, + 'player_url': player_url, + 'protocol': 'rtmp', + 'ext': 'flv', + 'width': width, + 'height': height, + } + if mobj.group('mediatype').startswith('music/song'): # songs don't store any useful info in the 'context' variable song_data = self._search_regex( @@ -93,8 +108,8 @@ class MySpaceIE(InfoExtractor): return self._search_regex( r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, song_data, name, default='', group='data') - 
streamUrl = search_data('stream-url') - if not streamUrl: + stream_url = search_data('stream-url') + if not stream_url: vevo_id = search_data('vevo-id') youtube_id = search_data('youtube-id') if vevo_id: @@ -106,36 +121,47 @@ class MySpaceIE(InfoExtractor): else: raise ExtractorError( 'Found song but don\'t know how to download it') - info = { + return { 'id': video_id, 'title': self._og_search_title(webpage), 'uploader': search_data('artist-name'), 'uploader_id': search_data('artist-username'), 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': int_or_none(search_data('duration')), + 'formats': [rtmp_format_from_stream_url(stream_url)] } else: - context = json.loads(self._search_regex( - r'context = ({.*?});', webpage, 'context')) - video = context['video'] - streamUrl = video['streamUrl'] - info = { - 'id': compat_str(video['mediaId']), + video = self._parse_json(self._search_regex( + r'context = ({.*?});', webpage, 'context'), + video_id)['video'] + formats = [] + hls_stream_url = video.get('hlsStreamUrl') + if hls_stream_url: + formats.append({ + 'format_id': 'hls', + 'url': hls_stream_url, + 'protocol': 'm3u8_native', + 'ext': 'mp4', + }) + stream_url = video.get('streamUrl') + if stream_url: + formats.append(rtmp_format_from_stream_url( + stream_url, + int_or_none(video.get('width')), + int_or_none(video.get('height')))) + self._sort_formats(formats) + return { + 'id': video_id, 'title': video['title'], - 'description': video['description'], - 'thumbnail': video['imageUrl'], - 'uploader': video['artistName'], - 'uploader_id': video['artistUsername'], + 'description': video.get('description'), + 'thumbnail': video.get('imageUrl'), + 'uploader': video.get('artistName'), + 'uploader_id': video.get('artistUsername'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('dateAdded')), + 'formats': formats, } - rtmp_url, play_path = streamUrl.split(';', 1) - info.update({ - 'url': rtmp_url, - 'play_path': play_path, - 
'player_url': player_url, - 'ext': 'flv', - }) - return info - class MySpaceAlbumIE(InfoExtractor): IE_NAME = 'MySpace:album' From c02ec7d4300d3e2607f48fe73011fd8caa38f90c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 30 Mar 2016 23:18:31 +0100 Subject: [PATCH 0062/3599] [cnbc] Add new extractor(closes #8012) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/cnbc.py | 29 +++++++++++++++++++++++++++++ youtube_dl/extractor/theplatform.py | 8 ++++---- 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 youtube_dl/extractor/cnbc.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1e4b078a4..bd1f7d293 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -127,6 +127,7 @@ from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE +from .cnbc import CNBCIE from .cnet import CNETIE from .cnn import ( CNNIE, diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py new file mode 100644 index 000000000..593e459aa --- /dev/null +++ b/youtube_dl/extractor/cnbc.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class CNBCIE(InfoExtractor): + _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://video.cnbc.com/gallery/?video=3000503714', + 'md5': '', + 'info_dict': { + 'id': '3000503714', + 'ext': 'mp4', + 'title': 'Video title goes here', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id, + {'force_smil_url': True}), + 'id': video_id, + } diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 
863914299..236c99972 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -82,7 +82,7 @@ class ThePlatformBaseIE(OnceIE): class ThePlatformIE(ThePlatformBaseIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ - (?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? + (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? |theplatform:)(?P<id>[^/\?&]+)''' _TESTS = [{ @@ -170,10 +170,10 @@ class ThePlatformIE(ThePlatformBaseIE): if not provider_id: provider_id = 'dJ5BDC' - path = provider_id + path = provider_id + '/' if mobj.group('media'): - path += '/media' - path += '/' + video_id + path += mobj.group('media') + path += video_id qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query) if 'guid' in qs_dict: From ce548296fe8bde2756fd9915bd744c904231de8f Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 00:25:11 +0100 Subject: [PATCH 0063/3599] [cnbc] fix test --- youtube_dl/extractor/cnbc.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 593e459aa..25b308752 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -9,12 +9,16 @@ class CNBCIE(InfoExtractor): _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' _TEST = { 'url': 'http://video.cnbc.com/gallery/?video=3000503714', - 'md5': '', 'info_dict': { 'id': '3000503714', 'ext': 'mp4', - 'title': 'Video title goes here', - } + 'title': 'Fighting zombies is big business', + 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): From 9cf01f7f30b698aee6d22052d8898b14a59d94bb Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: 
Thu, 31 Mar 2016 00:26:42 +0100 Subject: [PATCH 0064/3599] [nbc] add new extractor for csnne.com(#5432) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nbc.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bd1f7d293..7b0f2b21a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -442,6 +442,7 @@ from .nationalgeographic import NationalGeographicIE from .naver import NaverIE from .nba import NBAIE from .nbc import ( + CSNNEIE, NBCIE, NBCNewsIE, NBCSportsIE, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index a622f2212..43d75d3ca 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -134,6 +134,30 @@ class NBCSportsIE(InfoExtractor): NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') +class CSNNEIE(InfoExtractor): + _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)' + + _TEST = { + 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter', + 'info_dict': { + 'id': 'yvBLLUgQ8WU0', + 'ext': 'mp4', + 'title': 'SNC evening update: Wright named Red Sox\' No. 
5 starter.', + 'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': self._html_search_meta('twitter:player:stream', webpage), + 'display_id': display_id, + } + + class NBCNewsIE(ThePlatformIE): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P<id>\d+)| From fea7295b1400f27218422cb37f70e7c4e2c66c29 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 10:48:22 +0100 Subject: [PATCH 0065/3599] [brightcove] relax embed_in_page regex --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c9e43a275..0d162d337 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -439,7 +439,7 @@ class BrightcoveNewIE(InfoExtractor): </video>.*? 
<script[^>]+ src=["\'](?:https?:)?//players\.brightcove\.net/ - (\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js + (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js ''', webpage): entries.append( 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' From dd17041c82169b862e7f91cee9c5a5ed86b68ca9 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 11:59:40 +0100 Subject: [PATCH 0066/3599] [tenplay] remove extractor(fixes #6927) --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/tenplay.py | 90 -------------------------------- 2 files changed, 91 deletions(-) delete mode 100644 youtube_dl/extractor/tenplay.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7b0f2b21a..0a2dee40a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -737,7 +737,6 @@ from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .teletask import TeleTaskIE -from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .theintercept import TheInterceptIE diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py deleted file mode 100644 index 02a31a609..000000000 --- a/youtube_dl/extractor/tenplay.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - float_or_none, -) - - -class TenPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' - _TEST = { - 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', - 'info_dict': { - 'id': '2695695426001', - 'ext': 'flv', - 'title': 'TENplay: TV your way', - 'description': 'Welcome to a new TV experience. 
Enjoy a taste of the TENplay benefits.', - 'timestamp': 1380150606.889, - 'upload_date': '20130925', - 'uploader': 'TENplay', - }, - 'params': { - 'skip_download': True, # Requires rtmpdump - } - } - - _video_fields = [ - 'id', 'name', 'shortDescription', 'longDescription', 'creationDate', - 'publishedDate', 'lastModifiedDate', 'customFields', 'videoStillURL', - 'thumbnailURL', 'referenceId', 'length', 'playsTotal', - 'playsTrailingWeek', 'renditions', 'captioning', 'startDate', 'endDate'] - - def _real_extract(self, url): - webpage = self._download_webpage(url, url) - video_id = self._html_search_regex( - r'videoID: "(\d+?)"', webpage, 'video_id') - api_token = self._html_search_regex( - r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') - title = self._html_search_regex( - r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>', - webpage, 'title') - - json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title) - - formats = [] - for rendition in json['renditions']: - url = rendition['remoteUrl'] or rendition['url'] - protocol = 'rtmp' if url.startswith('rtmp') else 'http' - ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower() - - if protocol == 'rtmp': - url = url.replace('&mp4:', '') - - tbr = int_or_none(rendition.get('encodingRate'), 1000) - - formats.append({ - 'format_id': '_'.join( - ['rtmp', rendition['videoContainer'].lower(), - rendition['videoCodec'].lower(), '%sk' % tbr]), - 'width': int_or_none(rendition['frameWidth']), - 'height': int_or_none(rendition['frameHeight']), - 'tbr': tbr, - 'filesize': int_or_none(rendition['size']), - 'protocol': protocol, - 'ext': ext, - 'vcodec': rendition['videoCodec'].lower(), - 'container': rendition['videoContainer'].lower(), - 'url': url, - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': json['referenceId'], - 
'title': json['name'], - 'description': json['shortDescription'] or json['longDescription'], - 'formats': formats, - 'thumbnails': [{ - 'url': json['videoStillURL'] - }, { - 'url': json['thumbnailURL'] - }], - 'thumbnail': json['videoStillURL'], - 'duration': float_or_none(json.get('length'), 1000), - 'timestamp': float_or_none(json.get('creationDate'), 1000), - 'uploader': json.get('customFields', {}).get('production_company_distributor') or 'TENplay', - 'view_count': int_or_none(json.get('playsTotal')), - } From d84b48e3f1d9c2099a2a8ba48df3a2bd5e591807 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 13:44:55 +0100 Subject: [PATCH 0067/3599] [nationalgeographic] improve extraction --- youtube_dl/extractor/nationalgeographic.py | 30 +++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index d5e53365c..1560e3e81 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -8,14 +8,16 @@ from ..utils import ( class NationalGeographicIE(InfoExtractor): + IE_NAME = 'natgeo' _VALID_URL = r'https?://video\.nationalgeographic\.com/.*?' 
_TESTS = [ { 'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', + 'md5': '730855d559abbad6b42c2be1fa584917', 'info_dict': { - 'id': '4DmDACA6Qtk_', - 'ext': 'flv', + 'id': '0000014b-70a1-dd8c-af7f-f7b559330001', + 'ext': 'mp4', 'title': 'Mating Crabs Busted by Sharks', 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', }, @@ -23,9 +25,10 @@ class NationalGeographicIE(InfoExtractor): }, { 'url': 'http://video.nationalgeographic.com/wild/when-sharks-attack/the-real-jaws', + 'md5': '6a3105eb448c070503b3105fb9b320b5', 'info_dict': { - 'id': '_JeBD_D7PlS5', - 'ext': 'flv', + 'id': 'ngc-I0IauNSWznb_UV008GxSbwY35BZvgi2e', + 'ext': 'mp4', 'title': 'The Real Jaws', 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6', }, @@ -37,18 +40,15 @@ class NationalGeographicIE(InfoExtractor): name = url_basename(url) webpage = self._download_webpage(url, name) - feed_url = self._search_regex( - r'data-feed-url="([^"]+)"', webpage, 'feed url') guid = self._search_regex( r'id="(?:videoPlayer|player-container)"[^>]+data-guid="([^"]+)"', webpage, 'guid') - feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name) - content = feed.find('.//{http://search.yahoo.com/mrss/}content') - theplatform_id = url_basename(content.attrib.get('url')) - - return self.url_result(smuggle_url( - 'http://link.theplatform.com/s/ngs/%s?formats=MPEG4&manifest=f4m' % theplatform_id, - # For some reason, the normal links don't work and we must force - # the use of f4m - {'force_smil_url': True})) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid, + {'force_smil_url': True}), + 'id': guid, + } From c9c39c22c5740c1eedcc9ce7a10f5df199ea5c78 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 13:47:38 +0100 Subject: [PATCH 0068/3599] [nationalgeographic] add support for 
channel.nationalgeographic.com urls --- youtube_dl/extractor/__init__.py | 5 ++- youtube_dl/extractor/nationalgeographic.py | 47 ++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0a2dee40a..76354b67e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -438,7 +438,10 @@ from .myspass import MySpassIE from .myvi import MyviIE from .myvideo import MyVideoIE from .myvidster import MyVidsterIE -from .nationalgeographic import NationalGeographicIE +from .nationalgeographic import ( + NationalGeographicIE, + NationalGeographicChannelIE, +) from .naver import NaverIE from .nba import NBAIE from .nbc import ( diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 1560e3e81..61b5c700e 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( smuggle_url, url_basename, + update_url_query, ) @@ -52,3 +53,49 @@ class NationalGeographicIE(InfoExtractor): {'force_smil_url': True}), 'id': guid, } + + +class NationalGeographicChannelIE(InfoExtractor): + IE_NAME = 'natgeo:channel' + _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)' + + _TESTS = [ + { + 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/', + 'md5': '518c9aa655686cf81493af5cc21e2a04', + 'info_dict': { + 'id': 'nB5vIAfmyllm', + 'ext': 'mp4', + 'title': 'Uncovering a Universal Knowledge', + 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a', + }, + 'add_ie': ['ThePlatform'], + }, + { + 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/', + 'md5': 'c4912f656b4cbe58f3e000c489360989', + 'info_dict': { + 'id': '3TmMv9OvGwIR', + 'ext': 
'mp4', + 'title': 'The Stunning Red Bird of Paradise', + 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c', + }, + 'add_ie': ['ThePlatform'], + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + release_url = self._search_regex( + r'video_auth_playlist_url\s*=\s*"([^"]+)"', + webpage, 'release url') + + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + update_url_query(release_url, {'mbr': 'true', 'switch': 'http'}), + {'force_smil_url': True}), + 'display_id': display_id, + } From 5299bc3f91ffbb784addaee002611a52232134a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Mar 2016 20:42:41 +0600 Subject: [PATCH 0069/3599] [beeg] Switch to api v6 (Closes #9036) --- youtube_dl/extractor/beeg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 34c2a756f..9072949dd 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -34,7 +34,7 @@ class BeegIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'https://api.beeg.com/api/v5/video/%s' % video_id, video_id) + 'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id) def split(o, e): def cut(s, x): @@ -50,8 +50,8 @@ class BeegIE(InfoExtractor): return n def decrypt_key(key): - # Reverse engineered from http://static.beeg.com/cpl/1105.js - a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB' + # Reverse engineered from http://static.beeg.com/cpl/1738.js + a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B' e = compat_urllib_parse_unquote(key) o = ''.join([ compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21) From 81da8cbc4513df16d0d04dc2992d6de9ab0f4038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Mar 2016 22:05:25 +0600 Subject: [PATCH 0070/3599] [udemy] Switch to api 2.0 
(Closes #9035) --- youtube_dl/extractor/udemy.py | 60 +++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 71bea5363..2e54dbc11 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -17,6 +17,7 @@ from ..utils import ( int_or_none, sanitized_Request, unescapeHTML, + update_url_query, urlencode_postdata, ) @@ -54,6 +55,16 @@ class UdemyIE(InfoExtractor): 'only_matching': True, }] + def _extract_course_info(self, webpage, video_id): + course = self._parse_json( + unescapeHTML(self._search_regex( + r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')), + video_id, fatal=False) or {} + course_id = course.get('id') or self._search_regex( + (r'"id"\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'), + webpage, 'course id') + return course_id, course.get('title') + def _enroll_course(self, base_url, webpage, course_id): def combine_url(base_url, url): return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url @@ -98,7 +109,7 @@ class UdemyIE(InfoExtractor): error_str += ' - %s' % error_data.get('formErrors') raise ExtractorError(error_str, expected=True) - def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): + def _download_json(self, url_or_request, *args, **kwargs): headers = { 'X-Udemy-Snail-Case': 'true', 'X-Requested-With': 'XMLHttpRequest', @@ -116,7 +127,7 @@ class UdemyIE(InfoExtractor): else: url_or_request = sanitized_Request(url_or_request, headers=headers) - response = super(UdemyIE, self)._download_json(url_or_request, video_id, note) + response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs) self._handle_error(response) return response @@ -166,9 +177,7 @@ class UdemyIE(InfoExtractor): webpage = self._download_webpage(url, lecture_id) - course_id = self._search_regex( - (r'data-course-id=["\'](\d+)', r'"id"\s*:\s*(\d+)'), - 
webpage, 'course id') + course_id, _ = self._extract_course_info(webpage, lecture_id) try: lecture = self._download_lecture(course_id, lecture_id) @@ -309,29 +318,32 @@ class UdemyCourseIE(UdemyIE): webpage = self._download_webpage(url, course_path) - response = self._download_json( - 'https://www.udemy.com/api-1.1/courses/%s' % course_path, - course_path, 'Downloading course JSON') - - course_id = response['id'] - course_title = response.get('title') + course_id, title = self._extract_course_info(webpage, course_path) self._enroll_course(url, webpage, course_id) + course_url = update_url_query( + 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id, + { + 'fields[chapter]': 'title,object_index', + 'fields[lecture]': 'title', + 'page_size': '1000', + }) + response = self._download_json( - 'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, - course_id, 'Downloading course curriculum') + course_url, course_id, 'Downloading course curriculum') entries = [] - chapter, chapter_number = None, None - for asset in response: - asset_type = asset.get('assetType') or asset.get('asset_type') - if asset_type == 'Video': - asset_id = asset.get('id') - if asset_id: + chapter, chapter_number = [None] * 2 + for entry in response['results']: + clazz = entry.get('_class') + if clazz == 'lecture': + lecture_id = entry.get('id') + if lecture_id: entry = { '_type': 'url_transparent', - 'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), + 'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, entry['id']), + 'title': entry.get('title'), 'ie_key': UdemyIE.ie_key(), } if chapter_number: @@ -339,8 +351,8 @@ class UdemyCourseIE(UdemyIE): if chapter: entry['chapter'] = chapter entries.append(entry) - elif asset.get('type') == 'chapter': - chapter_number = asset.get('index') or asset.get('object_index') - chapter = asset.get('title') + elif clazz == 'chapter': + chapter_number = entry.get('object_index') + 
chapter = entry.get('title') - return self.playlist_result(entries, course_id, course_title) + return self.playlist_result(entries, course_id, title) From ed0291d1533600b21903cb98f070791a20e47433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Mar 2016 22:55:01 +0600 Subject: [PATCH 0071/3599] [utils] Add update_Request --- youtube_dl/utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6d27b80c0..4532b737b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1804,6 +1804,20 @@ def update_url_query(url, query): query=compat_urllib_parse_urlencode(qs, True))) +def update_Request(req, url=None, data=None, headers={}, query={}): + req_headers = req.headers.copy() + req_headers.update(headers) + req_data = data or req.data + req_url = update_url_query(url or req.get_full_url(), query) + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + new_req = req_type( + req_url, data=req_data, headers=req_headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + if hasattr(req, 'timeout'): + new_req.timeout = req.timeout + return new_req + + def dict_get(d, key_or_keys, default=None, skip_false_values=True): if isinstance(key_or_keys, (list, tuple)): for key in key_or_keys: From 15d260ebaa48409112270685d306a5d9152260c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Mar 2016 22:55:49 +0600 Subject: [PATCH 0072/3599] [utils] Use update_Request in http_request --- youtube_dl/utils.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 4532b737b..5c4ab2748 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -778,12 +778,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): # Substitute URL if any change after escaping if url != url_escaped: - req_type = 
HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request - new_req = req_type( - url_escaped, data=req.data, headers=req.headers, - origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) - new_req.timeout = req.timeout - req = new_req + req = update_Request(req, url=url_escaped) for h, v in std_headers.items(): # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 From 41d06b042427aa019200bb80a1935d12110f50b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Mar 2016 22:58:38 +0600 Subject: [PATCH 0073/3599] [extractor/common] Improve _request_webpage * Do not ignore data, headers and query for Requests * Default values for headers and query switched to dicts since these are used by urllib itself --- youtube_dl/extractor/common.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b7ab8924..85ac0400c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -22,6 +22,7 @@ from ..compat import ( compat_str, compat_urllib_error, compat_urllib_parse_urlencode, + compat_urllib_request, compat_urlparse, ) from ..downloader.f4m import remove_encrypted_media @@ -49,6 +50,7 @@ from ..utils import ( determine_protocol, parse_duration, mimetype2ext, + update_Request, update_url_query, ) @@ -347,7 +349,7 @@ class InfoExtractor(object): def IE_NAME(self): return compat_str(type(self).__name__[:-2]) - def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None): + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): """ Returns the response handle """ if note is None: self.report_download_webpage(video_id) @@ -357,11 +359,14 @@ class InfoExtractor(object): else: self.to_screen('%s: %s' % (video_id, note)) 
# data, headers and query params will be ignored for `Request` objects - if isinstance(url_or_request, compat_str): + if isinstance(url_or_request, compat_urllib_request.Request): + url_or_request = update_Request( + url_or_request, data=data, headers=headers, query=query) + else: if query: url_or_request = update_url_query(url_or_request, query) if data or headers: - url_or_request = sanitized_Request(url_or_request, data, headers or {}) + url_or_request = sanitized_Request(url_or_request, data, headers) try: return self._downloader.urlopen(url_or_request) except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: @@ -377,7 +382,7 @@ class InfoExtractor(object): self._downloader.report_warning(errmsg) return False - def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None): + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}): """ Returns a tuple (page content as string, URL handle) """ # Strip hashes from the URL (#1038) if isinstance(url_or_request, (compat_str, str)): @@ -470,7 +475,7 @@ class InfoExtractor(object): return content - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}): """ Returns the data of the page as a string """ success = False try_count = 0 @@ -491,7 +496,7 @@ class InfoExtractor(object): def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', - transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None): + transform_source=None, fatal=True, encoding=None, data=None, 
headers={}, query={}): """Return the xml as an xml.etree.ElementTree.Element""" xml_string = self._download_webpage( url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) @@ -505,7 +510,7 @@ class InfoExtractor(object): note='Downloading JSON metadata', errnote='Unable to download JSON metadata', transform_source=None, - fatal=True, encoding=None, data=None, headers=None, query=None): + fatal=True, encoding=None, data=None, headers={}, query={}): json_string = self._download_webpage( url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query) From 6bb4600717cb5378d392845e5c9bebe236a1b224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Mar 2016 22:59:19 +0600 Subject: [PATCH 0074/3599] [udemy:course] Simplify course curriculum downloading --- youtube_dl/extractor/udemy.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 2e54dbc11..1784599e9 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -322,17 +322,14 @@ class UdemyCourseIE(UdemyIE): self._enroll_course(url, webpage, course_id) - course_url = update_url_query( + response = self._download_json( 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id, - { + course_id, 'Downloading course curriculum', query={ 'fields[chapter]': 'title,object_index', 'fields[lecture]': 'title', 'page_size': '1000', }) - response = self._download_json( - course_url, course_id, 'Downloading course curriculum') - entries = [] chapter, chapter_number = [None] * 2 for entry in response['results']: From 86f3b66cec3a1ba6d3b197018a954865ca14c323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 31 Mar 2016 23:00:11 +0600 Subject: [PATCH 0075/3599] [udemy] Remove unused import --- 
youtube_dl/extractor/udemy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 1784599e9..e91cf44fe 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -17,7 +17,6 @@ from ..utils import ( int_or_none, sanitized_Request, unescapeHTML, - update_url_query, urlencode_postdata, ) From 8369942773aec76a5b97582f77b3e67f701bcf47 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 18:36:41 +0100 Subject: [PATCH 0076/3599] [voxmedia] Add new extractor(closes #3182) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/voxmedia.py | 99 ++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 youtube_dl/extractor/voxmedia.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 76354b67e..19f802411 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -904,6 +904,7 @@ from .vk import ( from .vlive import VLiveIE from .vodlocker import VodlockerIE from .voicerepublic import VoiceRepublicIE +from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py new file mode 100644 index 000000000..3b13d38a1 --- /dev/null +++ b/youtube_dl/extractor/voxmedia.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class VoxMediaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P<id>[^/?]+)' + _TESTS = [{ + 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', + 'md5': '73856edf3e89a711e70d5cf7cb280b37', + 'info_dict': { + 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe', + 'ext': 'mp4', + 
'title': 'Google\'s new material design direction', + 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', + } + }, { + # data-ooyala-id + 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', + 'md5': 'd744484ff127884cd2ba09e3fa604e4b', + 'info_dict': { + 'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B', + 'ext': 'mp4', + 'title': 'The Nexus 6: hands-on with Google\'s phablet', + 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', + } + }, { + # volume embed + 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', + 'md5': '375c483c5080ab8cd85c9c84cfc2d1e4', + 'info_dict': { + 'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b', + 'ext': 'mp4', + 'title': 'The new frontier of LGBTQ civil rights, explained', + 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', + } + }, { + # youtube embed + 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', + 'md5': '83b3080489fb103941e549352d3e0977', + 'info_dict': { + 'id': 'FcNHTJU1ufM', + 'ext': 'mp4', + 'title': 'How "the robot" became the greatest novelty dance of all time', + 'description': 'md5:b081c0d588b8b2085870cda55e6da176', + 'upload_date': '20160324', + 'uploader_id': 'voxdotcom', + 'uploader': 'Vox', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id)) + + title = None + description = None + provider_video_id = None + provider_video_type = None + + entry = self._search_regex([ + r'Chorus\.VideoContext\.addVideo\(\[({.+})\]\);', + r'var\s+entry\s*=\s*({.+});' + ], webpage, 'video data', default=None) + if entry: + video_data = self._parse_json(entry, display_id) + provider_video_id = video_data.get('provider_video_id') + provider_video_type = video_data.get('provider_video_type') + if provider_video_id and provider_video_type: + title = video_data.get('title') + description = video_data.get('description') + + if not 
provider_video_id or not provider_video_type: + provider_video_id = self._search_regex( + r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None) + if provider_video_id: + provider_video_type = 'ooyala' + else: + volume_uuid = self._search_regex(r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid') + volume_webpage = self._download_webpage( + 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) + video_data = self._parse_json(self._search_regex( + r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) + title = video_data.get('title_short') + description = video_data.get('description_long') or video_data.get('description_short') + for pvtype in ('ooyala', 'youtube'): + provider_video_id = video_data.get('%s_id' % pvtype) + if provider_video_id: + provider_video_type = pvtype + break + + return { + '_type': 'url_transparent', + 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'title': title or self._og_search_title(webpage), + 'description': description or self._og_search_description(webpage), + } From 5b012dfce87e4f7dd9ab4a2f5a01ab8f2e16adad Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 1 Apr 2016 01:42:16 +0800 Subject: [PATCH 0077/3599] [tudou] Improve error handling (closes #8988) --- youtube_dl/extractor/tudou.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 9892e8a62..63b5d5924 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -5,6 +5,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, InAdvancePagedList, float_or_none, @@ -46,6 +47,19 @@ class TudouIE(InfoExtractor): _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' + # Translated from 
tudou/tools/TVCHelper.as in PortalPlayer_193.swf + # 0001, 0002 and 4001 are not included as they indicate temporary issues + TVC_ERRORS = { + '0003': 'The video is deleted or does not exist', + '1001': 'This video is unavailable due to licensing issues', + '1002': 'This video is unavailable as it\'s under review', + '1003': 'This video is unavailable as it\'s under review', + '3001': 'Password required', + '5001': 'This video is available in Mainland China only due to licensing issues', + '7001': 'This video is unavailable', + '8001': 'This video is unavailable due to licensing issues', + } + def _url_for_id(self, video_id, quality=None): info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id) if quality: @@ -63,6 +77,15 @@ class TudouIE(InfoExtractor): if youku_vcode: return self.url_result('youku:' + youku_vcode, ie='Youku') + if not item_data.get('itemSegs'): + tvc_code = item_data.get('tvcCode') + if tvc_code: + err_msg = self.TVC_ERRORS.get(tvc_code) + if err_msg: + raise ExtractorError('Tudou said: %s' % err_msg, expected=True) + raise ExtractorError('Unexpected error %s returned from Tudou' % tvc_code) + raise ExtractorError('Unxpected error returned from Tudou') + title = unescapeHTML(item_data['kw']) description = item_data.get('desc') thumbnail_url = item_data.get('pic') From 4ecc1fc6387d900b7d61d43a112becff9e293206 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 21:09:27 +0100 Subject: [PATCH 0078/3599] [howstuffworks] improve extraction --- youtube_dl/extractor/howstuffworks.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py index 663e6632a..76b74c51d 100644 --- a/youtube_dl/extractor/howstuffworks.py +++ b/youtube_dl/extractor/howstuffworks.py @@ -6,6 +6,7 @@ from ..utils import ( int_or_none, js_to_json, unescapeHTML, + determine_ext, ) @@ -39,7 +40,7 @@ class 
HowStuffWorksIE(InfoExtractor): 'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm', 'info_dict': { 'id': '440011', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Sword Swallowing #1 by Dan Meyer', 'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>', 'display_id': 'sword-swallowing-1-by-dan-meyer', @@ -63,13 +64,19 @@ class HowStuffWorksIE(InfoExtractor): video_id = clip_info['content_id'] formats = [] m3u8_url = clip_info.get('m3u8') - if m3u8_url: - formats += self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + if m3u8_url and determine_ext(m3u8_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', format_id='hls', fatal=True)) + flv_url = clip_info.get('flv_url') + if flv_url: + formats.append({ + 'url': flv_url, + 'format_id': 'flv', + }) for video in clip_info.get('mp4', []): formats.append({ 'url': video['src'], - 'format_id': video['bitrate'], - 'vbr': int(video['bitrate'].rstrip('k')), + 'format_id': 'mp4-%s' % video['bitrate'], + 'vbr': int_or_none(video['bitrate'].rstrip('k')), }) if not formats: @@ -102,6 +109,6 @@ class HowStuffWorksIE(InfoExtractor): 'title': unescapeHTML(clip_info['clip_title']), 'description': unescapeHTML(clip_info.get('caption')), 'thumbnail': clip_info.get('video_still_url'), - 'duration': clip_info.get('duration'), + 'duration': int_or_none(clip_info.get('duration')), 'formats': formats, } From b53a06e3b9f2c4ad86b09d35051f5eff2ad1bff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Apr 2016 02:24:22 +0600 Subject: [PATCH 0079/3599] [udemy:course] Use new URL format --- youtube_dl/extractor/udemy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index e91cf44fe..a788cdd77 100644 --- a/youtube_dl/extractor/udemy.py +++ 
b/youtube_dl/extractor/udemy.py @@ -305,7 +305,7 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[\da-z-]+)' + _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)' _TESTS = [] @classmethod @@ -338,7 +338,7 @@ class UdemyCourseIE(UdemyIE): if lecture_id: entry = { '_type': 'url_transparent', - 'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, entry['id']), + 'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']), 'title': entry.get('title'), 'ie_key': UdemyIE.ie_key(), } From 961fc024d218275221dce8de2015c3900f2557c8 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 23:33:36 +0100 Subject: [PATCH 0080/3599] [voxmedia] improve sbnation support --- youtube_dl/extractor/voxmedia.py | 109 ++++++++++++++++++++----------- 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 3b13d38a1..0c6b1f030 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -49,51 +49,84 @@ class VoxMediaIE(InfoExtractor): 'uploader_id': 'voxdotcom', 'uploader': 'Vox', } + }, { + # SBN.VideoLinkset.entryGroup multiple ooyala embeds + 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'info_dict': { + 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'title': '25 lies you will tell yourself on National Signing Day', + 'description': 'It\'s the most self-delusional time of the year, and everyone\'s gonna tell the same lies together!', + }, + 'playlist': [{ + 'md5': '721fededf2ab74ae4176c8c8cbfe092e', + 'info_dict': { + 'id': 'p3cThlMjE61VDi_SD9JlIteSNPWVDBB9', + 'ext': 'mp4', + 'title': 'Buddy Hield vs Steph Curry (and the world)', + 'description': 'Let’s dissect only the most important Final Four 
storylines.', + }, + }, { + 'md5': 'bf0c5cc115636af028be1bab79217ea9', + 'info_dict': { + 'id': 'BmbmVjMjE6esPHxdALGubTrouQ0jYLHj', + 'ext': 'mp4', + 'title': 'Chasing Cinderella 2016: Syracuse basketball', + 'description': 'md5:e02d56b026d51aa32c010676765a690d', + }, + }], }] def _real_extract(self, url): display_id = self._match_id(url) webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id)) - title = None - description = None - provider_video_id = None - provider_video_type = None + def create_entry(provider_video_id, provider_video_type, title=None, description=None): + return { + '_type': 'url_transparent', + 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'title': title or self._og_search_title(webpage), + 'description': description or self._og_search_description(webpage), + } - entry = self._search_regex([ - r'Chorus\.VideoContext\.addVideo\(\[({.+})\]\);', - r'var\s+entry\s*=\s*({.+});' + entries = [] + entries_data = self._search_regex([ + r'Chorus\.VideoContext\.addVideo\((\[{.+}\])\);', + r'var\s+entry\s*=\s*({.+});', + r'SBN\.VideoLinkset\.entryGroup\(\s*(\[.+\])', ], webpage, 'video data', default=None) - if entry: - video_data = self._parse_json(entry, display_id) - provider_video_id = video_data.get('provider_video_id') - provider_video_type = video_data.get('provider_video_type') - if provider_video_id and provider_video_type: - title = video_data.get('title') - description = video_data.get('description') + if entries_data: + entries_data = self._parse_json(entries_data, display_id) + if isinstance(entries_data, dict): + entries_data = [entries_data] + for video_data in entries_data: + provider_video_id = video_data.get('provider_video_id') + provider_video_type = video_data.get('provider_video_type') + if provider_video_id and provider_video_type: + entries.append(create_entry( + provider_video_id, provider_video_type, + video_data.get('title'), 
video_data.get('description'))) - if not provider_video_id or not provider_video_type: - provider_video_id = self._search_regex( - r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None) - if provider_video_id: - provider_video_type = 'ooyala' - else: - volume_uuid = self._search_regex(r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid') - volume_webpage = self._download_webpage( - 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) - video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) - title = video_data.get('title_short') - description = video_data.get('description_long') or video_data.get('description_short') - for pvtype in ('ooyala', 'youtube'): - provider_video_id = video_data.get('%s_id' % pvtype) - if provider_video_id: - provider_video_type = pvtype - break + provider_video_id = self._search_regex( + r'data-ooyala-id="([^"]+)"', webpage, 'ooyala id', default=None) + if provider_video_id: + entries.append(create_entry(provider_video_id, 'ooyala')) - return { - '_type': 'url_transparent', - 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), - 'title': title or self._og_search_title(webpage), - 'description': description or self._og_search_description(webpage), - } + volume_uuid = self._search_regex( + r'data-volume-uuid="([^"]+)"', webpage, 'volume uuid', default=None) + if volume_uuid: + volume_webpage = self._download_webpage( + 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) + video_data = self._parse_json(self._search_regex( + r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) + for provider_video_type in ('ooyala', 'youtube'): + provider_video_id = video_data.get('%s_id' % provider_video_type) + if provider_video_id: + description = video_data.get('description_long') or video_data.get('description_short') + 
entries.append(create_entry( + provider_video_id, provider_video_type, video_data.get('title_short'), description)) + break + + if len(entries) == 1: + return entries[0] + else: + return self.playlist_result(entries, display_id, self._og_search_title(webpage), self._og_search_description(webpage)) From f9b1529af8aec98bffd42edb5be15e1ada791a20 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Mar 2016 23:50:45 +0100 Subject: [PATCH 0081/3599] [generic] remove sbnation test(handled by VoxMediaIE) --- youtube_dl/extractor/generic.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f3de738f7..589d1e152 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -406,19 +406,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, - # multiple ooyala embeds on SBN network websites - { - 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', - 'info_dict': { - 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', - 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com', - }, - 'playlist_mincount': 3, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', From 63c55e9f2245b24f437564e616ebff76994978cf Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 1 Apr 2016 07:33:37 +0100 Subject: [PATCH 0082/3599] [cbs] improve extraction(closes #6321) --- youtube_dl/extractor/cbs.py | 78 +++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 40d07ab18..0cf56b9c7 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,21 
+1,24 @@ from __future__ import unicode_literals -from .common import InfoExtractor +from .theplatform import ThePlatformIE from ..utils import ( - sanitized_Request, - smuggle_url, + xpath_text, + xpath_element, + int_or_none, + ExtractorError, + find_xpath_attr, ) -class CBSIE(InfoExtractor): +class CBSIE(ThePlatformIE): _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', 'info_dict': { - 'id': '4JUVEwq3wUT7', + 'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_', 'display_id': 'connect-chat-feat-garth-brooks', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Connect Chat feat. Garth Brooks', 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', 'duration': 1495, @@ -47,22 +50,55 @@ class CBSIE(InfoExtractor): 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', 'only_matching': True, }] + TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true' + + def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): + closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') + return { + 'en': [{ + 'ext': 'ttml', + 'url': closed_caption_e.attrib['value'], + }] + } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] def _real_extract(self, url): display_id = self._match_id(url) - request = sanitized_Request(url) - # Android UA is served with higher quality (720p) streams (see - # https://github.com/rg3/youtube-dl/issues/7490) - request.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 4.4; Nexus 5)') - webpage = 
self._download_webpage(request, display_id) - real_id = self._search_regex( - [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"], - webpage, 'real video ID') - return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url( - 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true&manifest=m3u' % real_id, - {'force_smil_url': True}), + webpage = self._download_webpage(url, display_id) + content_id = self._search_regex( + [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], + webpage, 'content id') + items_data = self._download_xml( + 'http://can.cbs.com/thunder/player/videoPlayerService.php', + content_id, query={'partner': 'cbs', 'contentId': content_id}) + video_data = xpath_element(items_data, './/item') + title = xpath_text(video_data, 'videoTitle', 'title', True) + + subtitles = {} + formats = [] + for item in items_data.findall('.//item'): + pid = xpath_text(item, 'pid') + if not pid: + continue + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid) + except ExtractorError: + continue + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) + self._sort_formats(formats) + + info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id) + info.update({ + 'id': content_id, 'display_id': display_id, - } + 'title': title, + 'series': xpath_text(video_data, 'seriesTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), + 'thumbnail': xpath_text(video_data, 'previewImageURL'), + 'formats': formats, + 'subtitles': subtitles, + }) + return info From 1e02bc7ba28bfb5859dcd0c8d960b965d9c59f12 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: 
Fri, 1 Apr 2016 09:07:40 +0200 Subject: [PATCH 0083/3599] release 2016.04.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 5b1f573e7..128ba2fc0 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.03.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.03.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.03.27 +[debug] youtube-dl version 2016.04.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 00b8c247c..b9bcf450c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -118,6 +118,7 @@ - **Clubic** - **Clyp** - **cmt.com** + - **CNBC** - **CNET** - **CNN** - **CNNArticle** @@ -134,6 +135,7 @@ - **CrooksAndLiars** - **Crunchyroll** - **crunchyroll:playlist** + - **CSNNE** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - **culturebox.francetvinfo.fr** @@ -376,7 +378,8 @@ - **myvideo** (Currently broken) - **MyVidster** - **n-tv.de** - - **NationalGeographic** + - **natgeo** + - **natgeo:channel** - **Naver** - **NBA** - **NBC** @@ -618,7 +621,6 @@ - **Telegraaf** - **TeleMB** - **TeleTask** - - **TenPlay** - **TF1** - **TheIntercept** - **TheOnion** @@ -740,6 +742,7 @@ - **vlive** - **Vodlocker** - **VoiceRepublic** + - **VoxMedia** - **Vporn** - **vpro**: npo.nl and ntr.nl - **VRT** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5daa7f4e8..d9e1cb2a8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.03.27' +__version__ = '2016.04.01' From 
db1c969da5dc22b36228b50ac2c09f3a50dd17ae Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 1 Apr 2016 09:55:46 +0100 Subject: [PATCH 0084/3599] [theplatform] sign https urls --- youtube_dl/extractor/theplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 236c99972..bf6f82f5a 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -154,7 +154,7 @@ class ThePlatformIE(ThePlatformBaseIE): def hex_to_str(hex): return binascii.a2b_hex(hex) - relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0] + relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) From d8873d4defdd527c82634bea8566370f5d385020 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 1 Apr 2016 09:56:18 +0100 Subject: [PATCH 0085/3599] [aenetworks] improve format extraction --- youtube_dl/extractor/aenetworks.py | 32 +++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 6018ae79a..b7232c904 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -1,13 +1,19 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor -from ..utils import smuggle_url +from ..utils import ( + smuggle_url, + update_url_query, + unescapeHTML, +) class AENetworksIE(InfoExtractor): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' - _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' + 
_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' _TESTS = [{ 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', @@ -25,16 +31,13 @@ class AENetworksIE(InfoExtractor): 'expected_warnings': ['JSON-LD'], }, { 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', + 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', 'info_dict': { 'id': 'eg47EERs_JsZ', 'ext': 'mp4', 'title': 'Winter Is Coming', 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, 'add_ie': ['ThePlatform'], }, { 'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry', @@ -48,7 +51,7 @@ class AENetworksIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + page_type, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, video_id) @@ -56,11 +59,22 @@ class AENetworksIE(InfoExtractor): r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, r"media_url\s*=\s*'([^']+)'" ] - video_url = self._search_regex(video_url_re, webpage, 'video url') + video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url')) + query = {'mbr': 'true'} + if page_type == 'shows': + query['assetTypes'] = 'medium_video_s3' + if 'switch=hds' in video_url: + query['switch'] = 'hls' info = self._search_json_ld(webpage, video_id, fatal=False) info.update({ '_type': 'url_transparent', - 'url': smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}}), + 'url': smuggle_url(update_url_query( + video_url, query), { + 'sig': { + 'key': 'crazyjava', + 'secret': 's3cr3t'}, + 'force_smil_url': True + }), }) return info From 3e0c3d14d9ab49f3a98324e91763c849ef52a1df Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 1 Apr 2016 
10:12:29 +0100 Subject: [PATCH 0086/3599] [cbs] add base extractor --- youtube_dl/extractor/cbs.py | 22 ++++++++++++---------- youtube_dl/extractor/cbsnews.py | 13 ++----------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 0cf56b9c7..6e4079ca3 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -10,7 +10,18 @@ from ..utils import ( ) -class CBSIE(ThePlatformIE): +class CBSBaseIE(ThePlatformIE): + def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): + closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') + return { + 'en': [{ + 'ext': 'ttml', + 'url': closed_caption_e.attrib['value'], + }] + } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] + + +class CBSIE(CBSBaseIE): _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)' _TESTS = [{ @@ -52,15 +63,6 @@ class CBSIE(ThePlatformIE): }] TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true' - def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): - closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') - return { - 'en': [{ - 'ext': 'ttml', - 'url': closed_caption_e.attrib['value'], - }] - } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index e6b7f3584..b5e78a65d 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -2,14 +2,14 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .theplatform import ThePlatformIE +from .cbs import 
CBSBaseIE from ..utils import ( parse_duration, find_xpath_attr, ) -class CBSNewsIE(ThePlatformIE): +class CBSNewsIE(CBSBaseIE): IE_DESC = 'CBS News' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' @@ -49,15 +49,6 @@ class CBSNewsIE(ThePlatformIE): }, ] - def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): - closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') - return { - 'en': [{ - 'ext': 'ttml', - 'url': closed_caption_e.attrib['value'], - }] - } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] - def _real_extract(self, url): video_id = self._match_id(url) From 3f64379eda3477306df013466045ab1a711533f4 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 1 Apr 2016 16:22:06 +0100 Subject: [PATCH 0087/3599] [movieclips] fix extraction --- youtube_dl/extractor/movieclips.py | 43 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dl/extractor/movieclips.py index 1564cb71f..d0cb8278e 100644 --- a/youtube_dl/extractor/movieclips.py +++ b/youtube_dl/extractor/movieclips.py @@ -2,39 +2,48 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import sanitized_Request +from ..utils import ( + smuggle_url, + float_or_none, + parse_iso8601, + update_url_query, +) class MovieClipsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www.)?movieclips\.com/videos/.+-(?P<id>\d+)(?:\?|$)' _TEST = { - 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597?autoPlay=true&playlistId=5', + 'url': 'http://www.movieclips.com/videos/warcraft-trailer-1-561180739597', + 'md5': '42b5a0352d4933a7bd54f2104f481244', 'info_dict': { 'id': 'pKIGmG83AqD9', - 'display_id': 'warcraft-trailer-1-561180739597', 'ext': 'mp4', 
'title': 'Warcraft Trailer 1', 'description': 'Watch Trailer 1 from Warcraft (2016). Legendary’s WARCRAFT is a 3D epic adventure of world-colliding conflict based.', 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1446843055, + 'upload_date': '20151106', + 'uploader': 'Movieclips', }, 'add_ie': ['ThePlatform'], } def _real_extract(self, url): - display_id = self._match_id(url) - - req = sanitized_Request(url) - # it doesn't work if it thinks the browser it's too old - req.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/43.0 (Chrome)') - webpage = self._download_webpage(req, display_id) - theplatform_link = self._html_search_regex(r'src="(http://player.theplatform.com/p/.*?)"', webpage, 'theplatform link') - title = self._html_search_regex(r'<title[^>]*>([^>]+)-\s*\d+\s*|\s*Movieclips.com', webpage, 'title') - description = self._html_search_meta('description', webpage) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video = next(v for v in self._parse_json(self._search_regex( + r'var\s+__REACT_ENGINE__\s*=\s*({.+});', + webpage, 'react engine'), video_id)['playlist']['videos'] if v['id'] == video_id) return { '_type': 'url_transparent', - 'url': theplatform_link, - 'title': title, - 'display_id': display_id, - 'description': description, + 'ie_key': 'ThePlatform', + 'url': smuggle_url(update_url_query( + video['contentUrl'], {'mbr': 'true'}), {'force_smil_url': True}), + 'title': self._og_search_title(webpage), + 'description': self._html_search_meta('description', webpage), + 'duration': float_or_none(video.get('duration')), + 'timestamp': parse_iso8601(video.get('dateCreated')), + 'thumbnail': video.get('defaultImage'), + 'uploader': video.get('provider'), } From 03caa463e73c2ae2f666b85febf25ddb03f961ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Apr 2016 22:38:56 +0600 Subject: [PATCH 0088/3599] [udemy:course] Skip non-video lectures --- 
youtube_dl/extractor/udemy.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index a788cdd77..bc69e6e41 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -193,12 +193,12 @@ class UdemyIE(InfoExtractor): asset = lecture['asset'] - asset_type = asset.get('assetType') or asset.get('asset_type') + asset_type = asset.get('asset_type') or asset.get('assetType') if asset_type != 'Video': raise ExtractorError( 'Lecture %s is not a video' % lecture_id, expected=True) - stream_url = asset.get('streamUrl') or asset.get('stream_url') + stream_url = asset.get('stream_url') or asset.get('streamUrl') if stream_url: youtube_url = self._search_regex( r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None) @@ -206,7 +206,7 @@ class UdemyIE(InfoExtractor): return self.url_result(youtube_url, 'Youtube') video_id = asset['id'] - thumbnail = asset.get('thumbnailUrl') or asset.get('thumbnail_url') + thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl') duration = float_or_none(asset.get('data', {}).get('duration')) formats = [] @@ -325,7 +325,7 @@ class UdemyCourseIE(UdemyIE): 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id, course_id, 'Downloading course curriculum', query={ 'fields[chapter]': 'title,object_index', - 'fields[lecture]': 'title', + 'fields[lecture]': 'title,asset', 'page_size': '1000', }) @@ -334,6 +334,11 @@ class UdemyCourseIE(UdemyIE): for entry in response['results']: clazz = entry.get('_class') if clazz == 'lecture': + asset = entry.get('asset') + if isinstance(asset, dict): + asset_type = asset.get('asset_type') or asset.get('assetType') + if asset_type != 'Video': + continue lecture_id = entry.get('id') if lecture_id: entry = { From a3373823e1bd0239e0f58d5dd16ef5a4ec6bceb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Apr 2016 
22:42:09 +0600 Subject: [PATCH 0089/3599] [udemy] Remove unnecessary login/password encode This is now covered by compat_urllib_parse_urlencode --- youtube_dl/extractor/udemy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index bc69e6e41..d1e6f2703 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -151,8 +151,8 @@ class UdemyIE(InfoExtractor): login_form = self._form_hidden_inputs('login-form', login_popup) login_form.update({ - 'email': username.encode('utf-8'), - 'password': password.encode('utf-8'), + 'email': username, + 'password': password, }) request = sanitized_Request( From fbdaced256f9d7d9b0adb97d093f0f381c9483f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Apr 2016 22:45:20 +0600 Subject: [PATCH 0090/3599] [lynda] Remove unnecessary login/password encode --- youtube_dl/extractor/lynda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 655627479..86d47266f 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -28,8 +28,8 @@ class LyndaBaseIE(InfoExtractor): return login_form = { - 'username': username.encode('utf-8'), - 'password': password.encode('utf-8'), + 'username': username, + 'password': password, 'remember': 'false', 'stayPut': 'false' } From 244cd04237fe4a1e4d92421711f41de3c2566d5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Apr 2016 22:46:46 +0600 Subject: [PATCH 0091/3599] [pluralsight] Remove unnecessary login/password encode --- youtube_dl/extractor/pluralsight.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index df03dd419..9aab77645 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -64,8 +64,8 @@ class 
PluralsightIE(PluralsightBaseIE): login_form = self._hidden_inputs(login_page) login_form.update({ - 'Username': username.encode('utf-8'), - 'Password': password.encode('utf-8'), + 'Username': username, + 'Password': password, }) post_url = self._search_regex( From 83cedc1cf224206adf513f5bdd5f5ce915d67933 Mon Sep 17 00:00:00 2001 From: Martin Trigaux Date: Tue, 29 Mar 2016 14:18:44 +0200 Subject: [PATCH 0092/3599] screencast.com: support missing www The "www." part of the URL is not mandatory --- youtube_dl/extractor/screencast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index dfd897ba3..d5111c629 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -12,7 +12,7 @@ from ..utils import ( class ScreencastIE(InfoExtractor): - _VALID_URL = r'https?://www\.screencast\.com/t/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://www.screencast.com/t/3ZEjQXlT', 'md5': '917df1c13798a3e96211dd1561fded83', @@ -34,7 +34,7 @@ class ScreencastIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } }, { - 'url': 'http://www.screencast.com/t/aAB3iowa', + 'url': 'http://screencast.com/t/aAB3iowa', 'md5': 'dedb2734ed00c9755761ccaee88527cd', 'info_dict': { 'id': 'aAB3iowa', From 81de73e5b43e5009a14f569aed92fe73e61d4f03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Apr 2016 23:00:45 +0600 Subject: [PATCH 0093/3599] [screencast] Add test --- youtube_dl/extractor/screencast.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index d5111c629..c69451151 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -34,7 +34,7 @@ class ScreencastIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } }, { - 'url': 
'http://screencast.com/t/aAB3iowa', + 'url': 'http://www.screencast.com/t/aAB3iowa', 'md5': 'dedb2734ed00c9755761ccaee88527cd', 'info_dict': { 'id': 'aAB3iowa', @@ -53,8 +53,10 @@ class ScreencastIE(InfoExtractor): 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } - }, - ] + }, { + 'url': 'http://screencast.com/t/aAB3iowa', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 791d6aaeccd2efae2c4c5fa1e72010be85eb89b8 Mon Sep 17 00:00:00 2001 From: Martin Trigaux Date: Tue, 29 Mar 2016 14:34:58 +0200 Subject: [PATCH 0094/3599] screencast.com: fallback on page title When determining the title of the page, use the tag of the page --- youtube_dl/extractor/screencast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index c69451151..32f31fdd7 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -97,7 +97,8 @@ class ScreencastIE(InfoExtractor): if title is None: title = self._html_search_regex( [r'<b>Title:</b> ([^<]*)</div>', - r'class="tabSeperator">></span><span class="tabText">(.*?)<'], + r'class="tabSeperator">></span><span class="tabText">(.*?)<', + r'<title>([^<]*)'], webpage, 'title') thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage, default=None) From 75d572e9fb8d3e26e4ab45e65cd5e23c6b1c6915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Apr 2016 23:01:55 +0600 Subject: [PATCH 0095/3599] [screencast] Improve title regexes (Closes #9025) --- youtube_dl/extractor/screencast.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 32f31fdd7..356631700 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -96,9 +96,9 @@ class 
ScreencastIE(InfoExtractor): title = self._og_search_title(webpage, default=None) if title is None: title = self._html_search_regex( - [r'Title: ([^<]*)', - r'class="tabSeperator">>(.*?)<', - r'([^<]*)'], + [r'Title: ([^<]+)', + r'class="tabSeperator">>(.+?)<', + r'([^<]+)'], webpage, 'title') thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage, default=None) From 79ba9140dc8fcf5883b7473596e8f20cba6b479f Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 1 Apr 2016 18:06:11 +0100 Subject: [PATCH 0096/3599] [theplatform] extract timestamp and uploader --- youtube_dl/extractor/aenetworks.py | 6 ++++++ youtube_dl/extractor/bravotv.py | 3 +++ youtube_dl/extractor/cbs.py | 3 +++ youtube_dl/extractor/cnbc.py | 3 +++ youtube_dl/extractor/fox.py | 3 +++ youtube_dl/extractor/nationalgeographic.py | 12 ++++++++++++ youtube_dl/extractor/nbc.py | 14 ++++++++++++++ youtube_dl/extractor/sbs.py | 6 +++++- youtube_dl/extractor/theplatform.py | 10 ++++++++++ 9 files changed, 59 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index b7232c904..3fddaba54 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -22,6 +22,9 @@ class AENetworksIE(InfoExtractor): 'ext': 'mp4', 'title': "Bet You Didn't Know: Valentine's Day", 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', + 'timestamp': 1375819729, + 'upload_date': '20130806', + 'uploader': 'AENE-NEW', }, 'params': { # m3u8 download @@ -37,6 +40,9 @@ class AENetworksIE(InfoExtractor): 'ext': 'mp4', 'title': 'Winter Is Coming', 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', + 'timestamp': 1338306241, + 'upload_date': '20120529', + 'uploader': 'AENE-NEW', }, 'add_ie': ['ThePlatform'], }, { diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py index 34d451f38..541c76944 100644 --- a/youtube_dl/extractor/bravotv.py +++ b/youtube_dl/extractor/bravotv.py @@ 
-15,6 +15,9 @@ class BravoTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Last Chance Kitchen Returns', 'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13', + 'timestamp': 1448926740, + 'upload_date': '20151130', + 'uploader': 'NBCU-BRAV', } } diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 6e4079ca3..c621a08d5 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -33,6 +33,9 @@ class CBSIE(CBSBaseIE): 'title': 'Connect Chat feat. Garth Brooks', 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', 'duration': 1495, + 'timestamp': 1385585425, + 'upload_date': '20131127', + 'uploader': 'CBSI-NEW', }, 'params': { # rtmp download diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 25b308752..d354d9f95 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -14,6 +14,9 @@ class CNBCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fighting zombies is big business', 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', + 'timestamp': 1459332000, + 'upload_date': '20160330', + 'uploader': 'NBCU-CNBC', }, 'params': { # m3u8 download diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index fa05af50d..95c1abf94 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -16,6 +16,9 @@ class FOXIE(InfoExtractor): 'title': 'Official Trailer: Gotham', 'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.', 'duration': 129, + 'timestamp': 1400020798, + 'upload_date': '20140513', + 'uploader': 'NEWA-FNG-FOXCOM', }, 'add_ie': ['ThePlatform'], } diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 61b5c700e..722518663 100644 
--- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -21,6 +21,9 @@ class NationalGeographicIE(InfoExtractor): 'ext': 'mp4', 'title': 'Mating Crabs Busted by Sharks', 'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', + 'timestamp': 1423523799, + 'upload_date': '20150209', + 'uploader': 'NAGS', }, 'add_ie': ['ThePlatform'], }, @@ -32,6 +35,9 @@ class NationalGeographicIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Real Jaws', 'description': 'md5:8d3e09d9d53a85cd397b4b21b2c77be6', + 'timestamp': 1433772632, + 'upload_date': '20150608', + 'uploader': 'NAGS', }, 'add_ie': ['ThePlatform'], }, @@ -68,6 +74,9 @@ class NationalGeographicChannelIE(InfoExtractor): 'ext': 'mp4', 'title': 'Uncovering a Universal Knowledge', 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a', + 'timestamp': 1458680907, + 'upload_date': '20160322', + 'uploader': 'NEWA-FNG-NGTV', }, 'add_ie': ['ThePlatform'], }, @@ -79,6 +88,9 @@ class NationalGeographicChannelIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Stunning Red Bird of Paradise', 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c', + 'timestamp': 1459362152, + 'upload_date': '20160330', + 'uploader': 'NEWA-FNG-NGTV', }, 'add_ie': ['ThePlatform'], }, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 43d75d3ca..e67025ff6 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -27,6 +27,9 @@ class NBCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', + 'timestamp': 1424246400, + 'upload_date': '20150218', + 'uploader': 'NBCU-COM', }, 'params': { # m3u8 download @@ -50,6 +53,9 @@ class NBCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Star Wars Teaser', 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442', + 'timestamp': 1417852800, + 'upload_date': 
'20141206', + 'uploader': 'NBCU-COM', }, 'params': { # m3u8 download @@ -78,6 +84,7 @@ class NBCIE(InfoExtractor): theplatform_url = 'http:' + theplatform_url return { '_type': 'url_transparent', + 'ie_key': 'ThePlatform', 'url': smuggle_url(theplatform_url, {'source_url': url}), 'id': video_id, } @@ -93,6 +100,9 @@ class NBCSportsVPlayerIE(InfoExtractor): 'ext': 'flv', 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + 'timestamp': 1426270238, + 'upload_date': '20150313', + 'uploader': 'NBCU-SPORTS', } }, { 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', @@ -144,6 +154,9 @@ class CSNNEIE(InfoExtractor): 'ext': 'mp4', 'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.', 'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3', + 'timestamp': 1459369979, + 'upload_date': '20160330', + 'uploader': 'NBCU-SPORTS', } } @@ -331,6 +344,7 @@ class MSNBCIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1406937606, 'upload_date': '20140802', + 'uploader': 'NBCU-NEWS', 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], }, } diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 2f96477ca..96472fbc4 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -24,6 +24,9 @@ class SBSIE(InfoExtractor): 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', 'thumbnail': 're:http://.*\.jpg', 'duration': 308, + 'timestamp': 1408613220, + 'upload_date': '20140821', + 'uploader': 'SBSC', }, }, { 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', @@ -57,6 +60,7 @@ class SBSIE(InfoExtractor): return { '_type': 'url_transparent', + 'ie_key': 'ThePlatform', 'id': video_id, - 'url': smuggle_url(theplatform_url, {'force_smil_url': True}), + 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}), } diff 
--git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index bf6f82f5a..6da701a39 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -76,6 +76,8 @@ class ThePlatformBaseIE(OnceIE): 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], 'duration': int_or_none(info.get('duration'), 1000), + 'timestamp': int_or_none(info.get('pubDate'), 1000) or None, + 'uploader': info.get('billingCode'), } @@ -94,6 +96,9 @@ class ThePlatformIE(ThePlatformBaseIE): 'title': 'Blackberry\'s big, bold Z30', 'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', 'duration': 247, + 'timestamp': 1383239700, + 'upload_date': '20131031', + 'uploader': 'CBSI-NEW', }, 'params': { # rtmp download @@ -107,6 +112,9 @@ class ThePlatformIE(ThePlatformBaseIE): 'ext': 'flv', 'description': 'md5:ac330c9258c04f9d7512cf26b9595409', 'title': 'Tesla Model S: A second step towards a cleaner motoring future', + 'timestamp': 1426176191, + 'upload_date': '20150312', + 'uploader': 'CBSI-NEW', }, 'params': { # rtmp download @@ -119,6 +127,7 @@ class ThePlatformIE(ThePlatformBaseIE): 'ext': 'mp4', 'description': 'md5:644ad9188d655b742f942bf2e06b002d', 'title': 'HIGHLIGHTS: USA bag first ever series Cup win', + 'uploader': 'EGSM', } }, { 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', @@ -135,6 +144,7 @@ class ThePlatformIE(ThePlatformBaseIE): 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1435752600, 'upload_date': '20150701', + 'uploader': 'NBCU-NEWS', }, }, { # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1 From 04819db58edfa7a169e7ba6fd2d5734500754571 Mon Sep 17 00:00:00 2001 From: theGeekPirate Date: Sat, 26 Mar 2016 05:37:40 -0700 Subject: [PATCH 0097/3599] [camwithher] Add extractor Corrected unnecessary test Sane variable naming RTMP all .flv & url_id for _download_webpage() Corrected all 
outstanding issues, next up is a squash! --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/camwithher.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/camwithher.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 19f802411..438e1cc63 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -95,6 +95,7 @@ from .camdemy import ( CamdemyIE, CamdemyFolderIE ) +from .camwithher import CamWithHerIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .canvas import CanvasIE diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py new file mode 100644 index 000000000..eb0a4ec56 --- /dev/null +++ b/youtube_dl/extractor/camwithher.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class CamWithHerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*viewkey=(?P\w+)' + + _TESTS = [ + { + 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=', + 'info_dict': { + 'id': '5644', + 'ext': 'flv', + 'title': 'Periscope Tease', + }, + 'params': { + 'skip_download': True, + } + }, + { + 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937', + 'only_matching': True, + }, + { + 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=', + 'only_matching': True, + }, + { + 'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + flv_id = self._html_search_regex(r' 2010 else flv_id) + + title = self._html_search_regex(r'
\s+

(.+?)

', webpage, 'title') + + return { + 'id': flv_id, + 'url': rtmp_url, + 'no_resume': True, + 'ext': 'flv', + 'title': title, + } From 9aaaf8e8e8ae12ed8fbc62461558a4cdb8640ad5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Apr 2016 23:47:27 +0600 Subject: [PATCH 0098/3599] [camwithher] Improve extraction (Closes #8989) --- youtube_dl/extractor/camwithher.py | 95 ++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py index eb0a4ec56..9809096ec 100644 --- a/youtube_dl/extractor/camwithher.py +++ b/youtube_dl/extractor/camwithher.py @@ -1,55 +1,88 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + unified_strdate, +) class CamWithHerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*viewkey=(?P\w+)' + _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P\w+)' - _TESTS = [ - { - 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=', - 'info_dict': { - 'id': '5644', - 'ext': 'flv', - 'title': 'Periscope Tease', - }, - 'params': { - 'skip_download': True, - } + _TESTS = [{ + 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=', + 'info_dict': { + 'id': '5644', + 'ext': 'flv', + 'title': 'Periscope Tease', + 'description': 'In the clouds teasing on periscope to my favorite song', + 'duration': 240, + 'view_count': int, + 'comment_count': int, + 'uploader': 'MileenaK', + 'upload_date': '20160322', }, - { - 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937', - 'only_matching': True, - }, - { - 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=', - 'only_matching': True, - }, - { - 'url': 
'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv', - 'only_matching': True, + 'params': { + 'skip_download': True, } - ] + }, { + 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937', + 'only_matching': True, + }, { + 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=', + 'only_matching': True, + }, { + 'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - flv_id = self._html_search_regex(r'
2010 else flv_id) + # Video URL construction algorithm is reverse-engineered from cwhplayer.swf + rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % ( + ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id) + + title = self._html_search_regex( + r']+style="float:left"[^>]*>\s*

(.+?)

', webpage, 'title') + description = self._html_search_regex( + r'>Description:(.+?)
', webpage, 'description', default=None) + + runtime = self._search_regex( + r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None) + if runtime: + runtime = re.sub(r'[\s-]', '', runtime) + duration = parse_duration(runtime) + view_count = int_or_none(self._search_regex( + r'Views\s*:\s*(\d+)', webpage, 'view count', default=None)) + comment_count = int_or_none(self._search_regex( + r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None)) + + uploader = self._search_regex( + r'Added by\s*:\s*]+>([^<]+)', webpage, 'uploader', default=None) + upload_date = unified_strdate(self._search_regex( + r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None)) - title = self._html_search_regex(r'
\s+

(.+?)

', webpage, 'title') return { 'id': flv_id, 'url': rtmp_url, - 'no_resume': True, 'ext': 'flv', + 'no_resume': True, 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count, + 'uploader': uploader, + 'upload_date': upload_date, } From 329c1eae54bf71ae8602f79f71570eaf90ef7d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 1 Apr 2016 20:42:19 +0200 Subject: [PATCH 0099/3599] [aenetworks] Make pep8 happy --- youtube_dl/extractor/aenetworks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 3fddaba54..1bbfe2641 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -75,8 +75,9 @@ class AENetworksIE(InfoExtractor): info = self._search_json_ld(webpage, video_id, fatal=False) info.update({ '_type': 'url_transparent', - 'url': smuggle_url(update_url_query( - video_url, query), { + 'url': smuggle_url( + update_url_query(video_url, query), + { 'sig': { 'key': 'crazyjava', 'secret': 's3cr3t'}, From 0f28777f58b5c21226d8e02477834dbb08b170ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 1 Apr 2016 20:43:14 +0200 Subject: [PATCH 0100/3599] [cbsnews] Remove unused import --- youtube_dl/extractor/cbsnews.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index b5e78a65d..79ddc20a0 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -5,7 +5,6 @@ from .common import InfoExtractor from .cbs import CBSBaseIE from ..utils import ( parse_duration, - find_xpath_attr, ) From 6d628fafcadf6b9d2bc16c34c8cda8b53860e406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 1 Apr 2016 20:45:21 +0200 Subject: [PATCH 0101/3599] [camwithher] Remove 
extra blank line --- youtube_dl/extractor/camwithher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py index 9809096ec..afbc5ea26 100644 --- a/youtube_dl/extractor/camwithher.py +++ b/youtube_dl/extractor/camwithher.py @@ -72,7 +72,6 @@ class CamWithHerIE(InfoExtractor): upload_date = unified_strdate(self._search_regex( r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None)) - return { 'id': flv_id, 'url': rtmp_url, From df634be2ed85b33968973a3e85935bb5d578ce42 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 1 Apr 2016 19:39:02 +0100 Subject: [PATCH 0102/3599] [common] prefer using mime type over ext for smil subtitle extraction the subtitle ext for http://www.cnet.com/videos/download-amazon-prime-movies-and-tv/ is adb_xml while using the mime type it get tt(application/smptett+xml) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 85ac0400c..94a583891 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1335,7 +1335,7 @@ class InfoExtractor(object): if not src or src in urls: continue urls.append(src) - ext = textstream.get('ext') or determine_ext(src) or mimetype2ext(textstream.get('type')) + ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src) lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang subtitles.setdefault(lang, []).append({ 'url': src, From 0750b2491f5f14e51c2bf91584fd490944154393 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 1 Apr 2016 19:47:20 +0100 Subject: [PATCH 0103/3599] [ffmpeg] try to convert tt subtitles usng dfxp2srt --- youtube_dl/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py 
b/youtube_dl/postprocessor/ffmpeg.py index 06b8c0548..b64cd396b 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -536,7 +536,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): sub_filenames.append(old_file) new_file = subtitles_filename(filename, lang, new_ext) - if ext == 'dfxp' or ext == 'ttml': + if ext == 'dfxp' or ext == 'ttml' or ext == 'tt': self._downloader.report_warning( 'You have requested to convert dfxp (TTML) subtitles into another format, ' 'which results in style information loss') From 5f705baf5ecda6be678481ff9ab9c27a6cd54dc0 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 1 Apr 2016 20:42:15 +0100 Subject: [PATCH 0104/3599] [cnet] extract more formats --- youtube_dl/extractor/cnet.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py index c154b3e19..58c26f20f 100644 --- a/youtube_dl/extractor/cnet.py +++ b/youtube_dl/extractor/cnet.py @@ -17,6 +17,8 @@ class CNETIE(ThePlatformIE): 'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861', 'uploader': 'Sarah Mitroff', 'duration': 70, + 'timestamp': 1396479627, + 'upload_date': '20140402', }, }, { 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/', @@ -28,8 +30,11 @@ class CNETIE(ThePlatformIE): 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40', 'uploader': 'Ashley Esqueda', 'duration': 1482, + 'timestamp': 1433289889, + 'upload_date': '20150603', }, }] + TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' def _real_extract(self, url): display_id = self._match_id(url) @@ -51,16 +56,12 @@ class CNETIE(ThePlatformIE): uploader = None uploader_id = None - metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id) - description = vdata.get('description') or metadata.get('description') - duration = int_or_none(vdata.get('duration')) 
or metadata.get('duration') - - formats = [] - subtitles = {} + media_guid_path = 'media/guid/2288573011/%s' % vdata['mpxRefId'] + formats, subtitles = self._extract_theplatform_smil(self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id) for (fkey, vid) in vdata['files'].items(): if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']: continue - release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid + release_url = self.TP_RELEASE_URL_TEMPLATE % vid if fkey == 'hds': release_url += '&manifest=f4m' tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey) @@ -68,15 +69,15 @@ class CNETIE(ThePlatformIE): subtitles = self._merge_subtitles(subtitles, tp_subtitles) self._sort_formats(formats) - return { + info = self.get_metadata('kYEXFC/%s' % media_guid_path, video_id) + info.update({ 'id': video_id, 'display_id': display_id, 'title': title, - 'description': description, - 'thumbnail': metadata.get('thumbnail'), - 'duration': duration, + 'duration': int_or_none(vdata.get('duration')), 'uploader': uploader, 'uploader_id': uploader_id, 'subtitles': subtitles, 'formats': formats, - } + }) + return info From fe7ef95e91cec1c1794692029561a68e2aaa7809 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 1 Apr 2016 23:53:32 +0100 Subject: [PATCH 0105/3599] [cbsinteractive] Add support for ZDNet videos --- youtube_dl/extractor/__init__.py | 2 +- .../extractor/{cnet.py => cbsinteractive.py} | 37 ++++++++++++++++--- 2 files changed, 32 insertions(+), 7 deletions(-) rename youtube_dl/extractor/{cnet.py => cbsinteractive.py} (69%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 438e1cc63..ebf5ca3e9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -104,6 +104,7 @@ from .cbc import ( CBCPlayerIE, ) from .cbs import CBSIE +from .cbsinteractive import CBSInteractiveIE from .cbsnews import ( CBSNewsIE, CBSNewsLiveVideoIE, @@ -129,7 
+130,6 @@ from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE from .cnbc import CNBCIE -from .cnet import CNETIE from .cnn import ( CNNIE, CNNBlogsIE, diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cbsinteractive.py similarity index 69% rename from youtube_dl/extractor/cnet.py rename to youtube_dl/extractor/cbsinteractive.py index 58c26f20f..0011c3029 100644 --- a/youtube_dl/extractor/cnet.py +++ b/youtube_dl/extractor/cbsinteractive.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .theplatform import ThePlatformIE from ..utils import int_or_none -class CNETIE(ThePlatformIE): - _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P[^/]+)/' +class CBSInteractiveIE(ThePlatformIE): + _VALID_URL = r'https?://(?:www\.)?(?Pcnet|zdnet)\.com/(?:videos|video/share)/(?P[^/?]+)' _TESTS = [{ 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', 'info_dict': { @@ -33,15 +35,35 @@ class CNETIE(ThePlatformIE): 'timestamp': 1433289889, 'upload_date': '20150603', }, + }, { + 'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/', + 'info_dict': { + 'id': 'bc1af9f0-a2b5-4e54-880d-0d95525781c0', + 'ext': 'mp4', + 'title': 'Video: Keeping Android smartphones and tablets secure', + 'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.', + 'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0', + 'uploader': 'Adrian Kingsley-Hughes', + 'timestamp': 1448961720, + 'upload_date': '20151201', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }] TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' + MPX_ACCOUNTS = { + 'cnet': 2288573011, + 'zdnet': 2387448114, + } def _real_extract(self, url): - display_id = self._match_id(url) + site, display_id = re.match(self._VALID_URL, url).groups() webpage = 
self._download_webpage(url, display_id) data_json = self._html_search_regex( - r"data-cnet-video(?:-uvp)?-options='([^']+)'", + r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'", webpage, 'data json') data = self._parse_json(data_json, display_id) vdata = data.get('video') or data['videos'][0] @@ -56,8 +78,11 @@ class CNETIE(ThePlatformIE): uploader = None uploader_id = None - media_guid_path = 'media/guid/2288573011/%s' % vdata['mpxRefId'] - formats, subtitles = self._extract_theplatform_smil(self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id) + media_guid_path = 'media/guid/%d/%s' % (self.MPX_ACCOUNTS[site], vdata['mpxRefId']) + formats, subtitles = [], {} + if site == 'cnet': + formats, subtitles = self._extract_theplatform_smil( + self.TP_RELEASE_URL_TEMPLATE % media_guid_path, video_id) for (fkey, vid) in vdata['files'].items(): if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']: continue From 08136dc13805abb1832587e03e68066f07bd5776 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 2 Apr 2016 10:57:57 +0100 Subject: [PATCH 0106/3599] [brightcove] fix format sorting --- youtube_dl/extractor/brightcove.py | 3 ++- youtube_dl/extractor/thestar.py | 4 ++++ youtube_dl/extractor/tv3.py | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 0d162d337..a8919001d 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -396,6 +396,7 @@ class BrightcoveNewIE(InfoExtractor): 'formats': 'mincount:41', }, 'params': { + # m3u8 download 'skip_download': True, } }, { @@ -533,7 +534,7 @@ class BrightcoveNewIE(InfoExtractor): f.update({ 'url': src or streaming_src, 'format_id': build_format_id('http' if src else 'http-streaming'), - 'preference': 2 if src else 1, + 'source_preference': 0 if src else -1, }) else: f.update({ diff --git a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py index b7e9af2af..ba1380abc 100644 
--- a/youtube_dl/extractor/thestar.py +++ b/youtube_dl/extractor/thestar.py @@ -19,6 +19,10 @@ class TheStarIE(InfoExtractor): 'uploader_id': '794267642001', 'timestamp': 1454353482, 'upload_date': '20160201', + }, + 'params': { + # m3u8 download + 'skip_download': True, } } BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s' diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py index d3f690dc7..3867ec90d 100644 --- a/youtube_dl/extractor/tv3.py +++ b/youtube_dl/extractor/tv3.py @@ -21,6 +21,7 @@ class TV3IE(InfoExtractor): 'Failed to download MPD manifest' ], 'params': { + # m3u8 download 'skip_download': True, }, } From db8ee7ec0598f8893e4259ac9373c44726e4f84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Apr 2016 18:48:05 +0600 Subject: [PATCH 0107/3599] [extractor/common] Fix numeric identifiers conversion in DASH URL templates --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 94a583891..011edcc0a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1515,7 +1515,8 @@ class InfoExtractor(object): representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) media_template = representation_ms_info['media_template'] media_template = media_template.replace('$RepresentationID$', representation_id) - media_template = re.sub(r'\$(Number|Bandwidth)(?:%(0\d+)d)?\$', r'%(\1)\2d', media_template) + media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) + media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', media_template) media_template.replace('$$', '$') representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in 
range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])] if 'segment_urls' in representation_ms_info: From b507cc925b8dbb37b0abce748ff73a7ad102494a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Apr 2016 18:49:58 +0600 Subject: [PATCH 0108/3599] [extractor/common] Carry long line --- youtube_dl/extractor/common.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 011edcc0a..ec6625eea 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1518,7 +1518,13 @@ class InfoExtractor(object): media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', media_template) media_template.replace('$$', '$') - representation_ms_info['segment_urls'] = [media_template % {'Number': segment_number, 'Bandwidth': representation_attrib.get('bandwidth')} for segment_number in range(representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])] + representation_ms_info['segment_urls'] = [ + media_template % { + 'Number': segment_number, + 'Bandwidth': representation_attrib.get('bandwidth')} + for segment_number in range( + representation_ms_info['start_number'], + representation_ms_info['total_number'] + representation_ms_info['start_number'])] if 'segment_urls' in representation_ms_info: f.update({ 'segment_urls': representation_ms_info['segment_urls'], From bbc26c8a012d215e98a98a671471cd75e7765286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Apr 2016 19:00:38 +0600 Subject: [PATCH 0109/3599] [bbc] Set vcodec to none for audio formats --- youtube_dl/extractor/bbc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 
dedf721bd..425f08f2b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -328,6 +328,7 @@ class BBCCoUkIE(InfoExtractor): 'format_id': '%s_%s' % (service, format['format_id']), 'abr': abr, 'acodec': acodec, + 'vcodec': 'none', }) formats.extend(conn_formats) return formats From 2defa7d75aa424c16ca76a25a05297daed0bb5bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 Apr 2016 18:01:58 +0200 Subject: [PATCH 0110/3599] [instagram:user] Fix extraction (fixes #9059) The URL for the next page was incorrect and we always got the same page, therefore it got trapped in an infinite loop. --- youtube_dl/extractor/instagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 4e62098b0..11bb58d8a 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -152,7 +152,7 @@ class InstagramUserIE(InfoExtractor): if not page['items']: break - max_id = page['items'][-1]['id'] + max_id = page['items'][-1]['id'].split('_')[0] media_url = ( 'http://instagram.com/%s/media?max_id=%s' % ( uploader_id, max_id)) From 41f5492fbcddfcbae133dc27e8d94ece3755df2e Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 2 Apr 2016 18:54:40 +0100 Subject: [PATCH 0111/3599] [brightcove:legacy] improve format extraction and extract uploader_id, duration and timestamp --- youtube_dl/extractor/brightcove.py | 48 +++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index a8919001d..a5091238b 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -46,6 +46,9 @@ class BrightcoveLegacyIE(InfoExtractor): 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', 'uploader': '8TV', 'description': 
'md5:a950cc4285c43e44d763d036710cd9cd', + 'timestamp': 1368213670, + 'upload_date': '20130510', + 'uploader_id': 1589608506001, } }, { @@ -57,6 +60,9 @@ class BrightcoveLegacyIE(InfoExtractor): 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', 'uploader': 'Oracle', + 'timestamp': 1344975024, + 'upload_date': '20120814', + 'uploader_id': 1460825906, }, }, { @@ -68,6 +74,9 @@ class BrightcoveLegacyIE(InfoExtractor): 'title': 'This Bracelet Acts as a Personal Thermostat', 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', 'uploader': 'Mashable', + 'timestamp': 1382041798, + 'upload_date': '20131017', + 'uploader_id': 1130468786001, }, }, { @@ -85,14 +94,17 @@ class BrightcoveLegacyIE(InfoExtractor): { # test flv videos served by akamaihd.net # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william - 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', # The md5 checksum changes on each download 'info_dict': { - 'id': '2996102916001', + 'id': 
'3750436379001', 'ext': 'flv', 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', - 'uploader': 'Red Bull TV', + 'uploader': 'RBTV Old (do not use)', 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + 'timestamp': 1409122195, + 'upload_date': '20140827', + 'uploader_id': 710858724001, }, }, { @@ -106,6 +118,12 @@ class BrightcoveLegacyIE(InfoExtractor): 'playlist_mincount': 7, }, ] + FLV_VCODECS = { + 1: 'SORENSON', + 2: 'ON2', + 3: 'H264', + 4: 'VP8', + } @classmethod def _build_brighcove_url(cls, object_str): @@ -295,6 +313,9 @@ class BrightcoveLegacyIE(InfoExtractor): 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), 'uploader': video_info.get('publisherName'), + 'uploader_id': video_info.get('publisherId'), + 'duration': float_or_none(video_info.get('length'), 1000), + 'timestamp': int_or_none(video_info.get('creationDate'), 1000), } renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', []) @@ -318,19 +339,30 @@ class BrightcoveLegacyIE(InfoExtractor): ext = 'flv' if ext is None: ext = determine_ext(url) - size = rend.get('size') + tbr = int_or_none(rend.get('encodingRate'), 1000), a_format = { + 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''), 'url': url, 'ext': ext, - 'height': rend.get('frameHeight'), - 'width': rend.get('frameWidth'), - 'filesize': size if size != 0 else None, + 'filesize': int_or_none(rend.get('size')) or None, + 'tbr': tbr, } + if rend.get('audioOnly'): + a_format.update({ + 'vcodec': 'none', + }) + else: + a_format.update({ + 'height': int_or_none(rend.get('frameHeight')), + 'width': int_or_none(rend.get('frameWidth')), + 'vcodec': rend.get('videoCodec'), + }) # m3u8 manifests with remote == false are media playlists # Not calling _extract_m3u8_formats here to save network traffic if ext == 'm3u8': a_format.update({ + 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''), 'ext': 
'mp4', 'protocol': 'm3u8', }) @@ -341,6 +373,8 @@ class BrightcoveLegacyIE(InfoExtractor): elif video_info.get('FLVFullLengthURL') is not None: info.update({ 'url': video_info['FLVFullLengthURL'], + 'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')), + 'filesize': int_or_none(video_info.get('FLVFullSize')), }) if self._downloader.params.get('include_ads', False): From e47d19e991456fe4afdab1a76a653f7821e99c3f Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 2 Apr 2016 18:56:01 +0100 Subject: [PATCH 0112/3599] [brightcove:new] extract subtitles and strip video title --- youtube_dl/extractor/brightcove.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index a5091238b..6128b6762 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -515,7 +515,7 @@ class BrightcoveNewIE(InfoExtractor): raise ExtractorError(json_data[0]['message'], expected=True) raise - title = json_data['name'] + title = json_data['name'].strip() formats = [] for source in json_data.get('sources', []): @@ -579,20 +579,22 @@ class BrightcoveNewIE(InfoExtractor): formats.append(f) self._sort_formats(formats) - description = json_data.get('description') - thumbnail = json_data.get('thumbnail') - timestamp = parse_iso8601(json_data.get('published_at')) - duration = float_or_none(json_data.get('duration'), 1000) - tags = json_data.get('tags', []) + subtitles = {} + for text_track in json_data.get('text_tracks', []): + if text_track.get('src'): + subtitles.setdefault(text_track.get('srclang'), []).append({ + 'url': text_track['src'], + }) return { 'id': video_id, 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, + 'description': json_data.get('description'), + 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), + 'duration': 
float_or_none(json_data.get('duration'), 1000), + 'timestamp': parse_iso8601(json_data.get('published_at')), 'uploader_id': account_id, 'formats': formats, - 'tags': tags, + 'subtitles': subtitles, + 'tags': json_data.get('tags', []), } From 3aac9b2fb1a103f1d350ba10060e59bb04a6a2e8 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 2 Apr 2016 18:56:31 +0100 Subject: [PATCH 0113/3599] [nowness] update tests --- youtube_dl/extractor/nowness.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index 446f5901c..74860eb20 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -63,8 +63,11 @@ class NownessIE(NownessBaseIE): 'title': 'Candor: The Art of Gesticulation', 'description': 'Candor: The Art of Gesticulation', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'Nowness', + 'timestamp': 1446745676, + 'upload_date': '20151105', + 'uploader_id': '2385340575001', }, + 'add_ie': ['BrightcoveNew'], }, { 'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr', 'md5': 'e79cf125e387216f86b2e0a5b5c63aa3', @@ -74,8 +77,11 @@ class NownessIE(NownessBaseIE): 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR', 'description': 'Kasper Bjørke ft. 
Jaakko Eino Kalevi: TNR', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'Nowness', + 'timestamp': 1407315371, + 'upload_date': '20140806', + 'uploader_id': '2385340575001', }, + 'add_ie': ['BrightcoveNew'], }, { # vimeo 'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut', @@ -90,6 +96,7 @@ class NownessIE(NownessBaseIE): 'uploader': 'Cinema Sem Lei', 'uploader_id': 'cinemasemlei', }, + 'add_ie': ['Vimeo'], }] def _real_extract(self, url): From 4d4cd35f485c652a39a631fbf3d40c9f4353e807 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 2 Apr 2016 20:55:44 +0100 Subject: [PATCH 0114/3599] [brightcove:legacy] extract uploader_id as a string --- youtube_dl/extractor/brightcove.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 6128b6762..f9056f514 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -48,7 +48,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', 'timestamp': 1368213670, 'upload_date': '20130510', - 'uploader_id': 1589608506001, + 'uploader_id': '1589608506001', } }, { @@ -62,7 +62,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'uploader': 'Oracle', 'timestamp': 1344975024, 'upload_date': '20120814', - 'uploader_id': 1460825906, + 'uploader_id': '1460825906', }, }, { @@ -76,7 +76,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'uploader': 'Mashable', 'timestamp': 1382041798, 'upload_date': '20131017', - 'uploader_id': 1130468786001, + 'uploader_id': '1130468786001', }, }, { @@ -104,7 +104,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', 'timestamp': 1409122195, 'upload_date': '20140827', - 'uploader_id': 710858724001, + 'uploader_id': '710858724001', }, }, { @@ -313,7 +313,7 @@ class BrightcoveLegacyIE(InfoExtractor): 'description': 
video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), 'uploader': video_info.get('publisherName'), - 'uploader_id': video_info.get('publisherId'), + 'uploader_id': compat_str(video_info.get('publisherId')), 'duration': float_or_none(video_info.get('length'), 1000), 'timestamp': int_or_none(video_info.get('creationDate'), 1000), } From 23576edbfcaa3d7f0283631516c82715a85c6856 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 2 Apr 2016 21:31:21 +0100 Subject: [PATCH 0115/3599] [brightcove:legacy] skip None value for uploader_id --- youtube_dl/extractor/brightcove.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f9056f514..c718cf385 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -307,13 +307,14 @@ class BrightcoveLegacyIE(InfoExtractor): playlist_title=playlist_info['mediaCollectionDTO']['displayName']) def _extract_video_info(self, video_info): + publisher_id = video_info.get('publisherId') info = { 'id': compat_str(video_info['id']), 'title': video_info['displayName'].strip(), 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), 'uploader': video_info.get('publisherName'), - 'uploader_id': compat_str(video_info.get('publisherId')), + 'uploader_id': compat_str(publisher_id) if publisher_id else None, 'duration': float_or_none(video_info.get('length'), 1000), 'timestamp': int_or_none(video_info.get('creationDate'), 1000), } From 6d4fc66bfc9bb3ed2a4f68366f372a9bedf6e708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 4 Apr 2016 02:26:20 +0600 Subject: [PATCH 0116/3599] [youtube] Add support for zwearz (Closes #9062) --- youtube_dl/extractor/youtube.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py 
b/youtube_dl/extractor/youtube.py index 28355bf46..188066561 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -270,7 +270,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): )) |(?: youtu\.be| # just youtu.be/xxxx - vid\.plus # or vid.plus/xxxx + vid\.plus| # or vid.plus/xxxx + zwearz\.com/watch| # or zwearz.com/watch/xxxx )/ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) @@ -758,6 +759,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'http://vid.plus/FlRa-iH7PGw', 'only_matching': True, }, + { + 'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', + 'only_matching': True, + }, { # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468) # Also tests cut-off URL expansion in video description (see From 8c7d6e8e2279beccf638cd0fae9d91876e0486b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 4 Apr 2016 20:44:06 +0200 Subject: [PATCH 0117/3599] [zdf] Extract subtitles (closes #9081) --- youtube_dl/extractor/zdf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 81c22a627..2ef177275 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -85,6 +85,13 @@ class ZDFIE(InfoExtractor): uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')) + subtitles = {} + captions_url = doc.find('.//caption/url') + if captions_url is not None: + subtitles['de'] = [{ + 'url': captions_url.text, + 'ext': 'ttml', + }] def xml_to_thumbnails(fnode): thumbnails = [] @@ -190,6 +197,7 @@ class ZDFIE(InfoExtractor): 'uploader_id': uploader_id, 'upload_date': upload_date, 'formats': formats, + 'subtitles': subtitles, } def _real_extract(self, url): 
From 5bf28d7864d83be98233b6d1e478d7911f99e2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 4 Apr 2016 20:46:35 +0200 Subject: [PATCH 0118/3599] [utils] dfxp2srt: add additional namespace Used by the ZDF subtitles (#9081). --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5c4ab2748..8e53962c9 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2131,6 +2131,7 @@ def dfxp2srt(dfxp_data): _x = functools.partial(xpath_with_ns, ns_map={ 'ttml': 'http://www.w3.org/ns/ttml', 'ttaf1': 'http://www.w3.org/2006/10/ttaf1', + 'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1', }) class TTMLPElementParser(object): @@ -2157,7 +2158,7 @@ def dfxp2srt(dfxp_data): dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) out = [] - paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') + paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p') if not paras: raise ValueError('Invalid dfxp/TTML subtitle') From 995cf05c96e888f7f1a818f9886345ea25671aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Apr 2016 21:40:43 +0600 Subject: [PATCH 0119/3599] [novamov] Make title fatal --- youtube_dl/extractor/novamov.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index a131f7dbd..63a77b9bf 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -81,7 +81,7 @@ class NovaMovIE(InfoExtractor): filekey = extract_filekey() - title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title', fatal=False) + title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title') description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False) 
api_response = self._download_webpage( From e7d77efb9dddc145b3d431047f2f98e19df4114e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Apr 2016 21:52:07 +0600 Subject: [PATCH 0120/3599] [auroravid] Add extractor (Closes #9070) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/novamov.py | 35 ++++++++++++++++++++++---------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ebf5ca3e9..69ea21a23 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -501,6 +501,7 @@ from .novamov import ( NowVideoIE, VideoWeedIE, CloudTimeIE, + AuroraVidIE, ) from .nowness import ( NownessIE, diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 63a77b9bf..5771a675d 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -27,17 +27,7 @@ class NovaMovIE(InfoExtractor): _DESCRIPTION_REGEX = r'(?s)
\s*

[^<]+

([^<]+)

' _URL_TEMPLATE = 'http://%s/video/%s' - _TEST = { - 'url': 'http://www.novamov.com/video/4rurhn9x446jj', - 'md5': '7205f346a52bbeba427603ba10d4b935', - 'info_dict': { - 'id': '4rurhn9x446jj', - 'ext': 'flv', - 'title': 'search engine optimization', - 'description': 'search engine optimization is used to rank the web page in the google search engine' - }, - 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' - } + _TEST = None def _check_existence(self, webpage, video_id): if re.search(self._FILE_DELETED_REGEX, webpage) is not None: @@ -187,3 +177,26 @@ class CloudTimeIE(NovaMovIE): _TITLE_REGEX = r']+class=["\']video_det["\'][^>]*>\s*([^<]+)' _TEST = None + + +class AuroraVidIE(NovaMovIE): + IE_NAME = 'auroravid' + IE_DESC = 'AuroraVid' + + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'auroravid\.to'} + + _HOST = 'www.auroravid.to' + + _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<' + + _TEST = { + 'url': 'http://www.auroravid.to/video/4rurhn9x446jj', + 'md5': '7205f346a52bbeba427603ba10d4b935', + 'info_dict': { + 'id': '4rurhn9x446jj', + 'ext': 'flv', + 'title': 'search engine optimization', + 'description': 'search engine optimization is used to rank the web page in the google search engine' + }, + 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' + } From 40a056d85d7711e2f93bd5f7cc057672650386b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Apr 2016 21:54:09 +0600 Subject: [PATCH 0121/3599] [extractor/__init__] Remove novamov extractor and sort novamov based extractors alphabetically --- youtube_dl/extractor/__init__.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 69ea21a23..c3121d83c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py 
@@ -496,12 +496,11 @@ from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE from .nova import NovaIE from .novamov import ( - NovaMovIE, - WholeCloudIE, + AuroraVidIE, + CloudTimeIE, NowVideoIE, VideoWeedIE, - CloudTimeIE, - AuroraVidIE, + WholeCloudIE, ) from .nowness import ( NownessIE, From e42f413716de822918356d15b0cb3c66608681b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Apr 2016 22:23:20 +0600 Subject: [PATCH 0122/3599] [rte] Improve thumbnail extraction (Closes #9085) --- youtube_dl/extractor/rte.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index 9c89974e7..ebe563ebb 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -39,9 +39,14 @@ class RteIE(InfoExtractor): duration = float_or_none(self._html_search_meta( 'duration', webpage, 'duration', fatal=False), 1000) - thumbnail_id = self._search_regex( - r'', webpage, 'thumbnail') - thumbnail = 'http://img.rasset.ie/' + thumbnail_id + '.jpg' + thumbnail = None + thumbnail_meta = self._html_search_meta('thumbnail', webpage) + if thumbnail_meta: + thumbnail_id = self._search_regex( + r'uri:irus:(.+)', thumbnail_meta, + 'thumbnail id', fatal=False) + if thumbnail_id: + thumbnail = 'http://img.rasset.ie/%s.jpg' % thumbnail_id feeds_url = self._html_search_meta('feeds-prefix', webpage, 'feeds url') + video_id json_string = self._download_json(feeds_url, video_id) From 65150b41bb3055820938c3c572ccb2ffd9f312aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 5 Apr 2016 22:27:33 +0600 Subject: [PATCH 0123/3599] [deezer] Fix extraction (Closes #9086) --- youtube_dl/extractor/deezer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index c3205ff5f..7a07f3267 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -41,7 
+41,9 @@ class DeezerPlaylistIE(InfoExtractor): 'Deezer said: %s' % geoblocking_msg, expected=True) data_json = self._search_regex( - r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n', webpage, 'data JSON') + (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*', + r'naboo\.display\(\'[^\']+\',\s*(.*?)\);\n'), + webpage, 'data JSON') data = json.loads(data_json) playlist_title = data.get('DATA', {}).get('TITLE') From 416930d45007cb1e24e4cd8638d9867dd84f3961 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 5 Apr 2016 18:36:24 +0200 Subject: [PATCH 0124/3599] release 2016.04.05 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 5 +++-- youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 128ba2fc0..e0a7d8282 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.05** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.04.01 +[debug] youtube-dl version 2016.04.05 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b9bcf450c..d6ee8476b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -57,6 +57,7 @@ - **AudioBoom** - **audiomack** - **audiomack:album** + - **auroravid**: AuroraVid - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -92,12 +93,14 @@ - **BYUtv** - **Camdemy** - **CamdemyFolder** + - **CamWithHer** - **canalc2.tv** - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **Canvas** - **CBC** - **CBCPlayer** - **CBS** + - **CBSInteractive** - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** @@ -119,7 +122,6 @@ - **Clyp** - **cmt.com** - **CNBC** - - **CNET** - **CNN** - **CNNArticle** - **CNNBlogs** @@ -419,7 +421,6 @@ - **Normalboots** - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - - **novamov**: NovaMov - **nowness** - **nowness:playlist** - **nowness:series** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d9e1cb2a8..45d152902 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.04.01' 
+__version__ = '2016.04.05' From 8790249c6860374b4d7eeeffae9e7b30d346eaf7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 6 Apr 2016 16:12:16 +0800 Subject: [PATCH 0125/3599] [iqiyi] Improve error detection for VIP-only videos Closes #9071 --- youtube_dl/extractor/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 9e8c9432a..88570f261 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -368,7 +368,10 @@ class IqiyiIE(InfoExtractor): auth_req, video_id, note='Downloading video authentication JSON', errnote='Unable to download video authentication JSON') - if auth_result['code'] == 'Q00506': # requires a VIP account + + if auth_result['code'] == 'Q00505': # No preview available (不允许试看鉴权失败) + raise ExtractorError('This video requires a VIP account', expected=True) + if auth_result['code'] == 'Q00506': # End of preview time (试看结束鉴权失败) if do_report_warning: self.report_warning('Needs a VIP account for full video') return False From 92d5477d84c6663e71d6d22e261753a16b78000f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 6 Apr 2016 18:29:54 +0800 Subject: [PATCH 0126/3599] [compat] Handle tuples properly in urlencode() Fixes #9055 --- test/test_compat.py | 4 ++++ youtube_dl/compat.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index cc105807a..618668210 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -76,6 +76,10 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def') + 
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') def test_compat_shlex_split(self): self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 76b6b0e38..0b6c5ca7a 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -181,7 +181,8 @@ except ImportError: # Python 2 if isinstance(e, dict): e = encode_dict(e) elif isinstance(e, (list, tuple,)): - e = encode_list(e) + list_e = encode_list(e) + e = tuple(list_e) if isinstance(e, tuple) else list_e elif isinstance(e, compat_str): e = e.encode(encoding) return e From be0c7009fb21ebbbe530ad594a9465dff9d72467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 6 Apr 2016 14:09:31 +0200 Subject: [PATCH 0127/3599] Makefile: use full path for the ISSUE_TEMPLATE.md file --- Makefile | 4 ++-- devscripts/release.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3a6c37944..1b08a4783 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: youtube-dl README.md CONTRIBUTING.md ISSUE_TEMPLATE.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites +all: youtube-dl README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe @@ -59,7 +59,7 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md -ISSUE_TEMPLATE.md: 
+.github/ISSUE_TEMPLATE.md: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md youtube_dl/version.py $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md supportedsites: diff --git a/devscripts/release.sh b/devscripts/release.sh index 6718ce39b..8dea55dbb 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -46,7 +46,7 @@ fi sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." -make README.md CONTRIBUTING.md ISSUE_TEMPLATE.md supportedsites +make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py git commit -m "release $version" From ec4a4c6fccebc1c8ae7a35129171b1181d8badb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 6 Apr 2016 14:16:05 +0200 Subject: [PATCH 0128/3599] Makefile: remove ISSUE_TEMPLATE.md from the 'all' target (fixes #9088) It isn't included in the tar file, causing build failures. Since it's only used for GitHub, I think we don't need to store it in the tar file. 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1b08a4783..ba7f7ed36 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: youtube-dl README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites +all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe From c41cf65d4ab41f08f98905082b7a137ac57927ca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 6 Apr 2016 15:13:08 +0200 Subject: [PATCH 0129/3599] release 2016.04.06 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e0a7d8282..bf9494646 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.05*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.05** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.04.06*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.04.06** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.04.05 +[debug] youtube-dl version 2016.04.06 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 45d152902..167b16e24 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.04.05' +__version__ = '2016.04.06' From 7a93ab5f3f1535efc948376869f61716ed2af1f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 7 Apr 2016 02:53:53 +0600 Subject: [PATCH 0130/3599] [extractor/common] Introduce music album metafields --- youtube_dl/extractor/common.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ec6625eea..2b40f3b7c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -232,6 +232,24 @@ class InfoExtractor(object): episode_number: Number of the video episode within a season, as an integer. episode_id: Id of the video episode, as a unicode string. + The following fields should only be used when the media is a track or a part of + a music album: + + track: Title of the track. + track_number: Number of the track within an album or a disc, as an integer. 
+ track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii), + as a unicode string. + artist: Artist(s) of the track. + genre: Genre(s) of the track. + album: Title of the album the track belongs to. + album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc). + album_artist: List of all artists appeared on the album (e.g. + "Ash Borer / Fell Voices" or "Various Artists", useful for splits + and compilations). + disc_number: Number of the disc or other physical medium the track belongs to, + as an integer. + release_year: Year (YYYY) when the album was released. + Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, None is equivalent to absence of information. From e90d175436e61e207e0b0cae7f699494dcf15922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 7 Apr 2016 02:56:13 +0600 Subject: [PATCH 0131/3599] [yandexmusic] Extract music album metafields (Closes #7354) --- youtube_dl/extractor/yandexmusic.py | 40 ++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 025716958..7a90cc60c 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -39,9 +39,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio - Gypsy Eyes 1', + 'title': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio - Gypsy Eyes 1', 'filesize': 4628061, 'duration': 193.04, + 'track': 'Gypsy Eyes 1', + 'album': 'Gypsy Soul', + 'album_artist': 'Carlo Ambrosio', + 'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio', + 'release_year': '2009', } } @@ -64,16 +69,45 @@ class YandexMusicTrackIE(YandexMusicBaseIE): thumbnail = cover_uri.replace('%%', 'orig') if not thumbnail.startswith('http'): thumbnail = 'http://' + thumbnail - return { + + track_title = track['title'] + 
track_info = { 'id': track['id'], 'ext': 'mp3', 'url': self._get_track_url(track['storageDir'], track['id']), - 'title': '%s - %s' % (track['artists'][0]['name'], track['title']), 'filesize': int_or_none(track.get('fileSize')), 'duration': float_or_none(track.get('durationMs'), 1000), 'thumbnail': thumbnail, + 'track': track_title, } + def extract_artist(artist_list): + if artist_list and isinstance(artist_list, list): + artists_names = [a['name'] for a in artist_list if a.get('name')] + if artists_names: + return ', '.join(artists_names) + + albums = track.get('albums') + if albums and isinstance(albums, list): + album = albums[0] + if isinstance(album, dict): + year = album.get('year') + track_info.update({ + 'album': album.get('title'), + 'album_artist': extract_artist(album.get('artists')), + 'release_year': compat_str(year) if year else None, + }) + + track_artist = extract_artist(track.get('artists')) + if track_artist: + track_info.update({ + 'artist': track_artist, + 'title': '%s - %s' % (track_artist, track_title), + }) + else: + track_info['title'] = track_title + return track_info + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) album_id, track_id = mobj.group('album_id'), mobj.group('id') From 3afef2e3fc60a7baa2d923e9cfbaf521c7f5ca0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 7 Apr 2016 22:40:35 +0600 Subject: [PATCH 0132/3599] [beeg] Improve extraction --- youtube_dl/extractor/beeg.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 9072949dd..956c7680e 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -33,8 +33,33 @@ class BeegIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + cpl_url = self._search_regex( + 
r']+src=(["\'])(?P(?:https?:)?//static\.beeg\.com/cpl/\d+\.js.*?)\1', + webpage, 'cpl', default=None, group='url') + + beeg_version, beeg_salt = [None] * 2 + + if cpl_url: + cpl = self._download_webpage( + self._proto_relative_url(cpl_url), video_id, + 'Downloading cpl JS', fatal=False) + if cpl: + beeg_version = self._search_regex( + r'beeg_version\s*=\s*(\d+)', cpl, + 'beeg version', default=None) or self._search_regex( + r'/(\d+)\.js', cpl_url, 'beeg version', default=None) + beeg_salt = self._search_regex( + r'beeg_salt\s*=\s*(["\'])(?P.+?)\1', cpl, 'beeg beeg_salt', + default=None, group='beeg_salt') + + beeg_version = beeg_version or '1750' + beeg_salt = beeg_salt or 'MIDtGaw96f0N1kMMAM1DE46EC9pmFr' + video = self._download_json( - 'https://api.beeg.com/api/v6/1738/video/%s' % video_id, video_id) + 'http://api.beeg.com/api/v6/%s/video/%s' % (beeg_version, video_id), + video_id) def split(o, e): def cut(s, x): @@ -51,7 +76,7 @@ class BeegIE(InfoExtractor): def decrypt_key(key): # Reverse engineered from http://static.beeg.com/cpl/1738.js - a = 'GUuyodcfS8FW8gQp4OKLMsZBcX0T7B' + a = beeg_salt e = compat_urllib_parse_unquote(key) o = ''.join([ compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21) @@ -101,5 +126,5 @@ class BeegIE(InfoExtractor): 'duration': duration, 'tags': tags, 'formats': formats, - 'age_limit': 18, + 'age_limit': self._rta_search(webpage), } From ed6fb8b804448724fcd1ba4abc3fa028b817efe2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 7 Apr 2016 23:22:43 +0600 Subject: [PATCH 0133/3599] [vrt] Add support for direct hls playlists and YouTube (Closes #9108) --- youtube_dl/extractor/vrt.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 2b6bae89b..8e35f24e8 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re 
from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + determine_ext, + float_or_none, +) class VRTIE(InfoExtractor): @@ -52,6 +55,11 @@ class VRTIE(InfoExtractor): 'duration': 661, } }, + { + # YouTube video + 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957', + 'only_matching': True, + }, { 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', 'only_matching': True, @@ -66,7 +74,17 @@ class VRTIE(InfoExtractor): video_id = self._search_regex( r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False) + src = self._search_regex( + r'data-video-src="([^"]+)"', webpage, 'video src', default=None) + + video_type = self._search_regex( + r'data-video-type="([^"]+)"', webpage, 'video type', default=None) + + if video_type == 'YouTubeVideo': + return self.url_result(src, 'Youtube') + formats = [] + mobj = re.search( r'data-video-iphone-server="(?P[^"]+)"\s+data-video-iphone-path="(?P[^"]+)"', webpage) @@ -74,11 +92,15 @@ class VRTIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( '%s/%s' % (mobj.group('server'), mobj.group('path')), video_id, 'mp4', m3u8_id='hls', fatal=False)) - mobj = re.search(r'data-video-src="(?P[^"]+)"', webpage) - if mobj: - formats.extend(self._extract_f4m_formats( - '%s/manifest.f4m' % mobj.group('src'), - video_id, f4m_id='hds', fatal=False)) + + if src: + if determine_ext(src) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.extend(self._extract_f4m_formats( + '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) if not formats and 'data-video-geoblocking="true"' in webpage: self.raise_geo_restricted('This video is only available in Belgium') From 536a55dabd7bcc2f34195beb84211028c934ed7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 00:17:47 +0600 Subject: [PATCH 
0134/3599] [YoutubeDL] Sanitize single thumbnail URL --- youtube_dl/YoutubeDL.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d7aa951ff..cd0805303 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1240,7 +1240,10 @@ class YoutubeDL(object): self.list_thumbnails(info_dict) return - if thumbnails and 'thumbnail' not in info_dict: + thumbnail = info_dict.get('thumbnail') + if thumbnail: + info_dict['thumbnail'] = sanitize_url(thumbnail) + elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] if 'display_id' not in info_dict and 'id' in info_dict: From 9a32e80477f470b8d8d320db38b5de02e0e6bc92 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 8 Apr 2016 14:51:00 +0100 Subject: [PATCH 0135/3599] [acast] fix extraction(#9117) --- youtube_dl/extractor/acast.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 92eee8119..79a17e73a 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -26,13 +26,7 @@ class ACastIE(InfoExtractor): def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() - - embed_page = self._download_webpage( - re.sub('(?:www\.)?acast\.com', 'embedcdn.acast.com', url), display_id) - cast_data = self._parse_json(self._search_regex( - r'window\[\'acast/queries\'\]\s*=\s*([^;]+);', embed_page, 'acast data'), - display_id)['GetAcast/%s/%s' % (channel, display_id)] - + cast_data = self._download_json('https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id) return { 'id': compat_str(cast_data['id']), 'display_id': display_id, From a1ff3cd5f98980e37b5bdb1fd24cdba56ed6e618 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 8 Apr 2016 15:15:34 +0100 Subject: [PATCH 0136/3599] [acast] fix channel extraction(closes #9117) --- youtube_dl/extractor/acast.py | 32 
++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index 79a17e73a..94ce88c83 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -2,10 +2,14 @@ from __future__ import unicode_literals import re +import functools from .common import InfoExtractor from ..compat import compat_str -from ..utils import int_or_none +from ..utils import ( + int_or_none, + OnDemandPagedList, +) class ACastIE(InfoExtractor): @@ -26,7 +30,8 @@ class ACastIE(InfoExtractor): def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() - cast_data = self._download_json('https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id) + cast_data = self._download_json( + 'https://embed.acast.com/api/acasts/%s/%s' % (channel, display_id), display_id) return { 'id': compat_str(cast_data['id']), 'display_id': display_id, @@ -52,15 +57,26 @@ class ACastChannelIE(InfoExtractor): 'playlist_mincount': 20, } _API_BASE_URL = 'https://www.acast.com/api/' + _PAGE_SIZE = 10 @classmethod def suitable(cls, url): return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) - def _real_extract(self, url): - display_id = self._match_id(url) - channel_data = self._download_json(self._API_BASE_URL + 'channels/%s' % display_id, display_id) - casts = self._download_json(self._API_BASE_URL + 'channels/%s/acasts' % display_id, display_id) - entries = [self.url_result('https://www.acast.com/%s/%s' % (display_id, cast['url']), 'ACast') for cast in casts] + def _fetch_page(self, channel_slug, page): + casts = self._download_json( + self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page), + channel_slug, note='Download page %d of channel data' % page) + for cast in casts: + yield self.url_result( + 'https://www.acast.com/%s/%s' % (channel_slug, cast['url']), + 'ACast', cast['id']) - return 
self.playlist_result(entries, compat_str(channel_data['id']), channel_data['name'], channel_data.get('description')) + def _real_extract(self, url): + channel_slug = self._match_id(url) + channel_data = self._download_json( + self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug) + entries = OnDemandPagedList(functools.partial( + self._fetch_page, channel_slug), self._PAGE_SIZE) + return self.playlist_result(entries, compat_str( + channel_data['id']), channel_data['name'], channel_data.get('description')) From 56019444cb2fe64f0937fb52aff9e36f30b3b343 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 21:26:42 +0600 Subject: [PATCH 0137/3599] [novamov] Improve _VALID_URL template (Closes #9116) --- youtube_dl/extractor/novamov.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 5771a675d..3bbd47355 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -16,7 +16,14 @@ class NovaMovIE(InfoExtractor): IE_NAME = 'novamov' IE_DESC = 'NovaMov' - _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video|mobile/#/videos)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P[a-z\d]{13})' + _VALID_URL_TEMPLATE = r'''(?x) + http:// + (?: + (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/| + (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv= + ) + (?P[a-z\d]{13}) + ''' _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'} _HOST = 'www.novamov.com' @@ -189,7 +196,7 @@ class AuroraVidIE(NovaMovIE): _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<' - _TEST = { + _TESTS = [{ 'url': 'http://www.auroravid.to/video/4rurhn9x446jj', 'md5': '7205f346a52bbeba427603ba10d4b935', 'info_dict': { @@ -199,4 +206,7 @@ class AuroraVidIE(NovaMovIE): 'description': 'search engine optimization is used to rank the web page in the google search engine' }, 'skip': '"Invalid 
token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' - } + }, { + 'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj', + 'only_matching': True, + }] From a64c0c9b06b24085b1332452b5859fe6ac0a26d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 22:15:36 +0600 Subject: [PATCH 0138/3599] [democracynow] Make description optional (Closes #9115) --- youtube_dl/extractor/democracynow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 6cd395e11..4b6d2652a 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -38,7 +38,7 @@ class DemocracynowIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - description = self._og_search_description(webpage) + description = self._og_search_description(webpage, default=None) json_data = self._parse_json(self._search_regex( r']+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'), From a134426d619ac711f6adc24242b1e7d66d0b346a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 22:19:16 +0600 Subject: [PATCH 0139/3599] [democracynow] Fix tests --- youtube_dl/extractor/democracynow.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 4b6d2652a..188f890ce 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -17,22 +17,23 @@ class DemocracynowIE(InfoExtractor): IE_NAME = 'democracynow' _TESTS = [{ 'url': 'http://www.democracynow.org/shows/2015/7/3', - 'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d', + 'md5': '3757c182d3d84da68f5c8f506c18c196', 'info_dict': { 'id': '2015-0703-001', 'ext': 'mp4', - 'title': 'July 03, 2015 - Democracy Now!', - 'description': 'A daily 
independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs', + 'title': 'Daily Show', }, }, { 'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree', - 'md5': 'fbb8fe3d7a56a5e12431ce2f9b2fab0d', 'info_dict': { 'id': '2015-0703-001', 'ext': 'mp4', 'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag', 'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21', }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): From 18da24634c38ff6af4deaf606badfcbb9e6c3d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 22:27:27 +0600 Subject: [PATCH 0140/3599] [democracynow] Improve extraction --- youtube_dl/extractor/democracynow.py | 36 ++++++++++++++++------------ 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 188f890ce..65a98d789 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -38,17 +38,32 @@ class DemocracynowIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) - description = self._og_search_description(webpage, default=None) json_data = self._parse_json(self._search_regex( r']+type="text/json"[^>]*>\s*({[^>]+})', webpage, 'json'), display_id) - video_id = None + + title = json_data['title'] formats = [] - default_lang = 'en' + video_id = None + for key in ('file', 'audio', 'video', 'high_res_video'): + media_url = json_data.get(key, '') + if not media_url: + continue + media_url = re.sub(r'\?.*', '', 
compat_urlparse.urljoin(url, media_url)) + video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') + formats.append({ + 'url': media_url, + 'vcodec': 'none' if key == 'audio' else None, + }) + + self._sort_formats(formats) + + default_lang = 'en' subtitles = {} def add_subtitle_item(lang, info_dict): @@ -68,22 +83,13 @@ class DemocracynowIE(InfoExtractor): 'url': compat_urlparse.urljoin(url, subtitle_item['url']), }) - for key in ('file', 'audio', 'video'): - media_url = json_data.get(key, '') - if not media_url: - continue - media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url)) - video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') - formats.append({ - 'url': media_url, - }) - - self._sort_formats(formats) + description = self._og_search_description(webpage, default=None) return { 'id': video_id or display_id, - 'title': json_data['title'], + 'title': title, 'description': description, + 'thumbnail': json_data.get('image'), 'subtitles': subtitles, 'formats': formats, } From fb38aa8b53d25606d2582e1043d09ad1a077bf61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 22:48:08 +0600 Subject: [PATCH 0141/3599] [extractor/common] Support arbitrary format strings for template based identifiers in mpd manifests (Closes #9119, closes #9120) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2b40f3b7c..a7324af5c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1534,7 +1534,7 @@ class InfoExtractor(object): media_template = representation_ms_info['media_template'] media_template = media_template.replace('$RepresentationID$', representation_id) media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) - media_template = re.sub(r'\$(Number|Bandwidth)%(\d+)\$', r'%(\1)\2d', 
media_template) + media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template) media_template.replace('$$', '$') representation_ms_info['segment_urls'] = [ media_template % { From 3c6c7e7d7e5a7cbf87385bf3c649342c3ee4327d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 23:16:02 +0600 Subject: [PATCH 0142/3599] [gdcvault] Fix extraction (Closes #9107, closes #9114) --- youtube_dl/extractor/gdcvault.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 59ed4c38f..25e93c9a4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -159,9 +159,10 @@ class GDCVaultIE(InfoExtractor): 'title': title, } + PLAYER_REGEX = r'', - start_page, 'xml root', default=None) + PLAYER_REGEX, start_page, 'xml root', default=None) if xml_root is None: # Probably need to authenticate login_res = self._login(webpage_url, display_id) @@ -171,18 +172,19 @@ class GDCVaultIE(InfoExtractor): start_page = login_res # Grab the url from the authenticated page xml_root = self._html_search_regex( - r'', start_page, 'xml filename', default=None) if xml_name is None: # Fallback to the older format - xml_name = self._html_search_regex(r'', + start_page, 'xml filename') - xml_description_url = xml_root + 'xml/' + xml_name - xml_description = self._download_xml(xml_description_url, display_id) + xml_description = self._download_xml( + '%s/xml/%s' % (xml_root, xml_name), display_id) video_title = xml_description.find('./metadata/title').text video_formats = self._parse_mp4(xml_description) From bacec0397ff2abddac460148e8ceb49989fce6a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 23:33:45 +0600 Subject: [PATCH 0143/3599] [extractor/common] Relax _hidden_inputs --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a7324af5c..17d00721c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -843,7 +843,7 @@ class InfoExtractor(object): for input in re.findall(r'(?i)]+)>', html): if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): continue - name = re.search(r'name=(["\'])(?P.+?)\1', input) + name = re.search(r'(?:name|id)=(["\'])(?P.+?)\1', input) if not name: continue value = re.search(r'value=(["\'])(?P.*?)\1', input) From 2f2fcf1a3304a7134d280726ae1a3d7010adbd59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 Apr 2016 23:34:59 +0600 Subject: [PATCH 0144/3599] [tnaflix] Fix extraction (Closes #9074) --- youtube_dl/extractor/tnaflix.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 79f036fe4..bc32c0926 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -76,7 +76,11 @@ class TNAFlixNetworkBaseIE(InfoExtractor): webpage = self._download_webpage(url, display_id) cfg_url = self._proto_relative_url(self._html_search_regex( - self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:') + self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:') + + if not cfg_url: + inputs = self._hidden_inputs(webpage) + cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey']) cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading metadata', From 568d2f78d635c3993e95334b9f8f6d2b47ecee51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Apr 2016 00:27:24 +0600 Subject: [PATCH 0145/3599] [tnaflix] Fix metadata extraction --- youtube_dl/extractor/tnaflix.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index bc32c0926..78174178e 100644 --- 
a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -136,7 +136,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating')) categories_str = extract_field(self._CATEGORIES_REGEX, 'categories') - categories = categories_str.split(', ') if categories_str is not None else [] + categories = [c.strip() for c in categories_str.split(',')] if categories_str is not None else [] return { 'id': video_id, @@ -190,13 +190,14 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P[^/]+)/video(?P\d+)' _TITLE_REGEX = r'(.+?) - TNAFlix Porn Videos' - _DESCRIPTION_REGEX = r'

([^<]+)

' - _UPLOADER_REGEX = r'(?s)]+class="infoTitle"[^>]*>Uploaded By:(.+?)]+name="description"[^>]+content="([^"]+)"' + _UPLOADER_REGEX = r'\s*Verified Member\s*\s*

(.+?)

' + _CATEGORIES_REGEX = r'(?s)]*>Categories:(.+?)
' _TESTS = [{ # anonymous uploader, no categories 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', - 'md5': 'ecf3498417d09216374fc5907f9c6ec0', + 'md5': '7e569419fe6d69543d01e6be22f5f7c4', 'info_dict': { 'id': '553878', 'display_id': 'Carmella-Decesare-striptease', @@ -205,17 +206,16 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): 'thumbnail': 're:https?://.*\.jpg$', 'duration': 91, 'age_limit': 18, - 'uploader': 'Anonymous', - 'categories': [], + 'categories': ['Porn Stars'], } }, { # non-anonymous uploader, categories 'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538', - 'md5': '0f5d4d490dbfd117b8607054248a07c0', + 'md5': 'fcba2636572895aba116171a899a5658', 'info_dict': { 'id': '6538', 'display_id': 'Educational-xxx-video', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Educational xxx video', 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8', 'thumbnail': 're:https?://.*\.jpg$', From e52d7f85f25e806527d7b618d8c3ad16d27681f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Feb 2016 13:16:18 +0100 Subject: [PATCH 0146/3599] Delay initialization of InfoExtractors until they are needed --- youtube_dl/YoutubeDL.py | 10 ++++++---- youtube_dl/extractor/__init__.py | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index cd0805303..f18a8e840 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -82,7 +82,7 @@ from .utils import ( YoutubeDLHandler, ) from .cache import Cache -from .extractor import get_info_extractor, gen_extractors +from .extractor import get_info_extractor, gen_extractor_classes from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( @@ -378,8 +378,9 @@ class YoutubeDL(object): def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) - 
self._ies_instances[ie.ie_key()] = ie - ie.set_downloader(self) + if not isinstance(ie, type): + self._ies_instances[ie.ie_key()] = ie + ie.set_downloader(self) def get_info_extractor(self, ie_key): """ @@ -397,7 +398,7 @@ class YoutubeDL(object): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ - for ie in gen_extractors(): + for ie in gen_extractor_classes(): self.add_info_extractor(ie) def add_post_processor(self, pp): @@ -661,6 +662,7 @@ class YoutubeDL(object): if not ie.suitable(url): continue + ie = self.get_info_extractor(ie.ie_key()) if not ie.working(): self.report_warning('The program functionality for this site has been marked as broken, ' 'and will probably not work.') diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3121d83c..cd1f116e2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -997,11 +997,18 @@ _ALL_CLASSES = [ _ALL_CLASSES.append(GenericIE) +def gen_extractor_classes(): + """ Return a list of supported extractors. + The order does matter; the first extractor matched is the one handling the URL. + """ + return _ALL_CLASSES + + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. 
""" - return [klass() for klass in _ALL_CLASSES] + return [klass() for klass in gen_extractor_classes()] def list_extractors(age_limit): From 1b3d5e05a824f880f1171eb840235e13cd8848dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Feb 2016 13:24:49 +0100 Subject: [PATCH 0147/3599] Move the extreactors import to youtube_dl/extractor/extractors.py --- README.md | 4 +- youtube_dl/extractor/__init__.py | 989 +--------------------------- youtube_dl/extractor/extractors.py | 991 +++++++++++++++++++++++++++++ 3 files changed, 994 insertions(+), 990 deletions(-) create mode 100644 youtube_dl/extractor/extractors.py diff --git a/README.md b/README.md index e972bf69f..cd18edd87 100644 --- a/README.md +++ b/README.md @@ -889,14 +889,14 @@ After you have ensured this site is distributing it's content legally, you can f # TODO more properties (see youtube_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). 
Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. 
When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: - $ git add youtube_dl/extractor/__init__.py + $ git add youtube_dl/extractor/extractors.py $ git add youtube_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index cd1f116e2..a0a53445a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,993 +1,6 @@ from __future__ import unicode_literals -from .abc import ABCIE -from .abc7news import Abc7NewsIE -from .academicearth import AcademicEarthCourseIE -from .acast import ( - ACastIE, - ACastChannelIE, -) -from .addanime import AddAnimeIE -from .adobetv import ( - AdobeTVIE, - AdobeTVShowIE, - AdobeTVChannelIE, - AdobeTVVideoIE, -) -from .adultswim import AdultSwimIE -from .aenetworks import AENetworksIE -from .aftonbladet import AftonbladetIE -from .airmozilla import AirMozillaIE -from .aljazeera import AlJazeeraIE -from .alphaporno import AlphaPornoIE -from .animeondemand import AnimeOnDemandIE -from .anitube import AnitubeIE -from .anysex import AnySexIE -from .aol import ( - AolIE, - AolFeaturesIE, -) -from .allocine import AllocineIE -from .aparat import AparatIE -from .appleconnect import AppleConnectIE -from .appletrailers import ( - AppleTrailersIE, - AppleTrailersSectionIE, -) -from .archiveorg import ArchiveOrgIE -from .ard import ( - ARDIE, - ARDMediathekIE, - SportschauIE, -) -from .arte import ( - ArteTvIE, - ArteTVPlus7IE, - ArteTVCreativeIE, - ArteTVConcertIE, - ArteTVFutureIE, - ArteTVCinemaIE, - ArteTVDDCIE, - ArteTVMagazineIE, - ArteTVEmbedIE, -) -from .atresplayer import AtresPlayerIE -from .atttechchannel import ATTTechChannelIE -from .audimedia import AudiMediaIE -from .audioboom import AudioBoomIE -from .audiomack import 
AudiomackIE, AudiomackAlbumIE -from .azubu import AzubuIE, AzubuLiveIE -from .baidu import BaiduVideoIE -from .bambuser import BambuserIE, BambuserChannelIE -from .bandcamp import BandcampIE, BandcampAlbumIE -from .bbc import ( - BBCCoUkIE, - BBCCoUkArticleIE, - BBCIE, -) -from .beeg import BeegIE -from .behindkink import BehindKinkIE -from .beatportpro import BeatportProIE -from .bet import BetIE -from .bigflix import BigflixIE -from .bild import BildIE -from .bilibili import BiliBiliIE -from .biobiochiletv import BioBioChileTVIE -from .bleacherreport import ( - BleacherReportIE, - BleacherReportCMSIE, -) -from .blinkx import BlinkxIE -from .bloomberg import BloombergIE -from .bokecc import BokeCCIE -from .bpb import BpbIE -from .br import BRIE -from .bravotv import BravoTVIE -from .breakcom import BreakIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE -from .c56 import C56IE -from .camdemy import ( - CamdemyIE, - CamdemyFolderIE -) -from .camwithher import CamWithHerIE -from .canalplus import CanalplusIE -from .canalc2 import Canalc2IE -from .canvas import CanvasIE -from .cbc import ( - CBCIE, - CBCPlayerIE, -) -from .cbs import CBSIE -from .cbsinteractive import CBSInteractiveIE -from .cbsnews import ( - CBSNewsIE, - CBSNewsLiveVideoIE, -) -from .cbssports import CBSSportsIE -from .ccc import CCCIE -from .cda import CDAIE -from .ceskatelevize import CeskaTelevizeIE -from .channel9 import Channel9IE -from .chaturbate import ChaturbateIE -from .chilloutzone import ChilloutzoneIE -from .chirbit import ( - ChirbitIE, - ChirbitProfileIE, -) -from .cinchcast import CinchcastIE -from .cinemassacre import CinemassacreIE -from .clipfish import ClipfishIE -from .cliphunter import CliphunterIE -from .clipsyndicate import ClipsyndicateIE -from .cloudy import CloudyIE -from .clubic import ClubicIE -from .clyp import ClypIE -from .cmt import CMTIE -from .cnbc import CNBCIE -from .cnn import 
( - CNNIE, - CNNBlogsIE, - CNNArticleIE, -) -from .collegehumor import CollegeHumorIE -from .collegerama import CollegeRamaIE -from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE -from .comcarcoff import ComCarCoffIE -from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import RtmpIE -from .condenast import CondeNastIE -from .cracked import CrackedIE -from .crackle import CrackleIE -from .criterion import CriterionIE -from .crooksandliars import CrooksAndLiarsIE -from .crunchyroll import ( - CrunchyrollIE, - CrunchyrollShowPlaylistIE -) -from .cspan import CSpanIE -from .ctsnews import CtsNewsIE -from .cultureunplugged import CultureUnpluggedIE -from .cwtv import CWTVIE -from .dailymotion import ( - DailymotionIE, - DailymotionPlaylistIE, - DailymotionUserIE, - DailymotionCloudIE, -) -from .daum import ( - DaumIE, - DaumClipIE, - DaumPlaylistIE, - DaumUserIE, -) -from .dbtv import DBTVIE -from .dcn import ( - DCNIE, - DCNVideoIE, - DCNLiveIE, - DCNSeasonIE, -) -from .dctp import DctpTvIE -from .deezer import DeezerPlaylistIE -from .democracynow import DemocracynowIE -from .dfb import DFBIE -from .dhm import DHMIE -from .dotsub import DotsubIE -from .douyutv import DouyuTVIE -from .dplay import DPlayIE -from .dramafever import ( - DramaFeverIE, - DramaFeverSeriesIE, -) -from .dreisat import DreiSatIE -from .drbonanza import DRBonanzaIE -from .drtuber import DrTuberIE -from .drtv import DRTVIE -from .dvtv import DVTVIE -from .dump import DumpIE -from .dumpert import DumpertIE -from .defense import DefenseGouvFrIE -from .discovery import DiscoveryIE -from .dropbox import DropboxIE -from .dw import ( - DWIE, - DWArticleIE, -) -from .eagleplatform import EaglePlatformIE -from .ebaumsworld import EbaumsWorldIE -from .echomsk import EchoMskIE -from .ehow import EHowIE -from .eighttracks import EightTracksIE -from .einthusan import EinthusanIE -from .eitb import EitbIE -from .ellentv import ( - EllenTVIE, - EllenTVClipsIE, -) -from 
.elpais import ElPaisIE -from .embedly import EmbedlyIE -from .engadget import EngadgetIE -from .eporner import EpornerIE -from .eroprofile import EroProfileIE -from .escapist import EscapistIE -from .espn import ESPNIE -from .esri import EsriVideoIE -from .europa import EuropaIE -from .everyonesmixtape import EveryonesMixtapeIE -from .exfm import ExfmIE -from .expotv import ExpoTVIE -from .extremetube import ExtremeTubeIE -from .facebook import FacebookIE -from .faz import FazIE -from .fc2 import FC2IE -from .fczenit import FczenitIE -from .firstpost import FirstpostIE -from .firsttv import FirstTVIE -from .fivemin import FiveMinIE -from .fivetv import FiveTVIE -from .fktv import FKTVIE -from .flickr import FlickrIE -from .folketinget import FolketingetIE -from .footyroom import FootyRoomIE -from .fourtube import FourTubeIE -from .fox import FOXIE -from .foxgay import FoxgayIE -from .foxnews import FoxNewsIE -from .foxsports import FoxSportsIE -from .franceculture import ( - FranceCultureIE, - FranceCultureEmissionIE, -) -from .franceinter import FranceInterIE -from .francetv import ( - PluzzIE, - FranceTvInfoIE, - FranceTVIE, - GenerationQuoiIE, - CultureboxIE, -) -from .freesound import FreesoundIE -from .freespeech import FreespeechIE -from .freevideo import FreeVideoIE -from .funimation import FunimationIE -from .funnyordie import FunnyOrDieIE -from .gameinformer import GameInformerIE -from .gamekings import GamekingsIE -from .gameone import ( - GameOneIE, - GameOnePlaylistIE, -) -from .gamersyde import GamersydeIE -from .gamespot import GameSpotIE -from .gamestar import GameStarIE -from .gametrailers import GametrailersIE -from .gazeta import GazetaIE -from .gdcvault import GDCVaultIE -from .generic import GenericIE -from .gfycat import GfycatIE -from .giantbomb import GiantBombIE -from .giga import GigaIE -from .glide import GlideIE -from .globo import ( - GloboIE, - GloboArticleIE, -) -from .godtube import GodTubeIE -from .goldenmoustache import 
GoldenMoustacheIE -from .golem import GolemIE -from .googledrive import GoogleDriveIE -from .googleplus import GooglePlusIE -from .googlesearch import GoogleSearchIE -from .goshgay import GoshgayIE -from .gputechconf import GPUTechConfIE -from .groupon import GrouponIE -from .hark import HarkIE -from .hbo import HBOIE -from .hearthisat import HearThisAtIE -from .heise import HeiseIE -from .hellporno import HellPornoIE -from .helsinki import HelsinkiIE -from .hentaistigma import HentaiStigmaIE -from .historicfilms import HistoricFilmsIE -from .hitbox import HitboxIE, HitboxLiveIE -from .hornbunny import HornBunnyIE -from .hotnewhiphop import HotNewHipHopIE -from .hotstar import HotStarIE -from .howcast import HowcastIE -from .howstuffworks import HowStuffWorksIE -from .huffpost import HuffPostIE -from .hypem import HypemIE -from .iconosquare import IconosquareIE -from .ign import ( - IGNIE, - OneUPIE, - PCMagIE, -) -from .imdb import ( - ImdbIE, - ImdbListIE -) -from .imgur import ( - ImgurIE, - ImgurAlbumIE, -) -from .ina import InaIE -from .indavideo import ( - IndavideoIE, - IndavideoEmbedIE, -) -from .infoq import InfoQIE -from .instagram import InstagramIE, InstagramUserIE -from .internetvideoarchive import InternetVideoArchiveIE -from .iprima import IPrimaIE -from .iqiyi import IqiyiIE -from .ir90tv import Ir90TvIE -from .ivi import ( - IviIE, - IviCompilationIE -) -from .ivideon import IvideonIE -from .izlesene import IzleseneIE -from .jadorecettepub import JadoreCettePubIE -from .jeuxvideo import JeuxVideoIE -from .jove import JoveIE -from .jwplatform import JWPlatformIE -from .jpopsukitv import JpopsukiIE -from .kaltura import KalturaIE -from .kanalplay import KanalPlayIE -from .kankan import KankanIE -from .karaoketv import KaraoketvIE -from .karrierevideos import KarriereVideosIE -from .keezmovies import KeezMoviesIE -from .khanacademy import KhanAcademyIE -from .kickstarter import KickStarterIE -from .keek import KeekIE -from .konserthusetplay import 
KonserthusetPlayIE -from .kontrtube import KontrTubeIE -from .krasview import KrasViewIE -from .ku6 import Ku6IE -from .kusi import KUSIIE -from .kuwo import ( - KuwoIE, - KuwoAlbumIE, - KuwoChartIE, - KuwoSingerIE, - KuwoCategoryIE, - KuwoMvIE, -) -from .la7 import LA7IE -from .laola1tv import Laola1TvIE -from .lecture2go import Lecture2GoIE -from .lemonde import LemondeIE -from .leeco import ( - LeIE, - LePlaylistIE, - LetvCloudIE, -) -from .libsyn import LibsynIE -from .lifenews import ( - LifeNewsIE, - LifeEmbedIE, -) -from .limelight import ( - LimelightMediaIE, - LimelightChannelIE, - LimelightChannelListIE, -) -from .liveleak import LiveLeakIE -from .livestream import ( - LivestreamIE, - LivestreamOriginalIE, - LivestreamShortenerIE, -) -from .lnkgo import LnkGoIE -from .lovehomeporn import LoveHomePornIE -from .lrt import LRTIE -from .lynda import ( - LyndaIE, - LyndaCourseIE -) -from .m6 import M6IE -from .macgamestore import MacGameStoreIE -from .mailru import MailRuIE -from .makerschannel import MakersChannelIE -from .makertv import MakerTVIE -from .malemotion import MalemotionIE -from .matchtv import MatchTVIE -from .mdr import MDRIE -from .metacafe import MetacafeIE -from .metacritic import MetacriticIE -from .mgoon import MgoonIE -from .minhateca import MinhatecaIE -from .ministrygrid import MinistryGridIE -from .minoto import MinotoIE -from .miomio import MioMioIE -from .mit import TechTVMITIE, MITIE, OCWMITIE -from .mitele import MiTeleIE -from .mixcloud import MixcloudIE -from .mlb import MLBIE -from .mnet import MnetIE -from .mpora import MporaIE -from .moevideo import MoeVideoIE -from .mofosex import MofosexIE -from .mojvideo import MojvideoIE -from .moniker import MonikerIE -from .mooshare import MooshareIE -from .morningstar import MorningstarIE -from .motherless import MotherlessIE -from .motorsport import MotorsportIE -from .movieclips import MovieClipsIE -from .moviezine import MoviezineIE -from .mtv import ( - MTVIE, - 
MTVServicesEmbeddedIE, - MTVIggyIE, - MTVDEIE, -) -from .muenchentv import MuenchenTVIE -from .musicplayon import MusicPlayOnIE -from .muzu import MuzuTVIE -from .mwave import MwaveIE -from .myspace import MySpaceIE, MySpaceAlbumIE -from .myspass import MySpassIE -from .myvi import MyviIE -from .myvideo import MyVideoIE -from .myvidster import MyVidsterIE -from .nationalgeographic import ( - NationalGeographicIE, - NationalGeographicChannelIE, -) -from .naver import NaverIE -from .nba import NBAIE -from .nbc import ( - CSNNEIE, - NBCIE, - NBCNewsIE, - NBCSportsIE, - NBCSportsVPlayerIE, - MSNBCIE, -) -from .ndr import ( - NDRIE, - NJoyIE, - NDREmbedBaseIE, - NDREmbedIE, - NJoyEmbedIE, -) -from .ndtv import NDTVIE -from .netzkino import NetzkinoIE -from .nerdcubed import NerdCubedFeedIE -from .nerdist import NerdistIE -from .neteasemusic import ( - NetEaseMusicIE, - NetEaseMusicAlbumIE, - NetEaseMusicSingerIE, - NetEaseMusicListIE, - NetEaseMusicMvIE, - NetEaseMusicProgramIE, - NetEaseMusicDjRadioIE, -) -from .newgrounds import NewgroundsIE -from .newstube import NewstubeIE -from .nextmedia import ( - NextMediaIE, - NextMediaActionNewsIE, - AppleDailyIE, -) -from .nextmovie import NextMovieIE -from .nfb import NFBIE -from .nfl import NFLIE -from .nhl import ( - NHLIE, - NHLNewsIE, - NHLVideocenterIE, -) -from .nick import NickIE -from .niconico import NiconicoIE, NiconicoPlaylistIE -from .ninegag import NineGagIE -from .noco import NocoIE -from .normalboots import NormalbootsIE -from .nosvideo import NosVideoIE -from .nova import NovaIE -from .novamov import ( - AuroraVidIE, - CloudTimeIE, - NowVideoIE, - VideoWeedIE, - WholeCloudIE, -) -from .nowness import ( - NownessIE, - NownessPlaylistIE, - NownessSeriesIE, -) -from .nowtv import ( - NowTVIE, - NowTVListIE, -) -from .noz import NozIE -from .npo import ( - NPOIE, - NPOLiveIE, - NPORadioIE, - NPORadioFragmentIE, - SchoolTVIE, - VPROIE, - WNLIE -) -from .npr import NprIE -from .nrk import ( - NRKIE, - 
NRKPlaylistIE, - NRKSkoleIE, - NRKTVIE, -) -from .ntvde import NTVDeIE -from .ntvru import NTVRuIE -from .nytimes import ( - NYTimesIE, - NYTimesArticleIE, -) -from .nuvid import NuvidIE -from .odnoklassniki import OdnoklassnikiIE -from .oktoberfesttv import OktoberfestTVIE -from .onionstudios import OnionStudiosIE -from .ooyala import ( - OoyalaIE, - OoyalaExternalIE, -) -from .openload import OpenloadIE -from .ora import OraTVIE -from .orf import ( - ORFTVthekIE, - ORFOE1IE, - ORFFM4IE, - ORFIPTVIE, -) -from .pandoratv import PandoraTVIE -from .parliamentliveuk import ParliamentLiveUKIE -from .patreon import PatreonIE -from .pbs import PBSIE -from .periscope import PeriscopeIE -from .philharmoniedeparis import PhilharmonieDeParisIE -from .phoenix import PhoenixIE -from .photobucket import PhotobucketIE -from .pinkbike import PinkbikeIE -from .planetaplay import PlanetaPlayIE -from .pladform import PladformIE -from .played import PlayedIE -from .playfm import PlayFMIE -from .plays import PlaysTVIE -from .playtvak import PlaytvakIE -from .playvid import PlayvidIE -from .playwire import PlaywireIE -from .pluralsight import ( - PluralsightIE, - PluralsightCourseIE, -) -from .podomatic import PodomaticIE -from .porn91 import Porn91IE -from .pornhd import PornHdIE -from .pornhub import ( - PornHubIE, - PornHubPlaylistIE, - PornHubUserVideosIE, -) -from .pornotube import PornotubeIE -from .pornovoisines import PornoVoisinesIE -from .pornoxo import PornoXOIE -from .primesharetv import PrimeShareTVIE -from .promptfile import PromptFileIE -from .prosiebensat1 import ProSiebenSat1IE -from .puls4 import Puls4IE -from .pyvideo import PyvideoIE -from .qqmusic import ( - QQMusicIE, - QQMusicSingerIE, - QQMusicAlbumIE, - QQMusicToplistIE, - QQMusicPlaylistIE, -) -from .quickvid import QuickVidIE -from .r7 import R7IE -from .radiode import RadioDeIE -from .radiojavan import RadioJavanIE -from .radiobremen import RadioBremenIE -from .radiofrance import RadioFranceIE -from .rai 
import ( - RaiTVIE, - RaiIE, -) -from .rbmaradio import RBMARadioIE -from .rds import RDSIE -from .redtube import RedTubeIE -from .regiotv import RegioTVIE -from .restudy import RestudyIE -from .reverbnation import ReverbNationIE -from .revision3 import Revision3IE -from .rice import RICEIE -from .ringtv import RingTVIE -from .ro220 import Ro220IE -from .rottentomatoes import RottenTomatoesIE -from .roxwel import RoxwelIE -from .rtbf import RTBFIE -from .rte import RteIE, RteRadioIE -from .rtlnl import RtlNlIE -from .rtl2 import RTL2IE -from .rtp import RTPIE -from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE -from .rtvnh import RTVNHIE -from .ruhd import RUHDIE -from .ruleporn import RulePornIE -from .rutube import ( - RutubeIE, - RutubeChannelIE, - RutubeEmbedIE, - RutubeMovieIE, - RutubePersonIE, -) -from .rutv import RUTVIE -from .ruutu import RuutuIE -from .sandia import SandiaIE -from .safari import ( - SafariIE, - SafariApiIE, - SafariCourseIE, -) -from .sapo import SapoIE -from .savefrom import SaveFromIE -from .sbs import SBSIE -from .scivee import SciVeeIE -from .screencast import ScreencastIE -from .screencastomatic import ScreencastOMaticIE -from .screenjunkies import ScreenJunkiesIE -from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE -from .senateisvp import SenateISVPIE -from .servingsys import ServingSysIE -from .sexu import SexuIE -from .sexykarma import SexyKarmaIE -from .shahid import ShahidIE -from .shared import SharedIE -from .sharesix import ShareSixIE -from .sina import SinaIE -from .skynewsarabia import ( - SkyNewsArabiaIE, - SkyNewsArabiaArticleIE, -) -from .slideshare import SlideshareIE -from .slutload import SlutloadIE -from .smotri import ( - SmotriIE, - SmotriCommunityIE, - SmotriUserIE, - SmotriBroadcastIE, -) -from .snagfilms import ( - SnagFilmsIE, - SnagFilmsEmbedIE, -) -from .snotr import SnotrIE -from .sohu import SohuIE -from .soundcloud import ( - SoundcloudIE, - SoundcloudSetIE, - 
SoundcloudUserIE, - SoundcloudPlaylistIE, - SoundcloudSearchIE -) -from .soundgasm import ( - SoundgasmIE, - SoundgasmProfileIE -) -from .southpark import ( - SouthParkIE, - SouthParkDeIE, - SouthParkDkIE, - SouthParkEsIE, - SouthParkNlIE -) -from .spankbang import SpankBangIE -from .spankwire import SpankwireIE -from .spiegel import SpiegelIE, SpiegelArticleIE -from .spiegeltv import SpiegeltvIE -from .spike import SpikeIE -from .stitcher import StitcherIE -from .sport5 import Sport5IE -from .sportbox import ( - SportBoxIE, - SportBoxEmbedIE, -) -from .sportdeutschland import SportDeutschlandIE -from .srgssr import ( - SRGSSRIE, - SRGSSRPlayIE, -) -from .srmediathek import SRMediathekIE -from .ssa import SSAIE -from .stanfordoc import StanfordOpenClassroomIE -from .steam import SteamIE -from .streamcloud import StreamcloudIE -from .streamcz import StreamCZIE -from .streetvoice import StreetVoiceIE -from .sunporno import SunPornoIE -from .svt import ( - SVTIE, - SVTPlayIE, -) -from .swrmediathek import SWRMediathekIE -from .syfy import SyfyIE -from .sztvhu import SztvHuIE -from .tagesschau import TagesschauIE -from .tapely import TapelyIE -from .tass import TassIE -from .teachertube import ( - TeacherTubeIE, - TeacherTubeUserIE, -) -from .teachingchannel import TeachingChannelIE -from .teamcoco import TeamcocoIE -from .techtalks import TechTalksIE -from .ted import TEDIE -from .tele13 import Tele13IE -from .telebruxelles import TeleBruxellesIE -from .telecinco import TelecincoIE -from .telegraaf import TelegraafIE -from .telemb import TeleMBIE -from .teletask import TeleTaskIE -from .testurl import TestURLIE -from .tf1 import TF1IE -from .theintercept import TheInterceptIE -from .theonion import TheOnionIE -from .theplatform import ( - ThePlatformIE, - ThePlatformFeedIE, -) -from .thescene import TheSceneIE -from .thesixtyone import TheSixtyOneIE -from .thestar import TheStarIE -from .thisamericanlife import ThisAmericanLifeIE -from .thisav import ThisAVIE -from 
.tinypic import TinyPicIE -from .tlc import TlcDeIE -from .tmz import ( - TMZIE, - TMZArticleIE, -) -from .tnaflix import ( - TNAFlixNetworkEmbedIE, - TNAFlixIE, - EMPFlixIE, - MovieFapIE, -) -from .toggle import ToggleIE -from .thvideo import ( - THVideoIE, - THVideoPlaylistIE -) -from .toutv import TouTvIE -from .toypics import ToypicsUserIE, ToypicsIE -from .traileraddict import TrailerAddictIE -from .trilulilu import TriluliluIE -from .trollvids import TrollvidsIE -from .trutube import TruTubeIE -from .tube8 import Tube8IE -from .tubitv import TubiTvIE -from .tudou import ( - TudouIE, - TudouPlaylistIE, - TudouAlbumIE, -) -from .tumblr import TumblrIE -from .tunein import ( - TuneInClipIE, - TuneInStationIE, - TuneInProgramIE, - TuneInTopicIE, - TuneInShortenerIE, -) -from .turbo import TurboIE -from .tutv import TutvIE -from .tv2 import ( - TV2IE, - TV2ArticleIE, -) -from .tv3 import TV3IE -from .tv4 import TV4IE -from .tvc import ( - TVCIE, - TVCArticleIE, -) -from .tvigle import TvigleIE -from .tvland import TVLandIE -from .tvp import TvpIE, TvpSeriesIE -from .tvplay import TVPlayIE -from .tweakers import TweakersIE -from .twentyfourvideo import TwentyFourVideoIE -from .twentymin import TwentyMinutenIE -from .twentytwotracks import ( - TwentyTwoTracksIE, - TwentyTwoTracksGenreIE -) -from .twitch import ( - TwitchVideoIE, - TwitchChapterIE, - TwitchVodIE, - TwitchProfileIE, - TwitchPastBroadcastsIE, - TwitchBookmarksIE, - TwitchStreamIE, -) -from .twitter import ( - TwitterCardIE, - TwitterIE, - TwitterAmplifyIE, -) -from .ubu import UbuIE -from .udemy import ( - UdemyIE, - UdemyCourseIE -) -from .udn import UDNEmbedIE -from .digiteka import DigitekaIE -from .unistra import UnistraIE -from .urort import UrortIE -from .usatoday import USATodayIE -from .ustream import UstreamIE, UstreamChannelIE -from .ustudio import UstudioIE -from .varzesh3 import Varzesh3IE -from .vbox7 import Vbox7IE -from .veehd import VeeHDIE -from .veoh import VeohIE -from .vessel import 
VesselIE -from .vesti import VestiIE -from .vevo import VevoIE -from .vgtv import ( - BTArticleIE, - BTVestlendingenIE, - VGTVIE, -) -from .vh1 import VH1IE -from .vice import ( - ViceIE, - ViceShowIE, -) -from .viddler import ViddlerIE -from .videodetective import VideoDetectiveIE -from .videofyme import VideofyMeIE -from .videomega import VideoMegaIE -from .videomore import ( - VideomoreIE, - VideomoreVideoIE, - VideomoreSeasonIE, -) -from .videopremium import VideoPremiumIE -from .videott import VideoTtIE -from .vidme import ( - VidmeIE, - VidmeUserIE, - VidmeUserLikesIE, -) -from .vidzi import VidziIE -from .vier import VierIE, VierVideosIE -from .viewster import ViewsterIE -from .viidea import ViideaIE -from .vimeo import ( - VimeoIE, - VimeoAlbumIE, - VimeoChannelIE, - VimeoGroupsIE, - VimeoLikesIE, - VimeoOndemandIE, - VimeoReviewIE, - VimeoUserIE, - VimeoWatchLaterIE, -) -from .vimple import VimpleIE -from .vine import ( - VineIE, - VineUserIE, -) -from .viki import ( - VikiIE, - VikiChannelIE, -) -from .vk import ( - VKIE, - VKUserVideosIE, -) -from .vlive import VLiveIE -from .vodlocker import VodlockerIE -from .voicerepublic import VoiceRepublicIE -from .voxmedia import VoxMediaIE -from .vporn import VpornIE -from .vrt import VRTIE -from .vube import VubeIE -from .vuclip import VuClipIE -from .vulture import VultureIE -from .walla import WallaIE -from .washingtonpost import WashingtonPostIE -from .wat import WatIE -from .wayofthemaster import WayOfTheMasterIE -from .wdr import ( - WDRIE, - WDRMobileIE, - WDRMausIE, -) -from .webofstories import ( - WebOfStoriesIE, - WebOfStoriesPlaylistIE, -) -from .weibo import WeiboIE -from .weiqitv import WeiqiTVIE -from .wimp import WimpIE -from .wistia import WistiaIE -from .worldstarhiphop import WorldStarHipHopIE -from .wrzuta import WrzutaIE -from .wsj import WSJIE -from .xbef import XBefIE -from .xboxclips import XboxClipsIE -from .xfileshare import XFileShareIE -from .xhamster import ( - XHamsterIE, - 
XHamsterEmbedIE, -) -from .xminus import XMinusIE -from .xnxx import XNXXIE -from .xstream import XstreamIE -from .xtube import XTubeUserIE, XTubeIE -from .xuite import XuiteIE -from .xvideos import XVideosIE -from .xxxymovies import XXXYMoviesIE -from .yahoo import ( - YahooIE, - YahooSearchIE, -) -from .yam import YamIE -from .yandexmusic import ( - YandexMusicTrackIE, - YandexMusicAlbumIE, - YandexMusicPlaylistIE, -) -from .yesjapan import YesJapanIE -from .yinyuetai import YinYueTaiIE -from .ynet import YnetIE -from .youjizz import YouJizzIE -from .youku import YoukuIE -from .youporn import YouPornIE -from .yourupload import YourUploadIE -from .youtube import ( - YoutubeIE, - YoutubeChannelIE, - YoutubeFavouritesIE, - YoutubeHistoryIE, - YoutubeLiveIE, - YoutubePlaylistIE, - YoutubePlaylistsIE, - YoutubeRecommendedIE, - YoutubeSearchDateIE, - YoutubeSearchIE, - YoutubeSearchURLIE, - YoutubeShowIE, - YoutubeSubscriptionsIE, - YoutubeTruncatedIDIE, - YoutubeTruncatedURLIE, - YoutubeUserIE, - YoutubeWatchLaterIE, -) -from .zapiks import ZapiksIE -from .zdf import ZDFIE, ZDFChannelIE -from .zingmp3 import ( - ZingMp3SongIE, - ZingMp3AlbumIE, -) -from .zippcast import ZippCastIE +from .extractors import * _ALL_CLASSES = [ klass diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py new file mode 100644 index 000000000..de29c7956 --- /dev/null +++ b/youtube_dl/extractor/extractors.py @@ -0,0 +1,991 @@ +# flake8: noqa +from __future__ import unicode_literals + +from .abc import ABCIE +from .abc7news import Abc7NewsIE +from .academicearth import AcademicEarthCourseIE +from .acast import ( + ACastIE, + ACastChannelIE, +) +from .addanime import AddAnimeIE +from .adobetv import ( + AdobeTVIE, + AdobeTVShowIE, + AdobeTVChannelIE, + AdobeTVVideoIE, +) +from .adultswim import AdultSwimIE +from .aenetworks import AENetworksIE +from .aftonbladet import AftonbladetIE +from .airmozilla import AirMozillaIE +from .aljazeera import AlJazeeraIE +from 
.alphaporno import AlphaPornoIE +from .animeondemand import AnimeOnDemandIE +from .anitube import AnitubeIE +from .anysex import AnySexIE +from .aol import ( + AolIE, + AolFeaturesIE, +) +from .allocine import AllocineIE +from .aparat import AparatIE +from .appleconnect import AppleConnectIE +from .appletrailers import ( + AppleTrailersIE, + AppleTrailersSectionIE, +) +from .archiveorg import ArchiveOrgIE +from .ard import ( + ARDIE, + ARDMediathekIE, + SportschauIE, +) +from .arte import ( + ArteTvIE, + ArteTVPlus7IE, + ArteTVCreativeIE, + ArteTVConcertIE, + ArteTVFutureIE, + ArteTVCinemaIE, + ArteTVDDCIE, + ArteTVMagazineIE, + ArteTVEmbedIE, +) +from .atresplayer import AtresPlayerIE +from .atttechchannel import ATTTechChannelIE +from .audimedia import AudiMediaIE +from .audioboom import AudioBoomIE +from .audiomack import AudiomackIE, AudiomackAlbumIE +from .azubu import AzubuIE, AzubuLiveIE +from .baidu import BaiduVideoIE +from .bambuser import BambuserIE, BambuserChannelIE +from .bandcamp import BandcampIE, BandcampAlbumIE +from .bbc import ( + BBCCoUkIE, + BBCCoUkArticleIE, + BBCIE, +) +from .beeg import BeegIE +from .behindkink import BehindKinkIE +from .beatportpro import BeatportProIE +from .bet import BetIE +from .bigflix import BigflixIE +from .bild import BildIE +from .bilibili import BiliBiliIE +from .biobiochiletv import BioBioChileTVIE +from .bleacherreport import ( + BleacherReportIE, + BleacherReportCMSIE, +) +from .blinkx import BlinkxIE +from .bloomberg import BloombergIE +from .bokecc import BokeCCIE +from .bpb import BpbIE +from .br import BRIE +from .bravotv import BravoTVIE +from .breakcom import BreakIE +from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, +) +from .buzzfeed import BuzzFeedIE +from .byutv import BYUtvIE +from .c56 import C56IE +from .camdemy import ( + CamdemyIE, + CamdemyFolderIE +) +from .camwithher import CamWithHerIE +from .canalplus import CanalplusIE +from .canalc2 import Canalc2IE +from .canvas import 
CanvasIE +from .cbc import ( + CBCIE, + CBCPlayerIE, +) +from .cbs import CBSIE +from .cbsinteractive import CBSInteractiveIE +from .cbsnews import ( + CBSNewsIE, + CBSNewsLiveVideoIE, +) +from .cbssports import CBSSportsIE +from .ccc import CCCIE +from .cda import CDAIE +from .ceskatelevize import CeskaTelevizeIE +from .channel9 import Channel9IE +from .chaturbate import ChaturbateIE +from .chilloutzone import ChilloutzoneIE +from .chirbit import ( + ChirbitIE, + ChirbitProfileIE, +) +from .cinchcast import CinchcastIE +from .cinemassacre import CinemassacreIE +from .clipfish import ClipfishIE +from .cliphunter import CliphunterIE +from .clipsyndicate import ClipsyndicateIE +from .cloudy import CloudyIE +from .clubic import ClubicIE +from .clyp import ClypIE +from .cmt import CMTIE +from .cnbc import CNBCIE +from .cnn import ( + CNNIE, + CNNBlogsIE, + CNNArticleIE, +) +from .collegehumor import CollegeHumorIE +from .collegerama import CollegeRamaIE +from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE +from .comcarcoff import ComCarCoffIE +from .commonmistakes import CommonMistakesIE, UnicodeBOMIE +from .commonprotocols import RtmpIE +from .condenast import CondeNastIE +from .cracked import CrackedIE +from .crackle import CrackleIE +from .criterion import CriterionIE +from .crooksandliars import CrooksAndLiarsIE +from .crunchyroll import ( + CrunchyrollIE, + CrunchyrollShowPlaylistIE +) +from .cspan import CSpanIE +from .ctsnews import CtsNewsIE +from .cultureunplugged import CultureUnpluggedIE +from .cwtv import CWTVIE +from .dailymotion import ( + DailymotionIE, + DailymotionPlaylistIE, + DailymotionUserIE, + DailymotionCloudIE, +) +from .daum import ( + DaumIE, + DaumClipIE, + DaumPlaylistIE, + DaumUserIE, +) +from .dbtv import DBTVIE +from .dcn import ( + DCNIE, + DCNVideoIE, + DCNLiveIE, + DCNSeasonIE, +) +from .dctp import DctpTvIE +from .deezer import DeezerPlaylistIE +from .democracynow import DemocracynowIE +from .dfb import DFBIE +from .dhm 
import DHMIE +from .dotsub import DotsubIE +from .douyutv import DouyuTVIE +from .dplay import DPlayIE +from .dramafever import ( + DramaFeverIE, + DramaFeverSeriesIE, +) +from .dreisat import DreiSatIE +from .drbonanza import DRBonanzaIE +from .drtuber import DrTuberIE +from .drtv import DRTVIE +from .dvtv import DVTVIE +from .dump import DumpIE +from .dumpert import DumpertIE +from .defense import DefenseGouvFrIE +from .discovery import DiscoveryIE +from .dropbox import DropboxIE +from .dw import ( + DWIE, + DWArticleIE, +) +from .eagleplatform import EaglePlatformIE +from .ebaumsworld import EbaumsWorldIE +from .echomsk import EchoMskIE +from .ehow import EHowIE +from .eighttracks import EightTracksIE +from .einthusan import EinthusanIE +from .eitb import EitbIE +from .ellentv import ( + EllenTVIE, + EllenTVClipsIE, +) +from .elpais import ElPaisIE +from .embedly import EmbedlyIE +from .engadget import EngadgetIE +from .eporner import EpornerIE +from .eroprofile import EroProfileIE +from .escapist import EscapistIE +from .espn import ESPNIE +from .esri import EsriVideoIE +from .europa import EuropaIE +from .everyonesmixtape import EveryonesMixtapeIE +from .exfm import ExfmIE +from .expotv import ExpoTVIE +from .extremetube import ExtremeTubeIE +from .facebook import FacebookIE +from .faz import FazIE +from .fc2 import FC2IE +from .fczenit import FczenitIE +from .firstpost import FirstpostIE +from .firsttv import FirstTVIE +from .fivemin import FiveMinIE +from .fivetv import FiveTVIE +from .fktv import FKTVIE +from .flickr import FlickrIE +from .folketinget import FolketingetIE +from .footyroom import FootyRoomIE +from .fourtube import FourTubeIE +from .fox import FOXIE +from .foxgay import FoxgayIE +from .foxnews import FoxNewsIE +from .foxsports import FoxSportsIE +from .franceculture import ( + FranceCultureIE, + FranceCultureEmissionIE, +) +from .franceinter import FranceInterIE +from .francetv import ( + PluzzIE, + FranceTvInfoIE, + FranceTVIE, + 
GenerationQuoiIE, + CultureboxIE, +) +from .freesound import FreesoundIE +from .freespeech import FreespeechIE +from .freevideo import FreeVideoIE +from .funimation import FunimationIE +from .funnyordie import FunnyOrDieIE +from .gameinformer import GameInformerIE +from .gamekings import GamekingsIE +from .gameone import ( + GameOneIE, + GameOnePlaylistIE, +) +from .gamersyde import GamersydeIE +from .gamespot import GameSpotIE +from .gamestar import GameStarIE +from .gametrailers import GametrailersIE +from .gazeta import GazetaIE +from .gdcvault import GDCVaultIE +from .generic import GenericIE +from .gfycat import GfycatIE +from .giantbomb import GiantBombIE +from .giga import GigaIE +from .glide import GlideIE +from .globo import ( + GloboIE, + GloboArticleIE, +) +from .godtube import GodTubeIE +from .goldenmoustache import GoldenMoustacheIE +from .golem import GolemIE +from .googledrive import GoogleDriveIE +from .googleplus import GooglePlusIE +from .googlesearch import GoogleSearchIE +from .goshgay import GoshgayIE +from .gputechconf import GPUTechConfIE +from .groupon import GrouponIE +from .hark import HarkIE +from .hbo import HBOIE +from .hearthisat import HearThisAtIE +from .heise import HeiseIE +from .hellporno import HellPornoIE +from .helsinki import HelsinkiIE +from .hentaistigma import HentaiStigmaIE +from .historicfilms import HistoricFilmsIE +from .hitbox import HitboxIE, HitboxLiveIE +from .hornbunny import HornBunnyIE +from .hotnewhiphop import HotNewHipHopIE +from .hotstar import HotStarIE +from .howcast import HowcastIE +from .howstuffworks import HowStuffWorksIE +from .huffpost import HuffPostIE +from .hypem import HypemIE +from .iconosquare import IconosquareIE +from .ign import ( + IGNIE, + OneUPIE, + PCMagIE, +) +from .imdb import ( + ImdbIE, + ImdbListIE +) +from .imgur import ( + ImgurIE, + ImgurAlbumIE, +) +from .ina import InaIE +from .indavideo import ( + IndavideoIE, + IndavideoEmbedIE, +) +from .infoq import InfoQIE +from .instagram 
import InstagramIE, InstagramUserIE +from .internetvideoarchive import InternetVideoArchiveIE +from .iprima import IPrimaIE +from .iqiyi import IqiyiIE +from .ir90tv import Ir90TvIE +from .ivi import ( + IviIE, + IviCompilationIE +) +from .ivideon import IvideonIE +from .izlesene import IzleseneIE +from .jadorecettepub import JadoreCettePubIE +from .jeuxvideo import JeuxVideoIE +from .jove import JoveIE +from .jwplatform import JWPlatformIE +from .jpopsukitv import JpopsukiIE +from .kaltura import KalturaIE +from .kanalplay import KanalPlayIE +from .kankan import KankanIE +from .karaoketv import KaraoketvIE +from .karrierevideos import KarriereVideosIE +from .keezmovies import KeezMoviesIE +from .khanacademy import KhanAcademyIE +from .kickstarter import KickStarterIE +from .keek import KeekIE +from .konserthusetplay import KonserthusetPlayIE +from .kontrtube import KontrTubeIE +from .krasview import KrasViewIE +from .ku6 import Ku6IE +from .kusi import KUSIIE +from .kuwo import ( + KuwoIE, + KuwoAlbumIE, + KuwoChartIE, + KuwoSingerIE, + KuwoCategoryIE, + KuwoMvIE, +) +from .la7 import LA7IE +from .laola1tv import Laola1TvIE +from .lecture2go import Lecture2GoIE +from .lemonde import LemondeIE +from .leeco import ( + LeIE, + LePlaylistIE, + LetvCloudIE, +) +from .libsyn import LibsynIE +from .lifenews import ( + LifeNewsIE, + LifeEmbedIE, +) +from .limelight import ( + LimelightMediaIE, + LimelightChannelIE, + LimelightChannelListIE, +) +from .liveleak import LiveLeakIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) +from .lnkgo import LnkGoIE +from .lovehomeporn import LoveHomePornIE +from .lrt import LRTIE +from .lynda import ( + LyndaIE, + LyndaCourseIE +) +from .m6 import M6IE +from .macgamestore import MacGameStoreIE +from .mailru import MailRuIE +from .makerschannel import MakersChannelIE +from .makertv import MakerTVIE +from .malemotion import MalemotionIE +from .matchtv import MatchTVIE +from .mdr import MDRIE 
+from .metacafe import MetacafeIE +from .metacritic import MetacriticIE +from .mgoon import MgoonIE +from .minhateca import MinhatecaIE +from .ministrygrid import MinistryGridIE +from .minoto import MinotoIE +from .miomio import MioMioIE +from .mit import TechTVMITIE, MITIE, OCWMITIE +from .mitele import MiTeleIE +from .mixcloud import MixcloudIE +from .mlb import MLBIE +from .mnet import MnetIE +from .mpora import MporaIE +from .moevideo import MoeVideoIE +from .mofosex import MofosexIE +from .mojvideo import MojvideoIE +from .moniker import MonikerIE +from .mooshare import MooshareIE +from .morningstar import MorningstarIE +from .motherless import MotherlessIE +from .motorsport import MotorsportIE +from .movieclips import MovieClipsIE +from .moviezine import MoviezineIE +from .mtv import ( + MTVIE, + MTVServicesEmbeddedIE, + MTVIggyIE, + MTVDEIE, +) +from .muenchentv import MuenchenTVIE +from .musicplayon import MusicPlayOnIE +from .muzu import MuzuTVIE +from .mwave import MwaveIE +from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspass import MySpassIE +from .myvi import MyviIE +from .myvideo import MyVideoIE +from .myvidster import MyVidsterIE +from .nationalgeographic import ( + NationalGeographicIE, + NationalGeographicChannelIE, +) +from .naver import NaverIE +from .nba import NBAIE +from .nbc import ( + CSNNEIE, + NBCIE, + NBCNewsIE, + NBCSportsIE, + NBCSportsVPlayerIE, + MSNBCIE, +) +from .ndr import ( + NDRIE, + NJoyIE, + NDREmbedBaseIE, + NDREmbedIE, + NJoyEmbedIE, +) +from .ndtv import NDTVIE +from .netzkino import NetzkinoIE +from .nerdcubed import NerdCubedFeedIE +from .nerdist import NerdistIE +from .neteasemusic import ( + NetEaseMusicIE, + NetEaseMusicAlbumIE, + NetEaseMusicSingerIE, + NetEaseMusicListIE, + NetEaseMusicMvIE, + NetEaseMusicProgramIE, + NetEaseMusicDjRadioIE, +) +from .newgrounds import NewgroundsIE +from .newstube import NewstubeIE +from .nextmedia import ( + NextMediaIE, + NextMediaActionNewsIE, + AppleDailyIE, +) +from 
.nextmovie import NextMovieIE +from .nfb import NFBIE +from .nfl import NFLIE +from .nhl import ( + NHLIE, + NHLNewsIE, + NHLVideocenterIE, +) +from .nick import NickIE +from .niconico import NiconicoIE, NiconicoPlaylistIE +from .ninegag import NineGagIE +from .noco import NocoIE +from .normalboots import NormalbootsIE +from .nosvideo import NosVideoIE +from .nova import NovaIE +from .novamov import ( + AuroraVidIE, + CloudTimeIE, + NowVideoIE, + VideoWeedIE, + WholeCloudIE, +) +from .nowness import ( + NownessIE, + NownessPlaylistIE, + NownessSeriesIE, +) +from .nowtv import ( + NowTVIE, + NowTVListIE, +) +from .noz import NozIE +from .npo import ( + NPOIE, + NPOLiveIE, + NPORadioIE, + NPORadioFragmentIE, + SchoolTVIE, + VPROIE, + WNLIE +) +from .npr import NprIE +from .nrk import ( + NRKIE, + NRKPlaylistIE, + NRKSkoleIE, + NRKTVIE, +) +from .ntvde import NTVDeIE +from .ntvru import NTVRuIE +from .nytimes import ( + NYTimesIE, + NYTimesArticleIE, +) +from .nuvid import NuvidIE +from .odnoklassniki import OdnoklassnikiIE +from .oktoberfesttv import OktoberfestTVIE +from .onionstudios import OnionStudiosIE +from .ooyala import ( + OoyalaIE, + OoyalaExternalIE, +) +from .openload import OpenloadIE +from .ora import OraTVIE +from .orf import ( + ORFTVthekIE, + ORFOE1IE, + ORFFM4IE, + ORFIPTVIE, +) +from .pandoratv import PandoraTVIE +from .parliamentliveuk import ParliamentLiveUKIE +from .patreon import PatreonIE +from .pbs import PBSIE +from .periscope import PeriscopeIE +from .philharmoniedeparis import PhilharmonieDeParisIE +from .phoenix import PhoenixIE +from .photobucket import PhotobucketIE +from .pinkbike import PinkbikeIE +from .planetaplay import PlanetaPlayIE +from .pladform import PladformIE +from .played import PlayedIE +from .playfm import PlayFMIE +from .plays import PlaysTVIE +from .playtvak import PlaytvakIE +from .playvid import PlayvidIE +from .playwire import PlaywireIE +from .pluralsight import ( + PluralsightIE, + PluralsightCourseIE, +) +from 
.podomatic import PodomaticIE +from .porn91 import Porn91IE +from .pornhd import PornHdIE +from .pornhub import ( + PornHubIE, + PornHubPlaylistIE, + PornHubUserVideosIE, +) +from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE +from .pornoxo import PornoXOIE +from .primesharetv import PrimeShareTVIE +from .promptfile import PromptFileIE +from .prosiebensat1 import ProSiebenSat1IE +from .puls4 import Puls4IE +from .pyvideo import PyvideoIE +from .qqmusic import ( + QQMusicIE, + QQMusicSingerIE, + QQMusicAlbumIE, + QQMusicToplistIE, + QQMusicPlaylistIE, +) +from .quickvid import QuickVidIE +from .r7 import R7IE +from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE +from .radiobremen import RadioBremenIE +from .radiofrance import RadioFranceIE +from .rai import ( + RaiTVIE, + RaiIE, +) +from .rbmaradio import RBMARadioIE +from .rds import RDSIE +from .redtube import RedTubeIE +from .regiotv import RegioTVIE +from .restudy import RestudyIE +from .reverbnation import ReverbNationIE +from .revision3 import Revision3IE +from .rice import RICEIE +from .ringtv import RingTVIE +from .ro220 import Ro220IE +from .rottentomatoes import RottenTomatoesIE +from .roxwel import RoxwelIE +from .rtbf import RTBFIE +from .rte import RteIE, RteRadioIE +from .rtlnl import RtlNlIE +from .rtl2 import RTL2IE +from .rtp import RTPIE +from .rts import RTSIE +from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE +from .rtvnh import RTVNHIE +from .ruhd import RUHDIE +from .ruleporn import RulePornIE +from .rutube import ( + RutubeIE, + RutubeChannelIE, + RutubeEmbedIE, + RutubeMovieIE, + RutubePersonIE, +) +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .sandia import SandiaIE +from .safari import ( + SafariIE, + SafariApiIE, + SafariCourseIE, +) +from .sapo import SapoIE +from .savefrom import SaveFromIE +from .sbs import SBSIE +from .scivee import SciVeeIE +from .screencast import ScreencastIE +from .screencastomatic import 
ScreencastOMaticIE +from .screenjunkies import ScreenJunkiesIE +from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE +from .senateisvp import SenateISVPIE +from .servingsys import ServingSysIE +from .sexu import SexuIE +from .sexykarma import SexyKarmaIE +from .shahid import ShahidIE +from .shared import SharedIE +from .sharesix import ShareSixIE +from .sina import SinaIE +from .skynewsarabia import ( + SkyNewsArabiaIE, + SkyNewsArabiaArticleIE, +) +from .slideshare import SlideshareIE +from .slutload import SlutloadIE +from .smotri import ( + SmotriIE, + SmotriCommunityIE, + SmotriUserIE, + SmotriBroadcastIE, +) +from .snagfilms import ( + SnagFilmsIE, + SnagFilmsEmbedIE, +) +from .snotr import SnotrIE +from .sohu import SohuIE +from .soundcloud import ( + SoundcloudIE, + SoundcloudSetIE, + SoundcloudUserIE, + SoundcloudPlaylistIE, + SoundcloudSearchIE +) +from .soundgasm import ( + SoundgasmIE, + SoundgasmProfileIE +) +from .southpark import ( + SouthParkIE, + SouthParkDeIE, + SouthParkDkIE, + SouthParkEsIE, + SouthParkNlIE +) +from .spankbang import SpankBangIE +from .spankwire import SpankwireIE +from .spiegel import SpiegelIE, SpiegelArticleIE +from .spiegeltv import SpiegeltvIE +from .spike import SpikeIE +from .stitcher import StitcherIE +from .sport5 import Sport5IE +from .sportbox import ( + SportBoxIE, + SportBoxEmbedIE, +) +from .sportdeutschland import SportDeutschlandIE +from .srgssr import ( + SRGSSRIE, + SRGSSRPlayIE, +) +from .srmediathek import SRMediathekIE +from .ssa import SSAIE +from .stanfordoc import StanfordOpenClassroomIE +from .steam import SteamIE +from .streamcloud import StreamcloudIE +from .streamcz import StreamCZIE +from .streetvoice import StreetVoiceIE +from .sunporno import SunPornoIE +from .svt import ( + SVTIE, + SVTPlayIE, +) +from .swrmediathek import SWRMediathekIE +from .syfy import SyfyIE +from .sztvhu import SztvHuIE +from .tagesschau import TagesschauIE +from .tapely import TapelyIE +from .tass import TassIE +from 
.teachertube import ( + TeacherTubeIE, + TeacherTubeUserIE, +) +from .teachingchannel import TeachingChannelIE +from .teamcoco import TeamcocoIE +from .techtalks import TechTalksIE +from .ted import TEDIE +from .tele13 import Tele13IE +from .telebruxelles import TeleBruxellesIE +from .telecinco import TelecincoIE +from .telegraaf import TelegraafIE +from .telemb import TeleMBIE +from .teletask import TeleTaskIE +from .testurl import TestURLIE +from .tf1 import TF1IE +from .theintercept import TheInterceptIE +from .theonion import TheOnionIE +from .theplatform import ( + ThePlatformIE, + ThePlatformFeedIE, +) +from .thescene import TheSceneIE +from .thesixtyone import TheSixtyOneIE +from .thestar import TheStarIE +from .thisamericanlife import ThisAmericanLifeIE +from .thisav import ThisAVIE +from .tinypic import TinyPicIE +from .tlc import TlcDeIE +from .tmz import ( + TMZIE, + TMZArticleIE, +) +from .tnaflix import ( + TNAFlixNetworkEmbedIE, + TNAFlixIE, + EMPFlixIE, + MovieFapIE, +) +from .toggle import ToggleIE +from .thvideo import ( + THVideoIE, + THVideoPlaylistIE +) +from .toutv import TouTvIE +from .toypics import ToypicsUserIE, ToypicsIE +from .traileraddict import TrailerAddictIE +from .trilulilu import TriluliluIE +from .trollvids import TrollvidsIE +from .trutube import TruTubeIE +from .tube8 import Tube8IE +from .tubitv import TubiTvIE +from .tudou import ( + TudouIE, + TudouPlaylistIE, + TudouAlbumIE, +) +from .tumblr import TumblrIE +from .tunein import ( + TuneInClipIE, + TuneInStationIE, + TuneInProgramIE, + TuneInTopicIE, + TuneInShortenerIE, +) +from .turbo import TurboIE +from .tutv import TutvIE +from .tv2 import ( + TV2IE, + TV2ArticleIE, +) +from .tv3 import TV3IE +from .tv4 import TV4IE +from .tvc import ( + TVCIE, + TVCArticleIE, +) +from .tvigle import TvigleIE +from .tvland import TVLandIE +from .tvp import TvpIE, TvpSeriesIE +from .tvplay import TVPlayIE +from .tweakers import TweakersIE +from .twentyfourvideo import TwentyFourVideoIE 
+from .twentymin import TwentyMinutenIE +from .twentytwotracks import ( + TwentyTwoTracksIE, + TwentyTwoTracksGenreIE +) +from .twitch import ( + TwitchVideoIE, + TwitchChapterIE, + TwitchVodIE, + TwitchProfileIE, + TwitchPastBroadcastsIE, + TwitchBookmarksIE, + TwitchStreamIE, +) +from .twitter import ( + TwitterCardIE, + TwitterIE, + TwitterAmplifyIE, +) +from .ubu import UbuIE +from .udemy import ( + UdemyIE, + UdemyCourseIE +) +from .udn import UDNEmbedIE +from .digiteka import DigitekaIE +from .unistra import UnistraIE +from .urort import UrortIE +from .usatoday import USATodayIE +from .ustream import UstreamIE, UstreamChannelIE +from .ustudio import UstudioIE +from .varzesh3 import Varzesh3IE +from .vbox7 import Vbox7IE +from .veehd import VeeHDIE +from .veoh import VeohIE +from .vessel import VesselIE +from .vesti import VestiIE +from .vevo import VevoIE +from .vgtv import ( + BTArticleIE, + BTVestlendingenIE, + VGTVIE, +) +from .vh1 import VH1IE +from .vice import ( + ViceIE, + ViceShowIE, +) +from .viddler import ViddlerIE +from .videodetective import VideoDetectiveIE +from .videofyme import VideofyMeIE +from .videomega import VideoMegaIE +from .videomore import ( + VideomoreIE, + VideomoreVideoIE, + VideomoreSeasonIE, +) +from .videopremium import VideoPremiumIE +from .videott import VideoTtIE +from .vidme import ( + VidmeIE, + VidmeUserIE, + VidmeUserLikesIE, +) +from .vidzi import VidziIE +from .vier import VierIE, VierVideosIE +from .viewster import ViewsterIE +from .viidea import ViideaIE +from .vimeo import ( + VimeoIE, + VimeoAlbumIE, + VimeoChannelIE, + VimeoGroupsIE, + VimeoLikesIE, + VimeoOndemandIE, + VimeoReviewIE, + VimeoUserIE, + VimeoWatchLaterIE, +) +from .vimple import VimpleIE +from .vine import ( + VineIE, + VineUserIE, +) +from .viki import ( + VikiIE, + VikiChannelIE, +) +from .vk import ( + VKIE, + VKUserVideosIE, +) +from .vlive import VLiveIE +from .vodlocker import VodlockerIE +from .voicerepublic import VoiceRepublicIE +from 
.voxmedia import VoxMediaIE +from .vporn import VpornIE +from .vrt import VRTIE +from .vube import VubeIE +from .vuclip import VuClipIE +from .vulture import VultureIE +from .walla import WallaIE +from .washingtonpost import WashingtonPostIE +from .wat import WatIE +from .wayofthemaster import WayOfTheMasterIE +from .wdr import ( + WDRIE, + WDRMobileIE, + WDRMausIE, +) +from .webofstories import ( + WebOfStoriesIE, + WebOfStoriesPlaylistIE, +) +from .weibo import WeiboIE +from .weiqitv import WeiqiTVIE +from .wimp import WimpIE +from .wistia import WistiaIE +from .worldstarhiphop import WorldStarHipHopIE +from .wrzuta import WrzutaIE +from .wsj import WSJIE +from .xbef import XBefIE +from .xboxclips import XboxClipsIE +from .xfileshare import XFileShareIE +from .xhamster import ( + XHamsterIE, + XHamsterEmbedIE, +) +from .xminus import XMinusIE +from .xnxx import XNXXIE +from .xstream import XstreamIE +from .xtube import XTubeUserIE, XTubeIE +from .xuite import XuiteIE +from .xvideos import XVideosIE +from .xxxymovies import XXXYMoviesIE +from .yahoo import ( + YahooIE, + YahooSearchIE, +) +from .yam import YamIE +from .yandexmusic import ( + YandexMusicTrackIE, + YandexMusicAlbumIE, + YandexMusicPlaylistIE, +) +from .yesjapan import YesJapanIE +from .yinyuetai import YinYueTaiIE +from .ynet import YnetIE +from .youjizz import YouJizzIE +from .youku import YoukuIE +from .youporn import YouPornIE +from .yourupload import YourUploadIE +from .youtube import ( + YoutubeIE, + YoutubeChannelIE, + YoutubeFavouritesIE, + YoutubeHistoryIE, + YoutubeLiveIE, + YoutubePlaylistIE, + YoutubePlaylistsIE, + YoutubeRecommendedIE, + YoutubeSearchDateIE, + YoutubeSearchIE, + YoutubeSearchURLIE, + YoutubeShowIE, + YoutubeSubscriptionsIE, + YoutubeTruncatedIDIE, + YoutubeTruncatedURLIE, + YoutubeUserIE, + YoutubeWatchLaterIE, +) +from .zapiks import ZapiksIE +from .zdf import ZDFIE, ZDFChannelIE +from .zingmp3 import ( + ZingMp3SongIE, + ZingMp3AlbumIE, +) +from .zippcast import 
ZippCastIE From 779822d945dc7ebba7062ac9a5e760d21a7f362a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 10 Feb 2016 14:01:31 +0100 Subject: [PATCH 0148/3599] Add experimental support for lazy loading the info extractors 'make lazy-extractors' creates the youtube_dl/extractor/lazy_extractors.py (imported by youtube_dl/extractor/__init__.py), which contains simplified classes that only have the 'suitable' class method and that load the appropiate class with the '__new__' method when a instance is created. --- .gitignore | 1 + Makefile | 8 +++- devscripts/lazy_load_template.py | 17 ++++++++ devscripts/make_lazy_extractors.py | 63 ++++++++++++++++++++++++++++++ youtube_dl/extractor/__init__.py | 18 +++++---- 5 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 devscripts/lazy_load_template.py create mode 100644 devscripts/make_lazy_extractors.py diff --git a/.gitignore b/.gitignore index 26dbde73d..72c10425d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.fish +youtube_dl/extractor/lazy_extractors.py youtube-dl youtube-dl.exe youtube-dl.tar.gz diff --git a/Makefile b/Makefile index ba7f7ed36..06cffcb71 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi 
CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete @@ -88,6 +88,12 @@ youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in fish-completion: youtube-dl.fish +lazy-extractors: youtube_dl/extractor/lazy_extractors.py + +_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) + $(PYTHON) devscripts/make_lazy_extractors.py $@ + youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py new file mode 100644 index 000000000..ae2bd2701 --- /dev/null +++ b/devscripts/lazy_load_template.py @@ -0,0 +1,17 @@ +# flake8: noqa +from __future__ import unicode_literals + +import re + + +class LazyLoadExtractor(object): + _module = None + + @classmethod + def ie_key(cls): + return cls.__name__[:-2] + + def __new__(cls): + mod = __import__(cls._module, fromlist=(cls.__name__,)) + real_cls = getattr(mod, cls.__name__) + return real_cls.__new__(real_cls) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py new file mode 100644 index 000000000..8627d0b1c --- /dev/null +++ b/devscripts/make_lazy_extractors.py @@ -0,0 +1,63 @@ +from __future__ import unicode_literals, print_function + +from inspect import getsource +import os +from os.path import dirname as dirn +import sys + +print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr) + +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) + +lazy_extractors_filename = sys.argv[1] +if os.path.exists(lazy_extractors_filename): + 
os.remove(lazy_extractors_filename) + +from youtube_dl.extractor import _ALL_CLASSES +from youtube_dl.extractor.common import InfoExtractor + +with open('devscripts/lazy_load_template.py', 'rt') as f: + module_template = f.read() + +module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)] + +ie_template = ''' +class {name}(LazyLoadExtractor): + _VALID_URL = {valid_url!r} + _module = '{module}' +''' + +make_valid_template = ''' + @classmethod + def _make_valid_url(cls): + return {!r} +''' + + +def build_lazy_ie(ie, name): + valid_url = getattr(ie, '_VALID_URL', None) + s = ie_template.format( + name=name, + valid_url=valid_url, + module=ie.__module__) + if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: + s += getsource(ie.suitable) + if hasattr(ie, '_make_valid_url'): + # search extractors + s += make_valid_template.format(ie._make_valid_url()) + return s + +names = [] +for ie in _ALL_CLASSES: + name = ie.ie_key() + 'IE' + src = build_lazy_ie(ie, name) + module_contents.append(src) + names.append(name) + +module_contents.append( + '_ALL_CLASSES = [{}]'.format(', '.join(names))) + +module_src = '\n'.join(module_contents) + +with open(lazy_extractors_filename, 'wt') as f: + f.write(module_src) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a0a53445a..b0d4d156b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,13 +1,17 @@ from __future__ import unicode_literals -from .extractors import * +try: + from .lazy_extractors import * + from .lazy_extractors import _ALL_CLASSES +except ImportError: + from .extractors import * -_ALL_CLASSES = [ - klass - for name, klass in globals().items() - if name.endswith('IE') and name != 'GenericIE' -] -_ALL_CLASSES.append(GenericIE) + _ALL_CLASSES = [ + klass + for name, klass in globals().items() + if name.endswith('IE') and name != 'GenericIE' + ] + _ALL_CLASSES.append(GenericIE) def gen_extractor_classes(): From 
0d778b1db909c8d096be4e199384fff96a722fc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 11 Feb 2016 14:49:02 +0100 Subject: [PATCH 0149/3599] lazy extractors: specify the encoding When building with python3 the unicode characters are not escaped, python2 needs to know the encoding. --- devscripts/lazy_load_template.py | 1 + 1 file changed, 1 insertion(+) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index ae2bd2701..563d629f8 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -1,3 +1,4 @@ +# encoding: utf-8 # flake8: noqa from __future__ import unicode_literals From c1ce6acdd73da7744f4bbe27698e96275467e14d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Feb 2016 11:53:48 +0100 Subject: [PATCH 0150/3599] lazy extractors: Fix building with python2.6 --- devscripts/make_lazy_extractors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 8627d0b1c..5d0ddb401 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -30,7 +30,7 @@ class {name}(LazyLoadExtractor): make_valid_template = ''' @classmethod def _make_valid_url(cls): - return {!r} + return {valid_url!r} ''' @@ -44,7 +44,7 @@ def build_lazy_ie(ie, name): s += getsource(ie.suitable) if hasattr(ie, '_make_valid_url'): # search extractors - s += make_valid_template.format(ie._make_valid_url()) + s += make_valid_template.format(valid_url=ie._make_valid_url()) return s names = [] @@ -55,7 +55,7 @@ for ie in _ALL_CLASSES: names.append(name) module_contents.append( - '_ALL_CLASSES = [{}]'.format(', '.join(names))) + '_ALL_CLASSES = [{0}]'.format(', '.join(names))) module_src = '\n'.join(module_contents) From 6b97ca96fc242c1d7639d080e2c8e3ee9f9d0bed Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Feb 2016 12:22:12 +0100 Subject: [PATCH 0151/3599] lazy extractors: Style fixes * Sort extractors alphabetically * Add newlines when needed (youtube_dl/extractors/lazy_extractors.py pass the flake8 test now) --- devscripts/lazy_load_template.py | 1 - devscripts/make_lazy_extractors.py | 6 +++--- setup.cfg | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 563d629f8..b984aab9b 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -1,5 +1,4 @@ # encoding: utf-8 -# flake8: noqa from __future__ import unicode_literals import re diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 5d0ddb401..b5a8b9190 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -41,14 +41,14 @@ def build_lazy_ie(ie, name): valid_url=valid_url, module=ie.__module__) if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: - s += getsource(ie.suitable) + s += '\n' + getsource(ie.suitable) if hasattr(ie, '_make_valid_url'): # search extractors s += make_valid_template.format(valid_url=ie._make_valid_url()) return s names = [] -for ie in _ALL_CLASSES: +for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]: name = ie.ie_key() + 'IE' src = build_lazy_ie(ie, name) module_contents.append(src) @@ -57,7 +57,7 @@ for ie in _ALL_CLASSES: module_contents.append( '_ALL_CLASSES = [{0}]'.format(', '.join(names))) -module_src = '\n'.join(module_contents) +module_src = '\n'.join(module_contents) + '\n' with open(lazy_extractors_filename, 'wt') as f: f.write(module_src) diff --git a/setup.cfg b/setup.cfg index 5760112d4..2dc06ffe4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,5 +2,5 @@ universal = True [flake8] -exclude = 
youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/make_issue_template.py,setup.py,build,.git +exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git ignore = E402,E501,E731 From e0986e31cfd57392aaf3cc84b17fbf32c6134ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Feb 2016 12:28:58 +0100 Subject: [PATCH 0152/3599] lazy extractors: Output if it's enabled in the verbose log --- youtube_dl/YoutubeDL.py | 4 +++- youtube_dl/extractor/__init__.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f18a8e840..a89a71a25 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -82,7 +82,7 @@ from .utils import ( YoutubeDLHandler, ) from .cache import Cache -from .extractor import get_info_extractor, gen_extractor_classes +from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( @@ -1959,6 +1959,8 @@ class YoutubeDL(object): write_string(encoding_str, encoding=None) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') + if _LAZY_LOADER: + self._write_string('[debug] Lazy loading extractors enabled' + '\n') try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b0d4d156b..18d8dbcd6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,7 +3,9 @@ from __future__ import unicode_literals try: from .lazy_extractors import * from .lazy_extractors import _ALL_CLASSES + _LAZY_LOADER = True except ImportError: + _LAZY_LOADER = False from .extractors import * _ALL_CLASSES = [ From 8a5dc1c1e14cc19f143c84702f4bbc29e4f91e47 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Feb 2016 12:46:14 +0100 Subject: [PATCH 0153/3599] lazy extractors: Initialize the real info extractor According to the docs '__init__' is only called automatically if '__new__' returns an instance of the original class. --- devscripts/lazy_load_template.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index b984aab9b..2e6e6641b 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -11,7 +11,9 @@ class LazyLoadExtractor(object): def ie_key(cls): return cls.__name__[:-2] - def __new__(cls): + def __new__(cls, *args, **kwargs): mod = __import__(cls._module, fromlist=(cls.__name__,)) real_cls = getattr(mod, cls.__name__) - return real_cls.__new__(real_cls) + instance = real_cls.__new__(real_cls) + instance.__init__(*args, **kwargs) + return instance From 5a9858bfa9aba01c9dec549b83f5a0b17a520f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 6 Mar 2016 19:36:39 +0100 Subject: [PATCH 0154/3599] setup.py: add command for building the lazy_extractors module --- setup.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index bfe931f5b..9444d403d 100644 --- a/setup.py +++ b/setup.py @@ -8,11 +8,12 @@ import warnings import sys try: - from setuptools import setup + from setuptools import setup, Command setuptools_available = True except ImportError: - from distutils.core import setup + from distutils.core import setup, Command setuptools_available = False +from distutils.spawn import spawn try: # This will create an exe that needs Microsoft Visual C++ 2008 @@ -70,6 +71,22 @@ else: else: params['scripts'] = ['bin/youtube-dl'] +class build_lazy_extractors(Command): + description = "Build the extractor lazy loading module" + user_options = [] + + def 
initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + spawn( + [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], + dry_run=self.dry_run, + ) + # Get the version from youtube_dl/version.py without importing the package exec(compile(open('youtube_dl/version.py').read(), 'youtube_dl/version.py', 'exec')) @@ -107,5 +124,6 @@ setup( "Programming Language :: Python :: 3.4", ], + cmdclass={'build_lazy_extractors': build_lazy_extractors}, **params ) From bffb245a4882b10b5e66015fa89ef1cadf974415 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 9 Apr 2016 10:47:46 +0100 Subject: [PATCH 0155/3599] [aol] add support for videos with vidible IDs(closes #9124) --- youtube_dl/extractor/aol.py | 78 +++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 95a99c6b0..b729157d2 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -1,11 +1,17 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, +) class AolIE(InfoExtractor): IE_NAME = 'on.aol.com' - _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P[0-9]+)(?:$|\?)' + _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P[^/?-]+)' _TESTS = [{ 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', @@ -14,13 +20,79 @@ class AolIE(InfoExtractor): 'id': '518167793', 'ext': 'mp4', 'title': 'U.S. 
Official Warns Of \'Largest Ever\' IRS Phone Scam', + 'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.', + 'timestamp': 1395405060, + 'upload_date': '20140321', + 'uploader': 'Newsy Studio', }, - 'add_ie': ['FiveMin'], + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://on.aol.com/video/netflix-is-raising-rates-5707d6b8e4b090497b04f706?context=PC:homepage:PL1944:1460189336183', + 'info_dict': { + 'id': '5707d6b8e4b090497b04f706', + 'ext': 'mp4', + 'title': 'Netflix is Raising Rates', + 'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.', + 'upload_date': '20160408', + 'timestamp': 1460123280, + 'uploader': 'Veuer', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } }] def _real_extract(self, url): video_id = self._match_id(url) - return self.url_result('5min:%s' % video_id) + + response = self._download_json( + 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + video_id)['response'] + if response['statusText'] != 'Ok': + raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) + + video_data = response['data'] + formats = [] + m3u8_url = video_data.get('videoMasterPlaylist') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + for rendition in video_data.get('renditions', []): + video_url = rendition.get('url') + if not video_url: + continue + ext = rendition.get('format') + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + f = { + 'url': video_url, + 'format_id': rendition.get('quality'), + } + mobj = re.search(r'(\d+)x(\d+)', video_url) + if mobj: + f.update({ + 'width': int(mobj.group(1)), + 'height': 
int(mobj.group(2)), + }) + formats.append(f) + self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) + + return { + 'id': video_id, + 'title': video_data['title'], + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': int_or_none(video_data.get('publishDate')), + 'view_count': int_or_none(video_data.get('views')), + 'description': video_data.get('description'), + 'uploader': video_data.get('videoOwner'), + 'formats': formats, + } class AolFeaturesIE(InfoExtractor): From cacd9966624883523b264fa9ac48138074597730 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 Apr 2016 19:27:54 +0800 Subject: [PATCH 0156/3599] [utils] Don't touch URLs if not necessary Fix test_Generic_15 (Google redirect) --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8e53962c9..999dfabb5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1792,6 +1792,8 @@ def urlencode_postdata(*args, **kargs): def update_url_query(url, query): + if not query: + return url parsed_url = compat_urlparse.urlparse(url) qs = compat_parse_qs(parsed_url.query) qs.update(query) From 92c7f3157aad87096aa1fdd1a4daed3bdf262178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Apr 2016 17:32:23 +0600 Subject: [PATCH 0157/3599] [aol] Add coding cookie --- youtube_dl/extractor/aol.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index b729157d2..d4801a25b 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re From ab481b48e536dd2e03d6022abb7f4d1593294721 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 Apr 2016 20:12:11 +0800 Subject: [PATCH 0158/3599] [funnyordie] Relax M3U8 URL matching Also, m3u8_url extraction should be fatal as all formats depends directly or indirectly on it. 
This change fixes test_Generic_26 and TestFunnyOrDieSubtitles --- youtube_dl/extractor/funnyordie.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 4c4a87e2a..8c5ffc9e8 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -46,8 +46,8 @@ class FunnyOrDieIE(InfoExtractor): links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0) m3u8_url = self._search_regex( - r']+src=(["\'])(?P.+?/master\.m3u8)\1', - webpage, 'm3u8 url', default=None, group='url') + r']+src=(["\'])(?P.+?/master\.m3u8[^"\']*)\1', + webpage, 'm3u8 url', group='url') formats = [] From bfe96d7bea7c5227456bf1aecca51907c8f30c51 Mon Sep 17 00:00:00 2001 From: Philip Huppert Date: Fri, 9 Oct 2015 18:38:11 +0200 Subject: [PATCH 0159/3599] [presstv] Added extractor PressTV. Fixes #7060 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/presstv.py | 80 ++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 youtube_dl/extractor/presstv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de29c7956..c2fa83918 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -583,6 +583,7 @@ from .pornhub import ( from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE +from .presstv import PressTVIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py new file mode 100644 index 000000000..724d8b1c4 --- /dev/null +++ b/youtube_dl/extractor/presstv.py @@ -0,0 +1,80 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re + +from .common import InfoExtractor +from ..utils import str_to_int + + +class PressTVIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?presstv\.ir/Video/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/' + + _TEST = { + 'url': 'http://www.presstv.ir/Video/2015/10/04/431915/Max-Igan-Press-TV-Face-to-Face', + 'md5': 'e95736ac75088b5f1e5bbb68f248f90d', + 'info_dict': { + 'id': '431915', + 'ext': 'mp4', + 'title': 'Press TV’s full interview with Max Igan', + 'upload_date': '20151004', + 'thumbnail': 'http://217.218.67.233/photo/20151004/d5c333ad-98f9-4bd3-bc3e-a1ad6a192803.jpg', + 'description': ('Watch Press TV’s full interview with Max Igan, a radio talk show host and political ' + 'commentator.\nThe interview, conducted on Press TV’s Face ' + 'to Face program, was aired on October 3, 2015.') + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # extract video URL from webpage + video_url = self._html_search_regex(r'', webpage, + 'Video URL') + + # build list of available formats + # specified in http://www.presstv.ir/Scripts/playback.js + base_url = 'http://192.99.219.222:82/presstv' + formats = [ + { + 'url': base_url + video_url, + 'format': '1080p mp4', + 'format_id': '1080p' + }, { + 'url': base_url + video_url.replace(".mp4", "_low800.mp4"), + 'format': '720p mp4', + 'format_id': '720p' + }, { + 'url': base_url + video_url.replace(".mp4", "_low400.mp4"), + 'format': '360p mp4', + 'format_id': '360p' + }, { + 'url': base_url + video_url.replace(".mp4", "_low200.mp4"), + 'format': '180p mp4', + 'format_id': '180p' + } + ] + formats.reverse() + + # extract video metadata + title = self._html_search_meta('title', webpage, 'Title', True) + title = title.partition(' - ')[2] + + description = self._html_search_regex(r'
(.*?)
', webpage, + 'Description', flags=re.DOTALL) + + thumbnail = self._html_search_meta('og:image', webpage, 'Thumbnail', True) + + year = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload year', group='y')) + month = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload month', group='m')) + day = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload day', group='d')) + upload_date = '%04d%02d%02d' % (year, month, day) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'description': description + } From c05025fdd79993314e20a6074aed084889199e50 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 Apr 2016 21:46:51 +0800 Subject: [PATCH 0160/3599] [internetvideoarchive] Fix extraction and support json URLs --- youtube_dl/extractor/internetvideoarchive.py | 118 +++++++++---------- 1 file changed, 58 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index e60145b3d..45add007f 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -1,93 +1,91 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urlparse, - compat_urllib_parse_urlencode, ) from ..utils import ( - xpath_with_ns, + determine_ext, + int_or_none, + xpath_text, ) class InternetVideoArchiveIE(InfoExtractor): - _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?' + _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?' 
_TEST = { - 'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247', + 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false', 'info_dict': { - 'id': '452693', + 'id': '194487', 'ext': 'mp4', - 'title': 'SKYFALL', - 'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.', - 'duration': 152, + 'title': 'KICK-ASS 2', + 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', + }, + 'params': { + # m3u8 download + 'skip_download': True, }, } @staticmethod - def _build_url(query): - return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query + def _build_json_url(query): + return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query @staticmethod - def _clean_query(query): - NEEDED_ARGS = ['publishedid', 'customerid'] - query_dic = compat_urlparse.parse_qs(query) - cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS) - # Other player ids return m3u8 urls - cleaned_dic['playerid'] = '247' - cleaned_dic['videokbrate'] = '100000' - return compat_urllib_parse_urlencode(cleaned_dic) + def _build_xml_url(query): + return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' 
+ query def _real_extract(self, url): query = compat_urlparse.urlparse(url).query - query_dic = compat_urlparse.parse_qs(query) + query_dic = compat_parse_qs(query) video_id = query_dic['publishedid'][0] - url = self._build_url(query) - flashconfiguration = self._download_xml(url, video_id, - 'Downloading flash configuration') - file_url = flashconfiguration.find('file').text - file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx') - # Replace some of the parameters in the query to get the best quality - # and http links (no m3u8 manifests) - file_url = re.sub(r'(?<=\?)(.+)$', - lambda m: self._clean_query(m.group()), - file_url) - info = self._download_xml(file_url, video_id, - 'Downloading video info') - item = info.find('channel/item') + if '/player/' in url: + configuration = self._download_json(url, video_id) - def _bp(p): - return xpath_with_ns( - p, - { - 'media': 'http://search.yahoo.com/mrss/', - 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats', - } - ) - formats = [] - for content in item.findall(_bp('media:group/media:content')): - attr = content.attrib - f_url = attr['url'] - width = int(attr['width']) - bitrate = int(attr['bitrate']) - format_id = '%d-%dk' % (width, bitrate) - formats.append({ - 'format_id': format_id, - 'url': f_url, - 'width': width, - 'tbr': bitrate, - }) + # There are multiple videos in the playlist whlie only the first one + # matches the video played in browsers + video_info = configuration['playlist'][0] - self._sort_formats(formats) + formats = [] + for source in video_info['sources']: + file_url = source['file'] + if determine_ext(file_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, ext='mp4', m3u8_id='hls')) + else: + a_format = { + 'url': file_url, + } + + if source.get('label') and source['label'][-4:] == ' kbs': + tbr = int_or_none(source['label'][:-4]) + a_format.update({ + 'tbr': tbr, + 'format_id': 'http-%d' % tbr, + }) + formats.append(a_format) + + 
self._sort_formats(formats) + + title = video_info['title'] + description = video_info.get('description') + thumbnail = video_info.get('image') + else: + configuration = self._download_xml(url, video_id) + formats = [{ + 'url': xpath_text(configuration, './file', 'file URL', fatal=True), + }] + thumbnail = xpath_text(configuration, './image', 'thumbnail') + title = 'InternetVideoArchive video %s' % video_id + description = None return { 'id': video_id, - 'title': item.find('title').text, + 'title': title, 'formats': formats, - 'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'], - 'description': item.find('description').text, - 'duration': int(attr['duration']), + 'thumbnail': thumbnail, + 'description': description, } From dae2a058de81e42d73bdbe0041a598262703c352 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 Apr 2016 21:47:12 +0800 Subject: [PATCH 0161/3599] [rottentomatoes] Adapt to InternetVideoArchiveIE --- youtube_dl/extractor/rottentomatoes.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index e8bb20a08..f9cd48790 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,11 +1,11 @@ from __future__ import unicode_literals -from .videodetective import VideoDetectiveIE +from .common import InfoExtractor +from ..compat import compat_urlparse +from .internetvideoarchive import InternetVideoArchiveIE -# It just uses the same method as videodetective.com, -# the internetvideoarchive.com is extracted from the og:video property -class RottenTomatoesIE(VideoDetectiveIE): +class RottenTomatoesIE(InfoExtractor): _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P\d+)' _TEST = { @@ -13,7 +13,19 @@ class RottenTomatoesIE(VideoDetectiveIE): 'info_dict': { 'id': '613340', 'ext': 'mp4', - 'title': 'TOY STORY 3', - 'description': 'From the creators of the beloved TOY 
STORY films, comes a story that will reunite the gang in a whole new way.', + 'title': 'Toy Story 3', }, } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + og_video = self._og_search_video_url(webpage) + query = compat_urlparse.urlparse(og_video).query + + return { + '_type': 'url_transparent', + 'url': InternetVideoArchiveIE._build_xml_url(query), + 'ie_key': InternetVideoArchiveIE.ie_key(), + 'title': self._og_search_title(webpage), + } From c991106706c05401090bcba79e65feae5c7e3fda Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 Apr 2016 21:47:35 +0800 Subject: [PATCH 0162/3599] [videodetective] Adapt to InternetVideoArchiveIE --- youtube_dl/extractor/videodetective.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 0ffc7ff7d..2ed5d9643 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -14,8 +14,11 @@ class VideoDetectiveIE(InfoExtractor): 'id': '194487', 'ext': 'mp4', 'title': 'KICK-ASS 2', - 'description': 'md5:65ba37ad619165afac7d432eaded6013', - 'duration': 138, + 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a', + }, + 'params': { + # m3u8 download + 'skip_download': True, }, } @@ -24,4 +27,4 @@ class VideoDetectiveIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage) query = compat_urlparse.urlparse(og_video).query - return self.url_result(InternetVideoArchiveIE._build_url(query), ie=InternetVideoArchiveIE.ie_key()) + return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key()) From 6c4c7539f222cd9e80dfae0b1c9dabbd45d1b3dc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 Apr 2016 22:04:48 +0800 Subject: [PATCH 0163/3599] [test/helper] Check got values to be strings for md5: fields Seen in PBSIE tests 
--- test/helper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/helper.py b/test/helper.py index f2d878212..b8e22c5cb 100644 --- a/test/helper.py +++ b/test/helper.py @@ -143,6 +143,9 @@ def expect_value(self, got, expected, field): expect_value(self, item_got, item_expected, field) else: if isinstance(expected, compat_str) and expected.startswith('md5:'): + self.assertTrue( + isinstance(got, compat_str), + 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got))) got = 'md5:' + md5(got) elif isinstance(expected, compat_str) and expected.startswith('mincount:'): self.assertTrue( From 95153a960d098d75e6100e38e77fdaa32f5267a2 Mon Sep 17 00:00:00 2001 From: Philip Huppert Date: Sat, 9 Apr 2016 16:14:05 +0200 Subject: [PATCH 0164/3599] [presstv] updated extractor and tests to work with current PressTV website --- youtube_dl/extractor/presstv.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py index 724d8b1c4..9af6780c1 100644 --- a/youtube_dl/extractor/presstv.py +++ b/youtube_dl/extractor/presstv.py @@ -7,20 +7,20 @@ from ..utils import str_to_int class PressTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?presstv\.ir/Video/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/' _TEST = { - 'url': 'http://www.presstv.ir/Video/2015/10/04/431915/Max-Igan-Press-TV-Face-to-Face', - 'md5': 'e95736ac75088b5f1e5bbb68f248f90d', + 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/', + 'md5': '5d7e3195a447cb13e9267e931d8dd5a5', 'info_dict': { - 'id': '431915', + 'id': '459911', 'ext': 'mp4', - 'title': 'Press TV’s full interview with Max Igan', - 'upload_date': '20151004', - 'thumbnail': 'http://217.218.67.233/photo/20151004/d5c333ad-98f9-4bd3-bc3e-a1ad6a192803.jpg', - 
'description': ('Watch Press TV’s full interview with Max Igan, a radio talk show host and political ' - 'commentator.\nThe interview, conducted on Press TV’s Face ' - 'to Face program, was aired on October 3, 2015.') + 'title': 'Organic mattresses used to clean waste water', + 'upload_date': '20160409', + 'thumbnail': 'http://media.presstv.com/photo/20160409/41719129-76fa-4372-a09d-bf348278eb5d.jpg', + 'description': ('A trial program at an Australian sewerage treatment facility hopes to change ' + 'the way waste water is treated by using plant mattresses to reduce chemical ' + 'and electricity use.') } } @@ -58,12 +58,10 @@ class PressTVIE(InfoExtractor): # extract video metadata title = self._html_search_meta('title', webpage, 'Title', True) - title = title.partition(' - ')[2] - - description = self._html_search_regex(r'
(.*?)
', webpage, - 'Description', flags=re.DOTALL) + title = title.partition('-')[2].strip() thumbnail = self._html_search_meta('og:image', webpage, 'Thumbnail', True) + description = self._html_search_meta('og:description', webpage, 'Description', True) year = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload year', group='y')) month = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload month', group='m')) From eb9c3edd5ec970abb349bd4c71040b75e9d19e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 9 Apr 2016 22:40:05 +0200 Subject: [PATCH 0165/3599] [test/utils] Add test for date_from_str --- test/test_utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index a35debfe1..0f36bb9f0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -20,6 +20,7 @@ from youtube_dl.utils import ( args_to_str, encode_base_n, clean_html, + date_from_str, DateRange, detect_exe_version, determine_ext, @@ -234,6 +235,13 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML('�'), '�') + def test_date_from_str(self): + self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) + self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week')) + self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week')) + self.assertEqual(date_from_str('now+365day'), date_from_str('now+1year')) + self.assertEqual(date_from_str('now+30day'), date_from_str('now+1month')) + def test_daterange(self): _20century = DateRange("19000101", "20000101") self.assertFalse("17890714" in _20century) From 61dd350a04a77abe86e46cfe8b7603514e8f2ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 03:02:35 +0600 Subject: [PATCH 0166/3599] [1tv] Fix extraction (Closes #9103) --- youtube_dl/extractor/firsttv.py | 145 ++++++++++++++++++++++---------- 1 file changed, 100 
insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 98b165143..88bca1007 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -2,78 +2,133 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import compat_xpath +from ..utils import ( + int_or_none, + qualities, + unified_strdate, + xpath_attr, + xpath_element, + xpath_text, + xpath_with_ns, +) class FirstTVIE(InfoExtractor): IE_NAME = '1tv' IE_DESC = 'Первый канал' - _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P.+)' + _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P\d+)' _TESTS = [{ - 'url': 'http://www.1tv.ru/videoarchive/73390', - 'md5': '777f525feeec4806130f4f764bc18a4f', - 'info_dict': { - 'id': '73390', - 'ext': 'mp4', - 'title': 'Олимпийские канатные дороги', - 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', - 'duration': 149, - 'like_count': int, - 'dislike_count': int, - }, - 'skip': 'Only works from Russia', - }, { + # single format via video_materials.json API 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', - 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', + 'md5': '82a2777648acae812d58b3f5bd42882b', 'info_dict': { 'id': '35930', 'ext': 'mp4', - 'title': 'Наедине со всеми. Людмила Сенчина', - 'description': 'md5:89553aed1d641416001fe8d450f06cb9', + 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', + 'description': 'md5:357933adeede13b202c7c21f91b871b2', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20150212', 'duration': 2694, }, - 'skip': 'Only works from Russia', + }, { + # multiple formats via video_materials.json API + 'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641', + 'info_dict': { + 'id': '113641', + 'ext': 'mp4', + 'title': 'Весенняя аллергия. Доброе утро. 
Фрагмент выпуска от 07.04.2016', + 'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20160407', + 'duration': 179, + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, + }, + }, { + # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API + 'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038', + 'md5': '519d306c5b5669761fd8906c39dbee23', + 'info_dict': { + 'id': '47038', + 'ext': 'mp4', + 'title': '"Побег". Второй сезон. 3 серия', + 'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20120516', + 'duration': 3080, + }, + }, { + 'url': 'http://www.1tv.ru/videoarchive/9967', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, 'Downloading page') + # Videos with multiple formats only available via this API + video = self._download_json( + 'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id, + video_id, fatal=False) - video_url = self._html_search_regex( - r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''', - webpage, 'video URL') + description, thumbnail, upload_date, duration = [None] * 4 - title = self._html_search_regex( - [r'
\s*

([^<]*)', - r"'title'\s*:\s*'([^']+)'"], webpage, 'title') - description = self._html_search_regex( - r'
\s*
 
\s*

([^<]*)

', - webpage, 'description', default=None) or self._html_search_meta( + if video: + item = video[0] + title = item['title'] + quality = qualities(('ld', 'sd', 'hd', )) + formats = [{ + 'url': f['src'], + 'format_id': f.get('name'), + 'quality': quality(f.get('name')), + } for f in item['mbr'] if f.get('src')] + thumbnail = item.get('poster') + else: + # Some videos are not available via video_materials.json + video = self._download_xml( + 'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id, + video_id) + + NS_MAP = { + 'media': 'http://search.yahoo.com/mrss/', + } + + item = xpath_element(video, './channel/item', fatal=True) + title = xpath_text(item, './title', fatal=True) + formats = [{ + 'url': content.attrib['url'], + } for content in item.findall( + compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')] + thumbnail = xpath_attr( + item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url') + + self._sort_formats(formats) + + webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False) + if webpage: + title = self._html_search_regex( + (r'
\s*

([^<]*)', + r"'title'\s*:\s*'([^']+)'"), + webpage, 'title', default=None) or title + description = self._html_search_regex( + r'
\s*
 
\s*

([^<]*)

', + webpage, 'description', default=None) or self._html_search_meta( 'description', webpage, 'description') - - thumbnail = self._og_search_thumbnail(webpage) - duration = self._og_search_property( - 'video:duration', webpage, - 'video duration', fatal=False) - - like_count = self._html_search_regex( - r'title="Понравилось".*?/> \[(\d+)\]', - webpage, 'like count', default=None) - dislike_count = self._html_search_regex( - r'title="Не понравилось".*?/> \[(\d+)\]', - webpage, 'dislike count', default=None) + thumbnail = thumbnail or self._og_search_thumbnail(webpage) + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'video duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) return { 'id': video_id, - 'url': video_url, 'thumbnail': thumbnail, 'title': title, 'description': description, + 'upload_date': upload_date, 'duration': int_or_none(duration), - 'like_count': int_or_none(like_count), - 'dislike_count': int_or_none(dislike_count), + 'formats': formats } From 6a801f44704c3df49563852108c104c43a0551cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 9 Apr 2016 23:18:41 +0200 Subject: [PATCH 0167/3599] [test/InfoExtractors] add test for _download_json --- test/test_InfoExtractor.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 938466a80..6404ac89f 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor +from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError class TestIE(InfoExtractor): @@ -66,5 +67,14 @@ class TestInfoExtractor(unittest.TestCase): 
self.assertEqual(ie._html_search_meta('e', html), '5') self.assertEqual(ie._html_search_meta('f', html), '6') + def test_download_json(self): + uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') + self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) + uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript') + self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'}) + uri = encode_data_uri(b'{"foo": invalid}', 'application/json') + self.assertRaises(ExtractorError, self.ie._download_json, uri, None) + self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + if __name__ == '__main__': unittest.main() From 49caf3307f1ae713acaeed651984a6338293b8d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 17:10:27 +0600 Subject: [PATCH 0168/3599] [extractor/common] Remove irrelevant comment --- youtube_dl/extractor/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 17d00721c..5269059d0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -376,7 +376,6 @@ class InfoExtractor(object): self.to_screen('%s' % (note,)) else: self.to_screen('%s: %s' % (video_id, note)) - # data, headers and query params will be ignored for `Request` objects if isinstance(url_or_request, compat_urllib_request.Request): url_or_request = update_Request( url_or_request, data=data, headers=headers, query=query) From a1fa60a9340f61a8455a0cd85c18f63d9bdfe681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 18:43:40 +0600 Subject: [PATCH 0169/3599] [cliprs] Add extractor (Closes #9099) --- youtube_dl/extractor/cliprs.py | 90 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 7 +++ 2 files changed, 97 insertions(+) create mode 100644 youtube_dl/extractor/cliprs.py diff --git a/youtube_dl/extractor/cliprs.py 
b/youtube_dl/extractor/cliprs.py new file mode 100644 index 000000000..4f9320ea5 --- /dev/null +++ b/youtube_dl/extractor/cliprs.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + parse_iso8601, +) + + +class ClipRsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P[^/]+)/\d+' + _TEST = { + 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732', + 'md5': 'c412d57815ba07b56f9edc7b5d6a14e5', + 'info_dict': { + 'id': '1488842.1399140381', + 'ext': 'mp4', + 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli', + 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026', + 'duration': 229, + 'timestamp': 1459850243, + 'upload_date': '20160405', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + r'id=(["\'])mvp:(?P.+?)\1', webpage, 'mvp id', group='id') + + response = self._download_json( + 'http://qi.ckm.onetapi.pl/', video_id, + query={ + 'body[id]': video_id, + 'body[jsonrpc]': '2.0', + 'body[method]': 'get_asset_detail', + 'body[params][ID_Publikacji]': video_id, + 'body[params][Service]': 'www.onet.pl', + 'content-type': 'application/jsonp', + 'x-onet-app': 'player.front.onetapi.pl', + }) + + error = response.get('error') + if error: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error['message']), expected=True) + + video = response['result'].get('0') + + formats = [] + for _, formats_dict in video['formats'].items(): + if not isinstance(formats_dict, dict): + continue + for format_id, format_list in formats_dict.items(): + if not isinstance(format_list, list): + continue + for f in format_list: + if not f.get('url'): + continue + formats.append({ + 'url': f['url'], + 'format_id': format_id, + 'height': 
int_or_none(f.get('vertical_resolution')), + 'width': int_or_none(f.get('horizontal_resolution')), + 'abr': float_or_none(f.get('audio_bitrate')), + 'vbr': float_or_none(f.get('video_bitrate')), + }) + self._sort_formats(formats) + + meta = video.get('meta', {}) + + title = self._og_search_title(webpage, default=None) or meta['title'] + description = self._og_search_description(webpage, default=None) or meta.get('description') + duration = meta.get('length') or meta.get('lenght') + timestamp = parse_iso8601(meta.get('addDate'), ' ') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de29c7956..aefc4df01 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -123,6 +123,7 @@ from .chirbit import ( ) from .cinchcast import CinchcastIE from .cinemassacre import CinemassacreIE +from .cliprs import ClipRsIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE @@ -939,6 +940,12 @@ from .xhamster import ( XHamsterIE, XHamsterEmbedIE, ) +from .xiami import ( + XiamiIE, + XiamiAlbumIE, + XiamiArtistIE, + XiamiCollectionIE +) from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE From f44c2768421bc3b0ead3ccf86b5e499d498674c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 19:21:58 +0600 Subject: [PATCH 0170/3599] [extractor/extractors] Remove non-existant imports --- youtube_dl/extractor/extractors.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index aefc4df01..c1a13c982 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -940,12 +940,6 @@ from .xhamster import ( XHamsterIE, XHamsterEmbedIE, ) -from 
.xiami import ( - XiamiIE, - XiamiAlbumIE, - XiamiArtistIE, - XiamiCollectionIE -) from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE From de728757ad7218ce175649ec0d3f0b5723f2c580 Mon Sep 17 00:00:00 2001 From: Philip Huppert Date: Sun, 10 Apr 2016 16:36:44 +0200 Subject: [PATCH 0171/3599] [presstv] Refactored extractor. --- youtube_dl/extractor/presstv.py | 52 +++++++++++++++------------------ 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py index 9af6780c1..755e32528 100644 --- a/youtube_dl/extractor/presstv.py +++ b/youtube_dl/extractor/presstv.py @@ -17,10 +17,8 @@ class PressTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Organic mattresses used to clean waste water', 'upload_date': '20160409', - 'thumbnail': 'http://media.presstv.com/photo/20160409/41719129-76fa-4372-a09d-bf348278eb5d.jpg', - 'description': ('A trial program at an Australian sewerage treatment facility hopes to change ' - 'the way waste water is treated by using plant mattresses to reduce chemical ' - 'and electricity use.') + 'thumbnail': 're:^https?://.*\.jpg', + 'description': 'md5:20002e654bbafb6908395a5c0cfcd125' } } @@ -35,38 +33,34 @@ class PressTVIE(InfoExtractor): # build list of available formats # specified in http://www.presstv.ir/Scripts/playback.js base_url = 'http://192.99.219.222:82/presstv' - formats = [ - { - 'url': base_url + video_url, - 'format': '1080p mp4', - 'format_id': '1080p' - }, { - 'url': base_url + video_url.replace(".mp4", "_low800.mp4"), - 'format': '720p mp4', - 'format_id': '720p' - }, { - 'url': base_url + video_url.replace(".mp4", "_low400.mp4"), - 'format': '360p mp4', - 'format_id': '360p' - }, { - 'url': base_url + video_url.replace(".mp4", "_low200.mp4"), - 'format': '180p mp4', - 'format_id': '180p' - } + _formats = [ + ("180p", "_low200.mp4"), + ("360p", "_low400.mp4"), + ("720p", "_low800.mp4"), + ("1080p", ".mp4") ] - formats.reverse() + + 
formats = [] + for fmt in _formats: + format_id, extension = fmt + formats.append({ + 'url': base_url + video_url[:-4] + extension, + 'format_id': format_id + }) # extract video metadata title = self._html_search_meta('title', webpage, 'Title', True) title = title.partition('-')[2].strip() - thumbnail = self._html_search_meta('og:image', webpage, 'Thumbnail', True) - description = self._html_search_meta('og:description', webpage, 'Description', True) + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) - year = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload year', group='y')) - month = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload month', group='m')) - day = str_to_int(self._search_regex(PressTVIE._VALID_URL, url, 'Upload day', group='d')) - upload_date = '%04d%02d%02d' % (year, month, day) + match = re.match(PressTVIE._VALID_URL, url) + upload_date = '%04d%02d%02d' % ( + str_to_int(match.group('y')), + str_to_int(match.group('m')), + str_to_int(match.group('d')) + ) return { 'id': video_id, From 443285aabef470f546f0b01b8e8194ca988bb315 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 22:15:11 +0600 Subject: [PATCH 0172/3599] [ebaumsworlds] Update _VALID_URL (Closes #9135) --- youtube_dl/extractor/ebaumsworld.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dl/extractor/ebaumsworld.py index b6bfd2b2d..c97682cd3 100644 --- a/youtube_dl/extractor/ebaumsworld.py +++ b/youtube_dl/extractor/ebaumsworld.py @@ -4,10 +4,10 @@ from .common import InfoExtractor class EbaumsWorldIE(InfoExtractor): - _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ebaumsworld\.com/videos/[^/]+/(?P\d+)' _TEST = { - 'url': 'http://www.ebaumsworld.com/video/watch/83367677/', + 'url': 
'http://www.ebaumsworld.com/videos/a-giant-python-opens-the-door/83367677/', 'info_dict': { 'id': '83367677', 'ext': 'mp4', From 66fa49586879418e357337ff82794fe851e71e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 22:37:14 +0600 Subject: [PATCH 0173/3599] [screencastomatic] Fix extraction (Closes #9136) --- youtube_dl/extractor/screencastomatic.py | 35 ++++++++---------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py index 05337421c..c08c89d94 100644 --- a/youtube_dl/extractor/screencastomatic.py +++ b/youtube_dl/extractor/screencastomatic.py @@ -1,15 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - js_to_json, -) +from .jwplatform import JWPlatformBaseIE +from ..utils import js_to_json -class ScreencastOMaticIE(InfoExtractor): +class ScreencastOMaticIE(JWPlatformBaseIE): _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P[0-9a-zA-Z]+)' _TEST = { 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl', @@ -27,23 +23,14 @@ class ScreencastOMaticIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - setup_js = self._search_regex( - r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", - webpage, 'setup code') - data = self._parse_json(setup_js, video_id, transform_source=js_to_json) - try: - video_data = next( - m for m in data['modes'] if m.get('type') == 'html5') - except StopIteration: - raise ExtractorError('Could not find any video entries!') - video_url = compat_urlparse.urljoin(url, video_data['config']['file']) - thumbnail = data.get('image') + jwplayer_data = self._parse_json( + self._search_regex( + r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'), + video_id, transform_source=js_to_json) - return { - 'id': 
video_id, + info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) + info_dict.update({ 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'url': video_url, - 'ext': 'mp4', - 'thumbnail': thumbnail, - } + }) + return info_dict From a6d6722c8fc2174ce72ed462e649d397d1448a0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 22:47:38 +0600 Subject: [PATCH 0174/3599] [jwplatform:base] Extract duration --- youtube_dl/extractor/jwplatform.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 6770685d7..01601c59e 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + float_or_none, + int_or_none, +) class JWPlatformBaseIE(InfoExtractor): @@ -41,6 +44,7 @@ class JWPlatformBaseIE(InfoExtractor): 'description': video_data.get('description'), 'thumbnail': self._proto_relative_url(video_data.get('image')), 'timestamp': int_or_none(video_data.get('pubdate')), + 'duration': float_or_none(jwplayer_data.get('duration')), 'subtitles': subtitles, 'formats': formats, } From d7eb052fa2ab26839b050a7c3fa3f8874d508a02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 22:48:04 +0600 Subject: [PATCH 0175/3599] [screencastomatic] Add duration to test --- youtube_dl/extractor/screencastomatic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py index c08c89d94..7a88a42cd 100644 --- a/youtube_dl/extractor/screencastomatic.py +++ b/youtube_dl/extractor/screencastomatic.py @@ -16,6 +16,7 @@ class ScreencastOMaticIE(JWPlatformBaseIE): 'title': 'Welcome to 3-4 Philosophy @ DECV!', 
'thumbnail': 're:^https?://.*\.jpg$', 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.', + 'duration': 369.163, } } From 7ebc36900d15888321a45f04113eeda169469004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 22:55:07 +0600 Subject: [PATCH 0176/3599] [jwplatform:base] Improve subtitles extraction --- youtube_dl/extractor/jwplatform.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 01601c59e..8a5e562db 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -13,10 +13,6 @@ from ..utils import ( class JWPlatformBaseIE(InfoExtractor): def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True): video_data = jwplayer_data['playlist'][0] - subtitles = {} - for track in video_data['tracks']: - if track['kind'] == 'captions': - subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}] formats = [] for source in video_data['sources']: @@ -38,6 +34,15 @@ class JWPlatformBaseIE(InfoExtractor): }) self._sort_formats(formats) + subtitles = {} + tracks = video_data.get('tracks') + if tracks and isinstance(tracks, list): + for track in tracks: + if track.get('file') and track.get('kind') == 'captions': + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track['file']) + }) + return { 'id': video_id, 'title': video_data['title'] if require_title else video_data.get('title'), From 4a121d29bb0700beb19e8b6edb5d479e9fe7ac1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Apr 2016 23:45:17 +0600 Subject: [PATCH 0177/3599] [glide] Fix extraction (Closes #9141) --- youtube_dl/extractor/glide.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 
9561ed5fb..0ab23f766 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -23,8 +23,9 @@ class GlideIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex( r'(.*?)', webpage, 'title') - video_url = self.http_scheme() + self._search_regex( - r'', webpage, 'video URL') + video_url = self._proto_relative_url(self._search_regex( + r']+src=(["\'])(?P.+?)\1', + webpage, 'video URL', group='url'), self.http_scheme()) thumbnail_url = self._search_regex( r' Date: Sun, 10 Apr 2016 23:56:23 +0600 Subject: [PATCH 0178/3599] [glide] Improve extraction and extract upload info --- youtube_dl/extractor/glide.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 0ab23f766..62ff84835 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import unified_strdate class GlideIE(InfoExtractor): @@ -15,27 +16,38 @@ class GlideIE(InfoExtractor): 'ext': 'mp4', 'title': 'Damon Timm\'s Glide message', 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', + 'uploader': 'Damon Timm', + 'upload_date': '20140919', } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex( - r'(.*?)', webpage, 'title') + r'(.+?)', webpage, 'title') video_url = self._proto_relative_url(self._search_regex( r']+src=(["\'])(?P.+?)\1', - webpage, 'video URL', group='url'), self.http_scheme()) - thumbnail_url = self._search_regex( - r']+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P.+?)\1', + webpage, 'thumbnail url', default=None, + group='url')) or self._og_search_thumbnail(webpage) + uploader = self._search_regex( + r']+class=["\']info-name["\'][^>]*>([^<]+)', + webpage, 'uploader', fatal=False) + upload_date = 
unified_strdate(self._search_regex( + r']+class="info-date"[^>]*>([^<]+)', + webpage, 'upload date', fatal=False)) return { 'id': video_id, 'title': title, 'url': video_url, 'thumbnail': thumbnail, + 'uploader': uploader, + 'upload_date': upload_date, } From 452908b257da1a5b228a2c0522c89fff87296622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 Apr 2016 00:06:05 +0600 Subject: [PATCH 0179/3599] [telebruxelles] Fix extraction (Closes #9142) --- youtube_dl/extractor/telebruxelles.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py index a3d05f97d..eefecc490 100644 --- a/youtube_dl/extractor/telebruxelles.py +++ b/youtube_dl/extractor/telebruxelles.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class TeleBruxellesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?telebruxelles\.be/(news|sport|dernier-jt)/?(?P[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P[^/#?]+)' _TESTS = [{ 'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/', 'md5': '59439e568c9ee42fb77588b2096b214f', @@ -39,18 +41,18 @@ class TeleBruxellesIE(InfoExtractor): webpage = self._download_webpage(url, display_id) article_id = self._html_search_regex( - r"
(.*?)

', webpage, 'title') - description = self._og_search_description(webpage) + description = self._og_search_description(webpage, default=None) rtmp_url = self._html_search_regex( - r"file: \"(rtmp://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}/vod/mp4:\" \+ \"\w+\" \+ \".mp4)\"", + r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"', webpage, 'RTMP url') - rtmp_url = rtmp_url.replace("\" + \"", "") + rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) return { - 'id': article_id, + 'id': article_id or display_id, 'display_id': display_id, 'title': title, 'description': description, From dfbc7f7f3f44ff7f9ed2beff76dc37edbb66af8d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Apr 2016 16:14:07 +0800 Subject: [PATCH 0180/3599] [presstv] Improve and simplify --- youtube_dl/extractor/presstv.py | 48 +++++++++++++++++---------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py index 755e32528..2da93ed34 100644 --- a/youtube_dl/extractor/presstv.py +++ b/youtube_dl/extractor/presstv.py @@ -1,19 +1,21 @@ # coding: utf-8 from __future__ import unicode_literals + import re from .common import InfoExtractor -from ..utils import str_to_int +from ..utils import remove_start class PressTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P\d+)/(?P\d+)/(?P\d+)/(?P\d+)/(?P[^/]+)?' 
_TEST = { 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/', 'md5': '5d7e3195a447cb13e9267e931d8dd5a5', 'info_dict': { 'id': '459911', + 'display_id': 'Australian-sewerage-treatment-facility-', 'ext': 'mp4', 'title': 'Organic mattresses used to clean waste water', 'upload_date': '20160409', @@ -23,47 +25,47 @@ class PressTVIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) # extract video URL from webpage - video_url = self._html_search_regex(r'', webpage, - 'Video URL') + video_url = self._hidden_inputs(webpage)['inpPlayback'] # build list of available formats # specified in http://www.presstv.ir/Scripts/playback.js base_url = 'http://192.99.219.222:82/presstv' _formats = [ - ("180p", "_low200.mp4"), - ("360p", "_low400.mp4"), - ("720p", "_low800.mp4"), - ("1080p", ".mp4") + (180, '_low200.mp4'), + (360, '_low400.mp4'), + (720, '_low800.mp4'), + (1080, '.mp4') ] - formats = [] - for fmt in _formats: - format_id, extension = fmt - formats.append({ - 'url': base_url + video_url[:-4] + extension, - 'format_id': format_id - }) + formats = [{ + 'url': base_url + video_url[:-4] + extension, + 'format_id': '%dp' % height, + 'height': height, + } for height, extension in _formats] # extract video metadata - title = self._html_search_meta('title', webpage, 'Title', True) - title = title.partition('-')[2].strip() + title = remove_start( + self._html_search_meta('title', webpage, fatal=True), 'PressTV-') thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) - match = re.match(PressTVIE._VALID_URL, url) upload_date = '%04d%02d%02d' % ( - str_to_int(match.group('y')), - str_to_int(match.group('m')), - str_to_int(match.group('d')) + 
int(mobj.group('y')), + int(mobj.group('m')), + int(mobj.group('d')), ) return { 'id': video_id, + 'display_id': display_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, From 134c207e3faf1ad38a23e31d5067eafe0ef8e92a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Apr 2016 19:17:11 +0800 Subject: [PATCH 0181/3599] [arte.tv:embed] Extended support (#2620) --- youtube_dl/extractor/arte.py | 2 +- youtube_dl/extractor/generic.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index ae0f27dcb..f042d9163 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -337,7 +337,7 @@ class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) http://www\.arte\.tv - /playerv2/embed\.php\?json_url= + /(?:playerv2/embed|arte_vp/index)\.php\?json_url= (?P http://arte\.tv/papi/tvguide/videos/stream/player/ (?P[^/]+)/(?P[^/]+)[^&]* diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 589d1e152..2aadd6a12 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1128,6 +1128,18 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # Another form of arte.tv embed + { + 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html', + 'md5': '850bfe45417ddf221288c88a0cffe2e2', + 'info_dict': { + 'id': '030273-562_PLUS7-F', + 'ext': 'mp4', + 'title': 'ARTE Reportage - Nulle part, en France', + 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d', + 'upload_date': '20160409', + }, + }, ] def report_following_redirect(self, new_url): @@ -1702,7 +1714,7 @@ class GenericIE(InfoExtractor): # Look for embedded arte.tv player mobj = re.search( - r'', webpage, 'JS code') + decoded = self.openload_decode(code) + video_url = self._search_regex( - r'return\s+"(https?://[^"]+)"', self.openload_decode(code), 'video URL') + 
r'return\s+"(https?://[^"]+)"', decoded, 'video URL') title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', fatal=True) + ext = mimetype2ext(self._search_regex( + r'window\.vt\s*=\s*(["\'])(?P.+?)\1', decoded, + 'mimetype', default=None, group='mimetype')) or determine_ext( + video_url, 'mp4') + return { 'id': video_id, 'title': title, + 'ext': ext, 'thumbnail': self._og_search_thumbnail(webpage), 'url': video_url, } From e9063b5de9a1118842185768f5e615b76ec8692c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 25 Apr 2016 00:22:55 +0600 Subject: [PATCH 0317/3599] [openload] Add test --- youtube_dl/extractor/openload.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 697f312c3..9704f2e9d 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -31,6 +31,11 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://openload.io/f/ZAn6oz-VZGE/', 'only_matching': True, + }, { + # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout + # for title and ext + 'url': 'https://openload.co/embed/Sxz5sADo82g/', + 'only_matching': True, }] @staticmethod From c83a352227401d7ca7eac045b58043ed576c0cdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 25 Apr 2016 00:26:06 +0600 Subject: [PATCH 0318/3599] [openload] Make thumbnail optional --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 9704f2e9d..456561bcc 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -122,6 +122,6 @@ class OpenloadIE(InfoExtractor): 'id': video_id, 'title': title, 'ext': ext, - 'thumbnail': 
self._og_search_thumbnail(webpage), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, } From fb72ec58ae2612590d661c9943fe6b2fa0864401 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 30 Jul 2015 17:34:38 +0100 Subject: [PATCH 0319/3599] [extractor/common] do not process f4m manifest that contain akamai playerVerificationChallenge --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dc5080504..e3d1dd076 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -856,6 +856,13 @@ class InfoExtractor(object): # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244) transform_source=transform_source) + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy + akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') + if akamai_pv is not None and ';' in akamai_pv.text: + playerVerificationChallenge = akamai_pv.text.split(';')[0] + if playerVerificationChallenge.strip() != '': + return [] + formats = [] manifest_version = '1.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') From abc1723edd03d38b256e012d465e3343064f5682 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 25 Apr 2016 22:24:40 +0800 Subject: [PATCH 0320/3599] [unistra] Sort formats Originally URLs are passed to set() and not sorted, so the result is not deterministic, causing occasional FAILs on Travis CI. 
--- youtube_dl/extractor/unistra.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index 66d9f1bf3..a724cdbef 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -49,6 +49,7 @@ class UnistraIE(InfoExtractor): 'format_id': format_id, 'quality': quality(format_id) }) + self._sort_formats(formats) title = self._html_search_regex( r'UTV - (.*?)</', webpage, 'title') From 2beff95da5fb28440d26a3dee5de575c792d133c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 22:26:19 +0800 Subject: [PATCH 0321/3599] [nrk] Comment out unstable MD5 checksums Both are Akamai f4f fragments. --- youtube_dl/extractor/nrk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 9df200822..51dfc27ac 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -23,7 +23,7 @@ class NRKIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.nrk.no/video/PS*150533', - 'md5': 'bccd850baebefe23b56d708a113229c2', + # MD5 is unstable 'info_dict': { 'id': '150533', 'ext': 'flv', @@ -34,7 +34,7 @@ class NRKIE(InfoExtractor): }, { 'url': 'http://www.nrk.no/video/PS*154915', - 'md5': '0b1493ba1aae7d9579a5ad5531bc395a', + # MD5 is unstable 'info_dict': { 'id': '154915', 'ext': 'flv', From 6bdc2d5358c2843e3be4d073b2005e5196519664 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 22:27:25 +0800 Subject: [PATCH 0322/3599] [mitele] Comment out unstable MD5 Also Akamai f4f fragments --- youtube_dl/extractor/mitele.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 7b4581dc5..3589c223d 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -15,9 +15,9 @@ class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' _VALID_URL = 
r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' - _TESTS = [{ + _TEST = { 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', - 'md5': '0ff1a13aebb35d9bc14081ff633dd324', + # MD5 is unstable 'info_dict': { 'id': '0NF1jJnxS1Wu3pHrmvFyw2', 'display_id': 'programa-144', @@ -27,7 +27,7 @@ class MiTeleIE(InfoExtractor): 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, - }] + } def _real_extract(self, url): display_id = self._match_id(url) From 4645432d7a92bfb950571dde5dd690110e0f2284 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 22:48:17 +0800 Subject: [PATCH 0323/3599] [eagleplatform] Checking direct HTTP links Sometimes they fail with 404 --- youtube_dl/extractor/eagleplatform.py | 7 +++++-- youtube_dl/extractor/generic.py | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 0f8c73fd7..113a4966f 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -23,7 +23,7 @@ class EaglePlatformIE(InfoExtractor): _TESTS = [{ # http://lenta.ru/news/2015/03/06/navalny/ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - 'md5': '881ee8460e1b7735a8be938e2ffb362b', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '227304', 'ext': 'mp4', @@ -109,8 +109,11 @@ class EaglePlatformIE(InfoExtractor): mobj = re.search('/([^/]+)/index\.m3u8', m3u8_format['url']) if mobj: http_format = m3u8_format.copy() + video_url = mp4_url.replace(mp4_url_basename, mobj.group(1)) + if not self._is_valid_url(video_url, video_id): + continue http_format.update({ - 'url': mp4_url.replace(mp4_url_basename, mobj.group(1)), + 'url': video_url, 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) diff --git 
a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c63bdbd08..a95501d86 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -887,6 +887,7 @@ class GenericIE(InfoExtractor): # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '227304', 'ext': 'mp4', @@ -901,6 +902,7 @@ class GenericIE(InfoExtractor): # ClipYou (Eagle.Platform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '12820', 'ext': 'mp4', From ad58942d57996f7f43601f22c85b8c6a9afe1b09 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 23:35:05 +0800 Subject: [PATCH 0324/3599] [muzu] Remove extractor MUZU is shutting down in October 2015. [1] [1] http://www.musicbusinessworldwide.com/youtube-rival-muzu-is-heading-into-liquidation/ --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/muzu.py | 63 ------------------------------ 2 files changed, 64 deletions(-) delete mode 100644 youtube_dl/extractor/muzu.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6de3438fc..8b215c5ab 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -439,7 +439,6 @@ from .mtv import ( ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE -from .muzu import MuzuTVIE from .mwave import MwaveIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE diff --git a/youtube_dl/extractor/muzu.py b/youtube_dl/extractor/muzu.py deleted file mode 100644 index cbc800481..000000000 --- a/youtube_dl/extractor/muzu.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import 
compat_urllib_parse_urlencode - - -class MuzuTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.muzu\.tv/(.+?)/(.+?)/(?P<id>\d+)' - IE_NAME = 'muzu.tv' - - _TEST = { - 'url': 'http://www.muzu.tv/defected/marcashken-featuring-sos-cat-walk-original-mix-music-video/1981454/', - 'md5': '98f8b2c7bc50578d6a0364fff2bfb000', - 'info_dict': { - 'id': '1981454', - 'ext': 'mp4', - 'title': 'Cat Walk (Original Mix)', - 'description': 'md5:90e868994de201b2570e4e5854e19420', - 'uploader': 'MarcAshken featuring SOS', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - info_data = compat_urllib_parse_urlencode({ - 'format': 'json', - 'url': url, - }) - info = self._download_json( - 'http://www.muzu.tv/api/oembed/?%s' % info_data, - video_id, 'Downloading video info') - - player_info = self._download_json( - 'http://player.muzu.tv/player/playerInit?ai=%s' % video_id, - video_id, 'Downloading player info') - video_info = player_info['videos'][0] - for quality in ['1080', '720', '480', '360']: - if video_info.get('v%s' % quality): - break - - data = compat_urllib_parse_urlencode({ - 'ai': video_id, - # Even if each time you watch a video the hash changes, - # it seems to work for different videos, and it will work - # even if you use any non empty string as a hash - 'viewhash': 'VBNff6djeV4HV5TRPW5kOHub2k', - 'device': 'web', - 'qv': quality, - }) - video_url_info = self._download_json( - 'http://player.muzu.tv/player/requestVideo?%s' % data, - video_id, 'Downloading video url') - video_url = video_url_info['url'] - - return { - 'id': video_id, - 'title': info['title'], - 'url': video_url, - 'thumbnail': info['thumbnail_url'], - 'description': info['description'], - 'uploader': info['author_name'], - } From e3de3d6f2f9c82683e76b6bc12697aa7264372ca Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 25 Apr 2016 23:49:12 +0800 Subject: [PATCH 0325/3599] [normalboots] Fix extraction Now it's using ScreenwaveMedia --- 
youtube_dl/extractor/normalboots.py | 18 +++++++++--------- youtube_dl/extractor/screenwavemedia.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 77e091072..af44c3bb5 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .screenwavemedia import ScreenwaveMediaIE from ..utils import ( unified_strdate, @@ -12,7 +13,6 @@ class NormalbootsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?normalboots\.com/video/(?P<id>[0-9a-z-]*)/?$' _TEST = { 'url': 'http://normalboots.com/video/home-alone-games-jontron/', - 'md5': '8bf6de238915dd501105b44ef5f1e0f6', 'info_dict': { 'id': 'home-alone-games-jontron', 'ext': 'mp4', @@ -22,9 +22,10 @@ class NormalbootsIE(InfoExtractor): 'upload_date': '20140125', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, + 'add_ie': ['ScreenwaveMedia'], } def _real_extract(self, url): @@ -38,16 +39,15 @@ class NormalbootsIE(InfoExtractor): r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', webpage, 'date', fatal=False)) - player_url = self._html_search_regex( - r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', - webpage, 'player url') - player_page = self._download_webpage(player_url, video_id) - video_url = self._html_search_regex( - r"file:\s'(?P<file>[^']+\.mp4)'", player_page, 'file') + screenwavemedia_url = self._html_search_regex( + ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL', + group='url') return { + '_type': 'url_transparent', 'id': video_id, - 'url': video_url, + 'url': screenwavemedia_url, + 'ie_key': ScreenwaveMediaIE.ie_key(), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff --git 
a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 44b0bbee6..40333c825 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -12,7 +12,7 @@ from ..utils import ( class ScreenwaveMediaIE(InfoExtractor): - _VALID_URL = r'https?://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' + _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' _TESTS = [{ 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', From 749b0046a8664d023ff622dd38844f5c8632f3f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 25 Apr 2016 22:05:47 +0600 Subject: [PATCH 0326/3599] [ok] Allow embeds without title (Closes #9303) --- youtube_dl/extractor/odnoklassniki.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index f9e064a60..cd614f427 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -60,6 +60,22 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader': 'Алина П', 'age_limit': 0, }, + }, { + # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field) + 'url': 'http://ok.ru/video/62036049272859-0', + 'info_dict': { + 'id': '62036049272859-0', + 'ext': 'mp4', + 'title': 'МУЗЫКА ДОЖДЯ .', + 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0', + 'upload_date': '20120106', + 'uploader_id': '473534735899', + 'uploader': 'МARINA D', + 'age_limit': 0, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -106,7 
+122,14 @@ class OdnoklassnikiIE(InfoExtractor): video_id, 'Downloading metadata JSON') movie = metadata['movie'] - title = movie['title'] + + # Some embedded videos may not contain title in movie dict (e.g. + # http://ok.ru/video/62036049272859-0) thus we allow missing title + # here and it's going to be extracted later by an extractor that + # will process the actual embed. + provider = metadata.get('provider') + title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title') + thumbnail = movie.get('poster') duration = int_or_none(movie.get('duration')) @@ -137,7 +160,7 @@ class OdnoklassnikiIE(InfoExtractor): 'age_limit': age_limit, } - if metadata.get('provider') == 'USER_YOUTUBE': + if provider == 'USER_YOUTUBE': info.update({ '_type': 'url_transparent', 'url': movie['contentId'], From c9fd5306709d0c03487a3b0163b7a33cab6774aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 25 Apr 2016 22:15:15 +0600 Subject: [PATCH 0327/3599] [ok] Extract start time --- youtube_dl/extractor/odnoklassniki.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index cd614f427..986708e75 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) from ..utils import ( ExtractorError, unified_strdate, @@ -32,7 +36,7 @@ class OdnoklassnikiIE(InfoExtractor): 'skip': 'Video has been blocked', }, { # metadataUrl - 'url': 'http://ok.ru/video/63567059965189-0', + 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5', 'md5': '9676cf86eff5391d35dea675d224e131', 'info_dict': { 'id': '63567059965189-0', @@ -44,6 +48,7 @@ class 
OdnoklassnikiIE(InfoExtractor): 'uploader': '☭ Андрей Мещанинов ☭', 'like_count': int, 'age_limit': 0, + 'start_time': 5, }, }, { # YouTube embed (metadataUrl, provider == USER_YOUTUBE) @@ -94,6 +99,9 @@ class OdnoklassnikiIE(InfoExtractor): }] def _real_extract(self, url): + start_time = int_or_none(compat_parse_qs( + compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) + video_id = self._match_id(url) webpage = self._download_webpage( @@ -158,6 +166,7 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': uploader_id, 'like_count': like_count, 'age_limit': age_limit, + 'start_time': start_time, } if provider == 'USER_YOUTUBE': From f1f879098a38c786d78927df8915b547f7ac3569 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 26 Apr 2016 13:39:53 +0100 Subject: [PATCH 0328/3599] [viewster] extract more metadata for http formats --- youtube_dl/extractor/viewster.py | 37 ++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 7839225d4..6edc2c44e 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -118,6 +118,7 @@ class ViewsterIE(InfoExtractor): formats = [] manifest_url = None + m3u8_formats = [] for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): media = self._download_json( 'https://public-api.viewster.com/movies/%s/video?mediaType=%s' @@ -154,18 +155,32 @@ class ViewsterIE(InfoExtractor): 'qualities', default=None) if not qualities: continue - qualities = qualities.strip(',').split(',') - http_template = re.sub(QUALITIES_RE, r'%s', qualities_basename) + qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(','))) + qualities.sort() + http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename) http_url_basename = url_basename(video_url) - for q in qualities: - tbr = int_or_none(self._search_regex( - r'(\d+)k', q, 'bitrate', 
default=None)) - formats.append({ - 'url': video_url.replace(http_url_basename, http_template % q), - 'ext': 'mp4', - 'format_id': 'http' + ('-%d' % tbr if tbr else ''), - 'tbr': tbr, - }) + if m3u8_formats: + self._sort_formats(m3u8_formats) + m3u8_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + m3u8_formats)) + if len(qualities) == len(m3u8_formats): + for q, m3u8_format in zip(qualities, m3u8_formats): + f = m3u8_format.copy() + f.update({ + 'url': video_url.replace(http_url_basename, http_template % q), + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) + else: + for q in qualities: + formats.append({ + 'url': video_url.replace(http_url_basename, http_template % q), + 'ext': 'mp4', + 'format_id': 'http-%d' % q, + 'tbr': q, + }) if not formats and not info.get('LanguageSets') and not info.get('VODSettings'): self.raise_geo_restricted() From 175c2e9ec326f9ef820413837608eb4f5c8c5961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 26 Apr 2016 22:29:29 +0600 Subject: [PATCH 0329/3599] [youtube:search_url] Reimplement in terms of youtube:playlistbase --- youtube_dl/extractor/youtube.py | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 44f98d294..b7c3cb63f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2139,10 +2139,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE): _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'} -class YoutubeSearchURLIE(InfoExtractor): +class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com search URLs' IE_NAME = 'youtube:search_url' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)' + _VIDEO_RE = 
r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?' _TESTS = [{ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'playlist_mincount': 5, @@ -2157,32 +2158,8 @@ class YoutubeSearchURLIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) query = compat_urllib_parse_unquote_plus(mobj.group('query')) - webpage = self._download_webpage(url, query) - result_code = self._search_regex( - r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML') - - part_codes = re.findall( - r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code) - entries = [] - for part_code in part_codes: - part_title = self._html_search_regex( - [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False) - part_url_snippet = self._html_search_regex( - r'(?s)href="([^"]+)"', part_code, 'item URL') - part_url = compat_urlparse.urljoin( - 'https://www.youtube.com/', part_url_snippet) - entries.append({ - '_type': 'url', - 'url': part_url, - 'title': part_title, - }) - - return { - '_type': 'playlist', - 'entries': entries, - 'title': query, - } + return self.playlist_result(self._process_page(webpage), playlist_title=query) class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): From 7464360379a1a3fc6ba3228f54dd4853df349142 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Wed, 27 Apr 2016 00:16:48 +0600 Subject: [PATCH 0330/3599] [README.md] Add FAQ entry on output template conflicts --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index e062444b3..fb57b0323 100644 --- a/README.md +++ b/README.md @@ -697,6 +697,10 @@ YouTube changed their playlist format in March 2014 and later on, so you'll need If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. 
Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update. +### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number` + +Make sure you are not using `-o` with any of these options `-t`, `--title`, `--id`, `-A` or `--auto-number` set in command line or in a configuration file. Remove the latter if any. + ### Do I always have to pass `-citw`? By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`. From 046ea04a7d8601a85007430a7a3da3ce236549f7 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Wed, 27 Apr 2016 00:22:08 +0600 Subject: [PATCH 0331/3599] [README.md] Mention mpv --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fb57b0323..ecf737047 100644 --- a/README.md +++ b/README.md @@ -721,7 +721,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [ ### I have downloaded a video but how can I play it? -Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). 
+Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. From a7e03861e8d0ce18ad698e0e38ffac40a09cef8b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 27 Apr 2016 13:52:04 +0800 Subject: [PATCH 0332/3599] [scivee] Skip the test Not accessible from either Travis CI or my machine. Closes #9315 --- youtube_dl/extractor/scivee.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/scivee.py b/youtube_dl/extractor/scivee.py index 3bf93c870..b1ca12fde 100644 --- a/youtube_dl/extractor/scivee.py +++ b/youtube_dl/extractor/scivee.py @@ -18,6 +18,7 @@ class SciVeeIE(InfoExtractor): 'title': 'Adam Arkin at the 2014 DOE JGI Genomics of Energy & Environment Meeting', 'description': 'md5:81f1710638e11a481358fab1b11059d7', }, + 'skip': 'Not accessible from Travis CI server', } def _real_extract(self, url): From 2ac2cbc0a351785e0c6d034bd1bab77973ec7a41 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 27 Apr 2016 13:55:32 +0800 Subject: [PATCH 0333/3599] [malemotion] Remove the extractor Announcement from their homepage: ``` MaleMotion is closed After another system crash, I'm forced to close the site This week all content will be erased Don't forget to cancel your subscription if any ! ``` Closes #9311. 
--- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/malemotion.py | 46 ------------------------------ 2 files changed, 47 deletions(-) delete mode 100644 youtube_dl/extractor/malemotion.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8b215c5ab..00f8a7a85 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -400,7 +400,6 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .makerschannel import MakersChannelIE from .makertv import MakerTVIE -from .malemotion import MalemotionIE from .matchtv import MatchTVIE from .mdr import MDRIE from .metacafe import MetacafeIE diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py deleted file mode 100644 index 92511a671..000000000 --- a/youtube_dl/extractor/malemotion.py +++ /dev/null @@ -1,46 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote - - -class MalemotionIE(InfoExtractor): - _VALID_URL = r'https?://malemotion\.com/video/(.+?)\.(?P<id>.+?)(#|$)' - _TEST = { - 'url': 'http://malemotion.com/video/bete-de-concours.ltc', - 'md5': '3013e53a0afbde2878bc39998c33e8a5', - 'info_dict': { - 'id': 'ltc', - 'ext': 'mp4', - 'title': 'Bête de Concours', - 'age_limit': 18, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - video_url = compat_urllib_parse_unquote(self._search_regex( - r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL')) - video_title = self._html_search_regex( - r'<title>(.*?)</title', webpage, 'title') - video_thumbnail = self._search_regex( - r'<video .+?poster="(.+?)"', webpage, 'thumbnail', fatal=False) - - formats = [{ - 'url': video_url, - 'ext': 'mp4', - 'format_id': 'mp4', - 'preference': 1, - }] - self._sort_formats(formats) - - return { - 'id': video_id, - 'formats': 
formats, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'age_limit': 18, - } From 5b5d7cc11e3037408aeedf8d6dc57ac228b02496 Mon Sep 17 00:00:00 2001 From: Peter Rowlands <peter@pmrowla.com> Date: Wed, 27 Apr 2016 15:57:17 +0900 Subject: [PATCH 0334/3599] [mwave] Add Mwave Meet & Greet extractor --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/mwave.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8b215c5ab..9d1992721 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -439,7 +439,7 @@ from .mtv import ( ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE -from .mwave import MwaveIE +from .mwave import MwaveIE, MwaveMeetGreetIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvi import MyviIE diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py index 5c3c8d464..6485c6928 100644 --- a/youtube_dl/extractor/mwave.py +++ b/youtube_dl/extractor/mwave.py @@ -56,3 +56,26 @@ class MwaveIE(InfoExtractor): 'view_count': int_or_none(vod_info.get('hit')), 'formats': formats, } + + +class MwaveMeetGreetIE(InfoExtractor): + _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://mwave.interest.me/meetgreet/view/256', + 'info_dict': { + 'id': '173294', + 'ext': 'flv', + 'title': '[MEET&GREET] Park BoRam', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Mwave', + 'duration': 3634, + 'view_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + clip_id = self._html_search_regex(r'<iframe src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)', webpage, 'clip ID') + clip_url = 
'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id={0}'.format(clip_id) + return self.url_result(clip_url, 'Mwave', clip_id) From dcf094d62699f8ad06ceaf3fba55e453980fac91 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 27 Apr 2016 18:08:23 +0800 Subject: [PATCH 0335/3599] [theplatform] Fix for Python 3.2 test_AENetworks{,_1} fails as in Python < 3.3, binascii.a2b_* functions accepts only bytes-like objects --- youtube_dl/extractor/theplatform.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 8272dd969..a25417f94 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -159,11 +159,11 @@ class ThePlatformIE(ThePlatformBaseIE): def str_to_hex(str): return binascii.b2a_hex(str.encode('ascii')).decode('ascii') - def hex_to_str(hex): - return binascii.a2b_hex(hex) + def hex_to_bytes(hex): + return binascii.a2b_hex(hex.encode('ascii')) relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) - clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path)) + clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) return '%s&sig=%s' % (url, sig) From 3cc8649c9d42bab8c7b665115ebdc569bf44a762 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 28 Apr 2016 02:58:11 +0800 Subject: [PATCH 0336/3599] [20min] Detect embedded YouTube videos Fixes #9331 --- youtube_dl/extractor/twentymin.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index ca7d953b8..b721ecb0a 100644 --- a/youtube_dl/extractor/twentymin.py +++ 
b/youtube_dl/extractor/twentymin.py @@ -32,7 +32,22 @@ class TwentyMinutenIE(InfoExtractor): 'title': '«Wir müssen mutig nach vorne schauen»', 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' - } + }, + 'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.', + }, { + # YouTube embed + 'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', + 'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f', + 'info_dict': { + 'id': 'ivM7A7SpDOs', + 'ext': 'mp4', + 'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', + 'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', + 'upload_date': '20160424', + 'uploader': 'RTVCM Castilla-La Mancha', + 'uploader_id': 'RTVCM', + }, + 'add_ie': ['Youtube'], }, { 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', 'only_matching': True, @@ -48,6 +63,12 @@ class TwentyMinutenIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + youtube_url = self._html_search_regex( + r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', + webpage, 'YouTube embed URL', default=None) + if youtube_url is not None: + return self.url_result(youtube_url, 'Youtube') + title = self._html_search_regex( r'<h1>.*?<span>(.+?)</span></h1>', webpage, 'title', default=None) From 52af8f222bc4f067b4c5e7a977a64345d35ae4fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Apr 2016 04:01:21 +0600 Subject: [PATCH 0337/3599] [cwtv] Relax _VALID_URL (Closes #9327) --- youtube_dl/extractor/cwtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py index f5cefd966..ebd14cb16 100644 --- 
a/youtube_dl/extractor/cwtv.py +++ b/youtube_dl/extractor/cwtv.py @@ -9,7 +9,7 @@ from ..utils import ( class CWTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/shows/(?:[^/]+/){2}\?play=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' + _VALID_URL = r'https?://(?:www\.)?cw(?:tv|seed)\.com/(?:shows/)?(?:[^/]+/){2}\?.*\bplay=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})' _TESTS = [{ 'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63', 'info_dict': { @@ -48,6 +48,9 @@ class CWTVIE(InfoExtractor): # m3u8 download 'skip_download': True, } + }, { + 'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6', + 'only_matching': True, }] def _real_extract(self, url): From 618c71dc64086f751b6ae87d5f32687e02a54e58 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 28 Apr 2016 15:00:02 +0800 Subject: [PATCH 0338/3599] [cloudy] New domain name for the test_cloudy_1 I'm not sure whether videoraj.ch still works or not, so keep it.
--- youtube_dl/extractor/cloudy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index 9e267e6c0..9a28ef354 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -19,7 +19,7 @@ from ..utils import ( class CloudyIE(InfoExtractor): _IE_DESC = 'cloudy.ec and videoraj.ch' _VALID_URL = r'''(?x) - https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/ + https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.(?:ch|to))/ (?:v/|embed\.php\?id=) (?P<id>[A-Za-z0-9]+) ''' @@ -37,7 +37,7 @@ class CloudyIE(InfoExtractor): } }, { - 'url': 'http://www.videoraj.ch/v/47f399fd8bb60', + 'url': 'http://www.videoraj.to/v/47f399fd8bb60', 'md5': '7d0f8799d91efd4eda26587421c3c3b0', 'info_dict': { 'id': '47f399fd8bb60', From a5941305b6ba0921ea4f34641dd9095372dd1c1d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 28 Apr 2016 16:03:08 +0800 Subject: [PATCH 0339/3599] [mwave] Coding style --- youtube_dl/extractor/mwave.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py index 6485c6928..a103e0323 100644 --- a/youtube_dl/extractor/mwave.py +++ b/youtube_dl/extractor/mwave.py @@ -10,6 +10,7 @@ from ..utils import ( class MwaveIE(InfoExtractor): _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' + _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s' _TEST = { 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', # md5 is unstable @@ -59,7 +60,7 @@ class MwaveIE(InfoExtractor): class MwaveMeetGreetIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>[0-9]+)' + _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)' _TEST = { 'url': 'http://mwave.interest.me/meetgreet/view/256', 
'info_dict': { @@ -76,6 +77,8 @@ class MwaveMeetGreetIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - clip_id = self._html_search_regex(r'<iframe src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)', webpage, 'clip ID') - clip_url = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id={0}'.format(clip_id) + clip_id = self._html_search_regex( + r'<iframe[^>]+src="/mnettv/ifr_clip\.m\?searchVideoDetailVO\.clip_id=(\d+)', + webpage, 'clip ID') + clip_url = MwaveIE._URL_TEMPLATE % clip_id return self.url_result(clip_url, 'Mwave', clip_id) From 7f776fa4b510b7973e08f06de556fa39cb5946e5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 28 Apr 2016 17:08:41 +0800 Subject: [PATCH 0340/3599] [yandexmusic] Skip tests as Travis CI blocked --- youtube_dl/extractor/yandexmusic.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 7a90cc60c..0d32a612f 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -22,6 +22,12 @@ class YandexMusicBaseIE(InfoExtractor): if error: raise ExtractorError(error, expected=True) + def _download_webpage(self, *args, **kwargs): + webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) + if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' 
in webpage: + raise ExtractorError('Blocked by YandexMusic', expected=True) + return webpage + def _download_json(self, *args, **kwargs): response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs) self._handle_error(response) @@ -47,7 +53,8 @@ class YandexMusicTrackIE(YandexMusicBaseIE): 'album_artist': 'Carlo Ambrosio', 'artist': 'Carlo Ambrosio & Fabio Di Bari, Carlo Ambrosio', 'release_year': '2009', - } + }, + 'skip': 'Travis CI servers blocked by YandexMusic', } def _get_track_url(self, storage_dir, track_id): @@ -139,6 +146,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): 'title': 'Carlo Ambrosio - Gypsy Soul (2009)', }, 'playlist_count': 50, + 'skip': 'Travis CI servers blocked by YandexMusic', } def _real_extract(self, url): @@ -171,6 +179,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9', }, 'playlist_count': 6, + 'skip': 'Travis CI servers blocked by YandexMusic', }, { # playlist exceeding the limit of 150 tracks shipped with webpage (see # https://github.com/rg3/youtube-dl/issues/6666) @@ -180,6 +189,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): 'title': 'Музыка 90-х', }, 'playlist_count': 310, + 'skip': 'Travis CI servers blocked by YandexMusic', }] def _real_extract(self, url): From 0cbcbdd89dbc3573ecfcf68496c54bd84804967d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 28 Apr 2016 17:51:20 +0800 Subject: [PATCH 0341/3599] [nuvid] Fix extraction Closes #7620 --- youtube_dl/extractor/nuvid.py | 44 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index 9fa7cefad..ab6bfcd7f 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -5,8 +5,6 @@ import re from .common import InfoExtractor from ..utils import ( parse_duration, - sanitized_Request, - unified_strdate, ) @@ -20,7 +18,6 @@ class 
NuvidIE(InfoExtractor): 'ext': 'mp4', 'title': 'Horny babes show their awesome bodeis and', 'duration': 129, - 'upload_date': '20140508', 'age_limit': 18, } } @@ -28,28 +25,31 @@ class NuvidIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - formats = [] + page_url = 'http://m.nuvid.com/video/%s' % video_id + webpage = self._download_webpage( + page_url, video_id, 'Downloading video page') + # When dwnld_speed exists and has a value larger than the MP4 file's + # bitrate, Nuvid returns the MP4 URL + # It's unit is 100bytes/millisecond, see mobile-nuvid-min.js for the algorithm + self._set_cookie('nuvid.com', 'dwnld_speed', '10.0') + mp4_webpage = self._download_webpage( + page_url, video_id, 'Downloading video page for MP4 format') - for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]: - request = sanitized_Request( - 'http://m.nuvid.com/play/%s' % video_id) - request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed) - webpage = self._download_webpage( - request, video_id, 'Downloading %s page' % format_id) - video_url = self._html_search_regex( - r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False) - if not video_url: - continue + html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', + video_url = self._html_search_regex(html5_video_re, webpage, video_id) + mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id) + formats = [{ + 'url': video_url, + }] + if mp4_video_url != video_url: formats.append({ - 'url': video_url, - 'format_id': format_id, + 'url': mp4_video_url, }) - webpage = self._download_webpage( - 'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page') title = self._html_search_regex( [r'<span title="([^"]+)">', - r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>'], webpage, 'title').strip() + r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>', + 
r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip() thumbnails = [ { 'url': thumb_url, @@ -57,9 +57,8 @@ class NuvidIE(InfoExtractor): ] thumbnail = thumbnails[0]['url'] if thumbnails else None duration = parse_duration(self._html_search_regex( - r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False)) - upload_date = unified_strdate(self._html_search_regex( - r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False)) + [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', + r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False)) return { 'id': video_id, @@ -67,7 +66,6 @@ class NuvidIE(InfoExtractor): 'thumbnails': thumbnails, 'thumbnail': thumbnail, 'duration': duration, - 'upload_date': upload_date, 'age_limit': 18, 'formats': formats, } From eebe6b382eb6bd9e8118b616f3dde48c294e3b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Apr 2016 21:37:34 +0600 Subject: [PATCH 0342/3599] [yandexmusic] Improve error handling --- youtube_dl/extractor/yandexmusic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 0d32a612f..b0e68a087 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -18,9 +18,10 @@ from ..utils import ( class YandexMusicBaseIE(InfoExtractor): @staticmethod def _handle_error(response): - error = response.get('error') - if error: - raise ExtractorError(error, expected=True) + if isinstance(response, dict): + error = response.get('error') + if error: + raise ExtractorError(error, expected=True) def _download_webpage(self, *args, **kwargs): webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) From 4b537629143c8f51c5814c650227971c438b12e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 Apr 
2016 21:45:33 +0600 Subject: [PATCH 0343/3599] [yandexmusic] Clarify blockage --- youtube_dl/extractor/yandexmusic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index b0e68a087..a33fe3d83 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -26,7 +26,11 @@ class YandexMusicBaseIE(InfoExtractor): def _download_webpage(self, *args, **kwargs): webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: - raise ExtractorError('Blocked by YandexMusic', expected=True) + raise ExtractorError( + 'YandexMusic asks you to solve a CAPTCHA: go to ' + 'https://music.yandex.ru/ and solve it, then export ' + 'cookies and pass cookie file to youtube-dl with --cookies', + expected=True) return webpage def _download_json(self, *args, **kwargs): From 0ba9e3ca2233d018d695bac4eebe0e34043a7ec9 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 28 Apr 2016 17:44:33 +0100 Subject: [PATCH 0344/3599] [viewster] extract formats for videos with multiple audios/subtitles --- youtube_dl/extractor/viewster.py | 147 +++++++++++++++++-------------- 1 file changed, 80 insertions(+), 67 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 6edc2c44e..1813b81d6 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -78,11 +78,11 @@ class ViewsterIE(InfoExtractor): _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' - def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True): + def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}): request = sanitized_Request(url) request.add_header('Accept', self._ACCEPT_HEADER) request.add_header('Auth-token', 
self._AUTH_TOKEN) - return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal) + return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query) def _real_extract(self, url): video_id = self._match_id(url) @@ -117,72 +117,85 @@ class ViewsterIE(InfoExtractor): return self.playlist_result(entries, video_id, title, description) formats = [] - manifest_url = None - m3u8_formats = [] - for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): - media = self._download_json( - 'https://public-api.viewster.com/movies/%s/video?mediaType=%s' - % (entry_id, compat_urllib_parse.quote(media_type)), - video_id, 'Downloading %s JSON' % media_type, fatal=False) - if not media: - continue - video_url = media.get('Uri') - if not video_url: - continue - ext = determine_ext(video_url) - if ext == 'f4m': - manifest_url = video_url - video_url += '&' if '?' in video_url else '?' - video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1' - formats.extend(self._extract_f4m_formats( - video_url, video_id, f4m_id='hds')) - elif ext == 'm3u8': - manifest_url = video_url - m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id='hls', - fatal=False) # m3u8 sometimes fail - if m3u8_formats: - formats.extend(m3u8_formats) - else: - qualities_basename = self._search_regex( - '/([^/]+)\.csmil/', - manifest_url, 'qualities basename', default=None) - if not qualities_basename: - continue - QUALITIES_RE = r'((,\d+k)+,?)' - qualities = self._search_regex( - QUALITIES_RE, qualities_basename, - 'qualities', default=None) - if not qualities: - continue - qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(','))) - qualities.sort() - http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename) - http_url_basename = url_basename(video_url) - if m3u8_formats: - self._sort_formats(m3u8_formats) - m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none' and 
f.get('resolution') != 'multiple', - m3u8_formats)) - if len(qualities) == len(m3u8_formats): - for q, m3u8_format in zip(qualities, m3u8_formats): - f = m3u8_format.copy() - f.update({ - 'url': video_url.replace(http_url_basename, http_template % q), - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - formats.append(f) - else: - for q in qualities: - formats.append({ - 'url': video_url.replace(http_url_basename, http_template % q), - 'ext': 'mp4', - 'format_id': 'http-%d' % q, - 'tbr': q, - }) + for language_set in info.get('LanguageSets', []): + manifest_url = None + m3u8_formats = [] + audio = language_set.get('Audio') or '' + subtitle = language_set.get('Subtitle') or '' + base_format_id = audio + if subtitle: + base_format_id += '-%s' % subtitle - if not formats and not info.get('LanguageSets') and not info.get('VODSettings'): + def concat(suffix, sep='-'): + return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix + + for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): + media = self._download_json( + 'https://public-api.viewster.com/movies/%s/video' % entry_id, + video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={ + 'mediaType': media_type, + 'language': audio, + 'subtitle': subtitle, + }) + if not media: + continue + video_url = media.get('Uri') + if not video_url: + continue + ext = determine_ext(video_url) + if ext == 'f4m': + manifest_url = video_url + video_url += '&' if '?' in video_url else '?' 
+ video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1' + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id=concat('hds'))) + elif ext == 'm3u8': + manifest_url = video_url + m3u8_formats = self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=concat('hls'), + fatal=False) # m3u8 sometimes fail + if m3u8_formats: + formats.extend(m3u8_formats) + else: + qualities_basename = self._search_regex( + '/([^/]+)\.csmil/', + manifest_url, 'qualities basename', default=None) + if not qualities_basename: + continue + QUALITIES_RE = r'((,\d+k)+,?)' + qualities = self._search_regex( + QUALITIES_RE, qualities_basename, + 'qualities', default=None) + if not qualities: + continue + qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(','))) + qualities.sort() + http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename) + http_url_basename = url_basename(video_url) + if m3u8_formats: + self._sort_formats(m3u8_formats) + m3u8_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + m3u8_formats)) + if len(qualities) == len(m3u8_formats): + for q, m3u8_format in zip(qualities, m3u8_formats): + f = m3u8_format.copy() + f.update({ + 'url': video_url.replace(http_url_basename, http_template % q), + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) + else: + for q in qualities: + formats.append({ + 'url': video_url.replace(http_url_basename, http_template % q), + 'ext': 'mp4', + 'format_id': 'http-%d' % q, + 'tbr': q, + }) + + if not formats and not info.get('VODSettings'): self.raise_geo_restricted() self._sort_formats(formats) From e757fb3d053a195da4084c08a59a7b17b08ba598 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 28 Apr 2016 18:42:20 +0100 Subject: [PATCH 0345/3599] [crunchyroll] improve extraction - extract more metadata(series, episode, episode_number) - reduce duplicate requests for extracting 
formats - remove duplicate formats --- youtube_dl/extractor/crunchyroll.py | 31 +++++++++++++++++------------ 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 8ae3f2890..dd753c7c3 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -11,7 +11,6 @@ from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, - compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, @@ -306,28 +305,24 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage, 'video_uploader', fatal=False) - playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) - playerdata_req = sanitized_Request(playerdata_url) - playerdata_req.data = urlencode_postdata({'current_page': webpage_url}) - playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') - playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') - - stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, 'stream_id') - video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, 'thumbnail', fatal=False) - formats = [] - for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage): + video_encode_ids = [] + for fmt in re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage): stream_quality, stream_format = self._FORMAT_IDS[fmt] video_format = fmt + 'p' streamdata_req = sanitized_Request( 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' - % (stream_id, stream_format, stream_quality), + % (video_id, stream_format, stream_quality), compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) 
streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') streamdata = self._download_xml( streamdata_req, video_id, note='Downloading media info for %s' % video_format) stream_info = streamdata.find('./{default}preload/stream_info') + video_encode_id = xpath_text(stream_info, './video_encode_id') + if video_encode_id in video_encode_ids: + continue + video_encode_ids.append(video_encode_id) video_url = xpath_text(stream_info, './host') video_play_path = xpath_text(stream_info, './file') if not video_url or not video_play_path: @@ -360,15 +355,25 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text }) formats.append(format_info) + metadata = self._download_xml( + 'http://www.crunchyroll.com/xml', video_id, + note='Downloading media info', query={ + 'req': 'RpcApiVideoPlayer_GetMediaMetadata', + 'media_id': video_id, + }) + subtitles = self.extract_subtitles(video_id, webpage) return { 'id': video_id, 'title': video_title, 'description': video_description, - 'thumbnail': video_thumbnail, + 'thumbnail': xpath_text(metadata, 'episode_image_url'), 'uploader': video_uploader, 'upload_date': video_upload_date, + 'series': xpath_text(metadata, 'series_title'), + 'episode': xpath_text(metadata, 'episode_title'), + 'episode_number': int_or_none(xpath_text(metadata, 'episode_number')), 'subtitles': subtitles, 'formats': formats, } From 497971cd4a8407651debfb2fd4b10fc4009b0f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 29 Apr 2016 01:28:07 +0600 Subject: [PATCH 0346/3599] [yandexmusic] Clarify blockage even more --- youtube_dl/extractor/yandexmusic.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index a33fe3d83..ce3723b55 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -27,9 +27,12 @@ class YandexMusicBaseIE(InfoExtractor): 
webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: raise ExtractorError( - 'YandexMusic asks you to solve a CAPTCHA: go to ' - 'https://music.yandex.ru/ and solve it, then export ' - 'cookies and pass cookie file to youtube-dl with --cookies', + 'YandexMusic has considered youtube-dl requests automated and ' + 'asks you to solve a CAPTCHA. You can either wait for some ' + 'time until unblocked and optionally use --sleep-interval ' + 'in future or alternatively you can go to https://music.yandex.ru/ ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies', expected=True) return webpage From 683d892bf9332df1a255c673bca56a8f5487292a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 29 Apr 2016 01:30:53 +0600 Subject: [PATCH 0347/3599] [viewster] Remove unused import --- youtube_dl/extractor/viewster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 1813b81d6..a93196a07 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse, compat_urllib_parse_unquote, ) from ..utils import ( From 72670c39decc296a3ee757301dc70389674d19c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 29 Apr 2016 04:46:23 +0600 Subject: [PATCH 0348/3599] [arte:+7] Fix typo in _VALID_URL --- youtube_dl/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index a9e3266dc..881cacfab 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -63,7 +63,7 @@ class ArteTvIE(InfoExtractor): class 
ArteTVPlus7IE(InfoExtractor): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&+])' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)' @classmethod def _extract_url_info(cls, url): From 31ff3c074eddf4078b6eb49281830875eb4e65a1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 29 Apr 2016 13:36:52 +0800 Subject: [PATCH 0349/3599] [sexykarma] Remove the extractor Its domain name is on sale. Closes #9317 --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/sexykarma.py | 121 ----------------------------- 2 files changed, 122 deletions(-) delete mode 100644 youtube_dl/extractor/sexykarma.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 88405f070..41ff1e7a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -657,7 +657,6 @@ from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .senateisvp import SenateISVPIE from .servingsys import ServingSysIE from .sexu import SexuIE -from .sexykarma import SexyKarmaIE from .shahid import ShahidIE from .shared import SharedIE from .sharesix import ShareSixIE diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py deleted file mode 100644 index e33483674..000000000 --- a/youtube_dl/extractor/sexykarma.py +++ /dev/null @@ -1,121 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - unified_strdate, - parse_duration, - int_or_none, -) - - -class SexyKarmaIE(InfoExtractor): - IE_DESC = 'Sexy Karma and Watch Indian Porn' - _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' - _TESTS = [{ - 'url': 
'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html', - 'md5': 'b9798e7d1ef1765116a8f516c8091dbd', - 'info_dict': { - 'id': 'yHI70cOyIHt', - 'display_id': 'taking-a-quick-pee', - 'ext': 'mp4', - 'title': 'Taking a quick pee.', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'wildginger7', - 'upload_date': '20141008', - 'duration': 22, - 'view_count': int, - 'comment_count': int, - 'categories': list, - 'age_limit': 18, - } - }, { - 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html', - 'md5': 'dd216c68d29b49b12842b9babe762a5d', - 'info_dict': { - 'id': '8Id6EZPbuHf', - 'display_id': 'pot-pixie-tribute', - 'ext': 'mp4', - 'title': 'pot_pixie tribute', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'banffite', - 'upload_date': '20141013', - 'duration': 16, - 'view_count': int, - 'comment_count': int, - 'categories': list, - 'age_limit': 18, - } - }, { - 'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html', - 'md5': '9afb80675550406ed9a63ac2819ef69d', - 'info_dict': { - 'id': 'dW2mtctxJfs', - 'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number', - 'ext': 'mp4', - 'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'Don', - 'upload_date': '20140213', - 'duration': 83, - 'view_count': int, - 'comment_count': int, - 'categories': list, - 'age_limit': 18, - } - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - video_url = self._html_search_regex( - r"url: escape\('([^']+)'\)", webpage, 'url') - - title = self._html_search_regex( - r'<h2 class="he2"><span>(.*?)</span>', - webpage, 'title') - thumbnail = self._html_search_regex( - r'<span 
id="container"><img\s+src="([^"]+)"', - webpage, 'thumbnail', fatal=False) - - uploader = self._html_search_regex( - r'class="aupa">\s*(.*?)</a>', - webpage, 'uploader') - upload_date = unified_strdate(self._html_search_regex( - r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False)) - - duration = parse_duration(self._search_regex( - r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>', - webpage, 'duration', fatal=False)) - - view_count = int_or_none(self._search_regex( - r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', - webpage, 'view count', fatal=False)) - comment_count = int_or_none(self._search_regex( - r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', - webpage, 'comment count', fatal=False)) - - categories = re.findall( - r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>', - webpage) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, - 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, - 'categories': categories, - 'age_limit': 18, - } From f5535ed0e3537acee90820c98d6ca474d437d7d0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 29 Apr 2016 14:24:07 +0800 Subject: [PATCH 0350/3599] [orf] Skip the expired test --- youtube_dl/extractor/orf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 66c75f8b3..4e3864f0d 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -185,6 +185,7 @@ class ORFFM4IE(InfoExtractor): 'timestamp': 1452456073, 'upload_date': '20160110', }, + 'skip': 'Live streams on FM4 got deleted soon', } def _real_extract(self, url): From 5819edef034819b76b8eec6a0cdf7b29cc9ddff3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 29 Apr 2016 14:27:15 +0800 Subject: [PATCH 
0351/3599] [ooyala] Skip an invalid test Ooyala is used by lots of extractors and its correctness can be verified by these websites. --- youtube_dl/extractor/ooyala.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 16f040191..95e982897 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -96,6 +96,8 @@ class OoyalaIE(OoyalaBaseIE): 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', 'duration': 853.386, }, + # The video in the original webpage now uses PlayWire + 'skip': 'Ooyala said: movie expired', }, { # Only available for ipad 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', From 1910077ed77a270fea8e368c3815b23cee254f85 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 29 Apr 2016 17:59:23 +0800 Subject: [PATCH 0352/3599] Revert "[sexykarma] Remove the extractor" This reverts commit 31ff3c074eddf4078b6eb49281830875eb4e65a1. 
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/sexykarma.py | 121 +++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 youtube_dl/extractor/sexykarma.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 41ff1e7a5..88405f070 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -657,6 +657,7 @@ from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .senateisvp import SenateISVPIE from .servingsys import ServingSysIE from .sexu import SexuIE +from .sexykarma import SexyKarmaIE from .shahid import ShahidIE from .shared import SharedIE from .sharesix import ShareSixIE diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py new file mode 100644 index 000000000..e33483674 --- /dev/null +++ b/youtube_dl/extractor/sexykarma.py @@ -0,0 +1,121 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, + parse_duration, + int_or_none, +) + + +class SexyKarmaIE(InfoExtractor): + IE_DESC = 'Sexy Karma and Watch Indian Porn' + _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' + _TESTS = [{ + 'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html', + 'md5': 'b9798e7d1ef1765116a8f516c8091dbd', + 'info_dict': { + 'id': 'yHI70cOyIHt', + 'display_id': 'taking-a-quick-pee', + 'ext': 'mp4', + 'title': 'Taking a quick pee.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'wildginger7', + 'upload_date': '20141008', + 'duration': 22, + 'view_count': int, + 'comment_count': int, + 'categories': list, + 'age_limit': 18, + } + }, { + 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html', + 'md5': 'dd216c68d29b49b12842b9babe762a5d', + 'info_dict': { + 'id': '8Id6EZPbuHf', + 
'display_id': 'pot-pixie-tribute', + 'ext': 'mp4', + 'title': 'pot_pixie tribute', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'banffite', + 'upload_date': '20141013', + 'duration': 16, + 'view_count': int, + 'comment_count': int, + 'categories': list, + 'age_limit': 18, + } + }, { + 'url': 'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html', + 'md5': '9afb80675550406ed9a63ac2819ef69d', + 'info_dict': { + 'id': 'dW2mtctxJfs', + 'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number', + 'ext': 'mp4', + 'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Don', + 'upload_date': '20140213', + 'duration': 83, + 'view_count': int, + 'comment_count': int, + 'categories': list, + 'age_limit': 18, + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + + video_url = self._html_search_regex( + r"url: escape\('([^']+)'\)", webpage, 'url') + + title = self._html_search_regex( + r'<h2 class="he2"><span>(.*?)</span>', + webpage, 'title') + thumbnail = self._html_search_regex( + r'<span id="container"><img\s+src="([^"]+)"', + webpage, 'thumbnail', fatal=False) + + uploader = self._html_search_regex( + r'class="aupa">\s*(.*?)</a>', + webpage, 'uploader') + upload_date = unified_strdate(self._html_search_regex( + r'Added: <strong>(.+?)</strong>', webpage, 'upload date', fatal=False)) + + duration = parse_duration(self._search_regex( + r'<td>Time:\s*</td>\s*<td align="right"><span>\s*(.+?)\s*</span>', + webpage, 'duration', fatal=False)) + + view_count = int_or_none(self._search_regex( + r'<td>Views:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', + webpage, 'view count', fatal=False)) + comment_count = 
int_or_none(self._search_regex( + r'<td>Comments:\s*</td>\s*<td align="right"><span>\s*(\d+)\s*</span>', + webpage, 'comment count', fatal=False)) + + categories = re.findall( + r'<a href="[^"]+/search/video/desi"><span>([^<]+)</span></a>', + webpage) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count, + 'categories': categories, + 'age_limit': 18, + } From 14638e291511c3305b70dce64e9bd97686e9da93 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 29 Apr 2016 18:17:08 +0800 Subject: [PATCH 0353/3599] [sexykarma] Rename to WatchIndianPornIE and fix extraction --- youtube_dl/extractor/extractors.py | 2 +- .../{sexykarma.py => watchindianporn.py} | 63 +++++-------------- 2 files changed, 17 insertions(+), 48 deletions(-) rename youtube_dl/extractor/{sexykarma.py => watchindianporn.py} (54%) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 88405f070..3adcd41c4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -657,7 +657,6 @@ from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .senateisvp import SenateISVPIE from .servingsys import ServingSysIE from .sexu import SexuIE -from .sexykarma import SexyKarmaIE from .shahid import ShahidIE from .shared import SharedIE from .sharesix import ShareSixIE @@ -918,6 +917,7 @@ from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import WashingtonPostIE from .wat import WatIE +from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, WDRMobileIE, diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/watchindianporn.py similarity index 54% rename from youtube_dl/extractor/sexykarma.py rename to youtube_dl/extractor/watchindianporn.py index 
e33483674..5d3b5bdb4 100644 --- a/youtube_dl/extractor/sexykarma.py +++ b/youtube_dl/extractor/watchindianporn.py @@ -11,61 +11,27 @@ from ..utils import ( ) -class SexyKarmaIE(InfoExtractor): - IE_DESC = 'Sexy Karma and Watch Indian Porn' - _VALID_URL = r'https?://(?:www\.)?(?:sexykarma\.com|watchindianporn\.net)/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' - _TESTS = [{ - 'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html', - 'md5': 'b9798e7d1ef1765116a8f516c8091dbd', +class WatchIndianPornIE(InfoExtractor): + IE_DESC = 'Watch Indian Porn' + _VALID_URL = r'https?://(?:www\.)?watchindianporn\.net/(?:[^/]+/)*video/(?P<display_id>[^/]+)-(?P<id>[a-zA-Z0-9]+)\.html' + _TEST = { + 'url': 'http://www.watchindianporn.net/video/hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera-RZa2avywNPa.html', + 'md5': '249589a164dde236ec65832bfce17440', 'info_dict': { - 'id': 'yHI70cOyIHt', - 'display_id': 'taking-a-quick-pee', + 'id': 'RZa2avywNPa', + 'display_id': 'hot-milf-from-kerala-shows-off-her-gorgeous-large-breasts-on-camera', 'ext': 'mp4', - 'title': 'Taking a quick pee.', + 'title': 'Hot milf from kerala shows off her gorgeous large breasts on camera', 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'wildginger7', - 'upload_date': '20141008', - 'duration': 22, + 'uploader': 'LoveJay', + 'upload_date': '20160428', + 'duration': 226, 'view_count': int, 'comment_count': int, 'categories': list, 'age_limit': 18, } - }, { - 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html', - 'md5': 'dd216c68d29b49b12842b9babe762a5d', - 'info_dict': { - 'id': '8Id6EZPbuHf', - 'display_id': 'pot-pixie-tribute', - 'ext': 'mp4', - 'title': 'pot_pixie tribute', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'banffite', - 'upload_date': '20141013', - 'duration': 16, - 'view_count': int, - 'comment_count': int, - 'categories': list, - 'age_limit': 18, - } - }, { - 'url': 
'http://www.watchindianporn.net/video/desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number-dW2mtctxJfs.html', - 'md5': '9afb80675550406ed9a63ac2819ef69d', - 'info_dict': { - 'id': 'dW2mtctxJfs', - 'display_id': 'desi-dancer-namrata-stripping-completely-nude-and-dancing-on-a-hot-number', - 'ext': 'mp4', - 'title': 'Desi dancer namrata stripping completely nude and dancing on a hot number', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'Don', - 'upload_date': '20140213', - 'duration': 83, - 'view_count': int, - 'comment_count': int, - 'categories': list, - 'age_limit': 18, - } - }] + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -109,6 +75,9 @@ class SexyKarmaIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'url': video_url, + 'http_headers': { + 'Referer': url, + }, 'title': title, 'thumbnail': thumbnail, 'uploader': uploader, From 67167920db50e818c9fca20579c8a05eb2218f86 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 29 Apr 2016 11:14:42 +0100 Subject: [PATCH 0354/3599] [viewlift] replace SnagFilms extractors - add support for other sites that use the same logic - improve format extraction and sorting --- youtube_dl/extractor/extractors.py | 8 ++--- youtube_dl/extractor/generic.py | 10 +++--- .../extractor/{snagfilms.py => viewlift.py} | 35 +++++++++++++------ 3 files changed, 34 insertions(+), 19 deletions(-) rename youtube_dl/extractor/{snagfilms.py => viewlift.py} (81%) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3adcd41c4..b1b7f9b42 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -673,10 +673,6 @@ from .smotri import ( SmotriUserIE, SmotriBroadcastIE, ) -from .snagfilms import ( - SnagFilmsIE, - SnagFilmsEmbedIE, -) from .snotr import SnotrIE from .sohu import SohuIE from .soundcloud import ( @@ -879,6 +875,10 @@ from .vidme import ( ) from .vidzi import VidziIE from .vier 
import VierIE, VierVideosIE +from .viewlift import ( + ViewLiftIE, + ViewLiftEmbedIE, +) from .viewster import ViewsterIE from .viidea import ViideaIE from .vimeo import ( diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a95501d86..0f1eb7fa6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -51,7 +51,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE from .vimeo import VimeoIE from .dailymotion import DailymotionCloudIE from .onionstudios import OnionStudiosIE -from .snagfilms import SnagFilmsEmbedIE +from .viewlift import ViewLiftEmbedIE from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE @@ -1924,10 +1924,10 @@ class GenericIE(InfoExtractor): if onionstudios_url: return self.url_result(onionstudios_url) - # Look for SnagFilms embeds - snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) - if snagfilms_url: - return self.url_result(snagfilms_url) + # Look for ViewLift embeds + viewlift_url = ViewLiftEmbedIE._extract_url(webpage) + if viewlift_url: + return self.url_result(viewlift_url) # Look for JWPlatform embeds jwplatform_url = JWPlatformIE._extract_url(webpage) diff --git a/youtube_dl/extractor/snagfilms.py b/youtube_dl/extractor/viewlift.py similarity index 81% rename from youtube_dl/extractor/snagfilms.py rename to youtube_dl/extractor/viewlift.py index 6977afb27..dd4a13a4a 100644 --- a/youtube_dl/extractor/snagfilms.py +++ b/youtube_dl/extractor/viewlift.py @@ -13,8 +13,12 @@ from ..utils import ( ) -class SnagFilmsEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' +class ViewLiftBaseIE(InfoExtractor): + _DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv' + + +class ViewLiftEmbedIE(ViewLiftBaseIE): + _VALID_URL = 
r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX _TESTS = [{ 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500', 'md5': '2924e9215c6eff7a55ed35b72276bd93', @@ -40,7 +44,7 @@ class SnagFilmsEmbedIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1', + r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX, webpage) if mobj: return mobj.group('url') @@ -55,6 +59,7 @@ class SnagFilmsEmbedIE(InfoExtractor): 'Film %s is not playable in your area.' % video_id, expected=True) formats = [] + has_bitrate = False for source in self._parse_json(js_to_json(self._search_regex( r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id): file_ = source.get('file') @@ -63,22 +68,25 @@ class SnagFilmsEmbedIE(InfoExtractor): type_ = source.get('type') ext = determine_ext(file_) format_id = source.get('label') or ext - if all(v == 'm3u8' for v in (type_, ext)): + if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)): formats.extend(self._extract_m3u8_formats( file_, video_id, 'mp4', m3u8_id='hls')) else: bitrate = int_or_none(self._search_regex( [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext], file_, 'bitrate', default=None)) + if not has_bitrate and bitrate: + has_bitrate = True height = int_or_none(self._search_regex( r'^(\d+)[pP]$', format_id, 'height', default=None)) formats.append({ 'url': file_, - 'format_id': format_id, + 'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')), 'tbr': bitrate, 'height': height, }) - self._sort_formats(formats) + field_preference = None if has_bitrate else ('height', 'tbr', 'format_id') + self._sort_formats(formats, field_preference) title = self._search_regex( [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)'], @@ -91,8 
+99,8 @@ class SnagFilmsEmbedIE(InfoExtractor): } -class SnagFilmsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P[^?#]+)' +class ViewLiftIE(ViewLiftBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?P%s)/(?:films/title|show|(?:news/)?videos?)/(?P[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'md5': '19844f897b35af219773fd63bdec2942', @@ -127,10 +135,16 @@ class SnagFilmsIE(InfoExtractor): # Film is not available. 'url': 'http://www.snagfilms.com/show/augie_alone/flirting', 'only_matching': True, + }, { + 'url': 'http://www.winnersview.com/videos/the-good-son', + 'only_matching': True, + }, { + 'url': 'http://www.kesari.tv/news/video/1461919076414', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) + domain, display_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, display_id) @@ -170,7 +184,7 @@ class SnagFilmsIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id, + 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id), 'id': film_id, 'display_id': display_id, 'title': title, @@ -178,4 +192,5 @@ class SnagFilmsIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'categories': categories, + 'ie_key': 'ViewLiftEmbed', } From 065216d94f59953a228d2683d3bafe4241fd1e29 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 29 Apr 2016 11:46:42 +0100 Subject: [PATCH 0355/3599] [crunchyroll] reduce requests for formats extraction --- youtube_dl/extractor/crunchyroll.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index dd753c7c3..184ba6896 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -26,6 +26,7 @@ from ..utils import ( unified_strdate, 
urlencode_postdata, xpath_text, + extract_attributes, ) from ..aes import ( aes_cbc_decrypt, @@ -305,9 +306,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text r']+href="/publisher/[^"]+"[^>]*>([^<]+)', webpage, 'video_uploader', fatal=False) - formats = [] + available_fmts = [] + for a, fmt in re.findall(r'(]+token="showmedia\.([0-9]{3,4})p"[^>]+>.*?)', webpage): + attrs = extract_attributes(a) + href = attrs.get('href') + if href and '/freetrial' in href: + continue + available_fmts.append(fmt) + if not available_fmts: + available_fmts = re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage) video_encode_ids = [] - for fmt in re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage): + formats = [] + for fmt in available_fmts: stream_quality, stream_format = self._FORMAT_IDS[fmt] video_format = fmt + 'p' streamdata_req = sanitized_Request( From b24d6336a797b99339c12a0aa1b431755e22e8cf Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Tue, 26 Apr 2016 17:30:24 +0300 Subject: [PATCH 0356/3599] [vlive] Add support for live videos --- youtube_dl/extractor/common.py | 8 ++- youtube_dl/extractor/vlive.py | 98 ++++++++++++++++++++++++++-------- 2 files changed, 83 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a285ee7d8..2763d2ffe 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1061,7 +1061,7 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, m3u8_id=None, note=None, errnote=None, - fatal=True): + fatal=True, live=False): formats = [{ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), @@ -1139,7 +1139,11 @@ class InfoExtractor(object): if m3u8_id: format_id.append(m3u8_id) last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None - format_id.append(last_media_name if last_media_name else '%d' % 
(tbr if tbr else len(formats))) + # Bandwidth of live streams may differ over time thus making + # format_id unpredictable. So it's better to keep provided + # format_id intact. + if last_media_name and not live: + format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), 'url': format_url(line.strip()), diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index baf39bb2c..2151696ea 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -1,8 +1,11 @@ # coding: utf-8 -from __future__ import unicode_literals +from __future__ import division, unicode_literals +import re +import time from .common import InfoExtractor from ..utils import ( + ExtractorError, dict_get, float_or_none, int_or_none, @@ -31,16 +34,77 @@ class VLiveIE(InfoExtractor): webpage = self._download_webpage( 'http://www.vlive.tv/video/%s' % video_id, video_id) - long_video_id = self._search_regex( - r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"', - webpage, 'long video id') + # UTC+x - UTC+9 (KST) + tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone + tz_offset = -tz // 60 - 9 * 60 + self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset) - key = self._search_regex( - r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"', - webpage, 'key') + status_params = self._download_json( + 'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, + video_id, 'Downloading JSON status', + headers={'Referer': url}) + status = status_params.get('status') + air_start = status_params.get('onAirStartAt', '') + is_live = status_params.get('isLive') + video_params = self._search_regex( + r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)', + webpage, 'video params') + live_params, long_video_id, key = re.split( + r'"\s*,\s*"', video_params)[1:4] + + if status == 
'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': + live_params = self._parse_json('"%s"' % live_params, video_id) + live_params = self._parse_json(live_params, video_id) + return self._live(video_id, webpage, live_params) + elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO': + if long_video_id and key: + return self._replay(video_id, webpage, long_video_id, key) + elif is_live: + status = 'LIVE_END' + else: + status = 'COMING_SOON' + + if status == 'LIVE_END': + raise ExtractorError('Uploading for replay. Please wait...', + expected=True) + elif status == 'COMING_SOON': + raise ExtractorError('Coming soon! %s' % air_start, expected=True) + elif status == 'CANCELED': + raise ExtractorError('We are sorry, ' + 'but the live broadcast has been canceled.', + expected=True) + else: + raise ExtractorError('Unknown status %s' % status) + + def _get_common_fields(self, webpage): title = self._og_search_title(webpage) + creator = self._html_search_regex( + r']+class="info_area"[^>]*>\s*]*>([^<]+)', + webpage, 'creator', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) + return { + 'title': title, + 'creator': creator, + 'thumbnail': thumbnail, + } + def _live(self, video_id, webpage, live_params): + formats = [] + for vid in live_params.get('resolutions', []): + formats.extend(self._extract_m3u8_formats( + vid['cdnUrl'], video_id, 'mp4', + m3u8_id=vid.get('name'), + fatal=False, live=True)) + self._sort_formats(formats) + + return dict(self._get_common_fields(webpage), + id=video_id, + formats=formats, + is_live=True, + ) + + def _replay(self, video_id, webpage, long_video_id, key): playinfo = self._download_json( 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s' % compat_urllib_parse_urlencode({ @@ -62,11 +126,6 @@ class VLiveIE(InfoExtractor): } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')] self._sort_formats(formats) - thumbnail = self._og_search_thumbnail(webpage) - creator = self._html_search_regex( 
- r']+class="info_area"[^>]*>\s*]*>([^<]+)', - webpage, 'creator', fatal=False) - view_count = int_or_none(playinfo.get('meta', {}).get('count')) subtitles = {} @@ -77,12 +136,9 @@ class VLiveIE(InfoExtractor): 'ext': 'vtt', 'url': caption['source']}] - return { - 'id': video_id, - 'title': title, - 'creator': creator, - 'thumbnail': thumbnail, - 'view_count': view_count, - 'formats': formats, - 'subtitles': subtitles, - } + return dict(self._get_common_fields(webpage), + id=video_id, + formats=formats, + view_count=view_count, + subtitles=subtitles, + ) From 9d186afac818645490122aa7457f247c31c601bf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 29 Apr 2016 19:29:00 +0800 Subject: [PATCH 0357/3599] [vlive] Coding style and PEP8 --- youtube_dl/extractor/vlive.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 2151696ea..7f9e99ec2 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -3,10 +3,11 @@ from __future__ import division, unicode_literals import re import time + from .common import InfoExtractor from ..utils import ( - ExtractorError, dict_get, + ExtractorError, float_or_none, int_or_none, ) @@ -99,10 +100,9 @@ class VLiveIE(InfoExtractor): self._sort_formats(formats) return dict(self._get_common_fields(webpage), - id=video_id, - formats=formats, - is_live=True, - ) + id=video_id, + formats=formats, + is_live=True) def _replay(self, video_id, webpage, long_video_id, key): playinfo = self._download_json( @@ -137,8 +137,7 @@ class VLiveIE(InfoExtractor): 'url': caption['source']}] return dict(self._get_common_fields(webpage), - id=video_id, - formats=formats, - view_count=view_count, - subtitles=subtitles, - ) + id=video_id, + formats=formats, + view_count=view_count, + subtitles=subtitles) From 6ff4469528d642bd678df9b1fa83545a0942e333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 29 Apr 
2016 19:39:27 +0600 Subject: [PATCH 0358/3599] [crunchyroll] Relax fmt regex --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 184ba6896..4a7664296 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -307,7 +307,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_uploader', fatal=False) available_fmts = [] - for a, fmt in re.findall(r'(]+token="showmedia\.([0-9]{3,4})p"[^>]+>.*?)', webpage): + for a, fmt in re.findall(r'(]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): attrs = extract_attributes(a) href = attrs.get('href') if href and '/freetrial' in href: From 8312b1a3d1dc07d80d33e31f9b2b6facf13fa744 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 29 Apr 2016 19:43:53 +0600 Subject: [PATCH 0359/3599] [crunchyroll] Add even more relaxed fmt fallback --- youtube_dl/extractor/crunchyroll.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4a7664296..58960b2f8 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -314,7 +314,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text continue available_fmts.append(fmt) if not available_fmts: - available_fmts = re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage) + for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): + available_fmts = re.findall(p, webpage) + if available_fmts: + break video_encode_ids = [] formats = [] for fmt in available_fmts: From 00a17a9e1234ecc868a15b5759472a0f9215f797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 29 Apr 2016 19:44:10 +0600 Subject: [PATCH 0360/3599] [crunchyroll] Sort formats --- 
youtube_dl/extractor/crunchyroll.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 58960b2f8..90a64303d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -367,6 +367,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'ext': 'flv', }) formats.append(format_info) + self._sort_formats(formats) metadata = self._download_xml( 'http://www.crunchyroll.com/xml', video_id, From e9c6cdf4a103d1ebdb6927bdab429c370cbe66b2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 29 Apr 2016 22:49:04 +0800 Subject: [PATCH 0361/3599] [common] Fix format_id construction for HLS --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2763d2ffe..61a5d124c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1142,7 +1142,7 @@ class InfoExtractor(object): # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. 
- if last_media_name and not live: + if not live: format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), From cef3f3011f9d3a67de3ff064a5185a1a4bcf40e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Apr 2016 00:17:09 +0800 Subject: [PATCH 0362/3599] [funimation] Detect blocking and support CloudFlare cookies --- youtube_dl/extractor/funimation.py | 48 ++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 1eb528f31..0ad0d9b6a 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -2,6 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_urllib_parse_unquote_plus, +) from ..utils import ( clean_html, determine_ext, @@ -27,6 +31,7 @@ class FunimationIE(InfoExtractor): 'description': 'md5:1769f43cd5fc130ace8fd87232207892', 'thumbnail': 're:https?://.*\.jpg', }, + 'skip': 'Access without user interaction is forbidden by CloudFlare, and video removed', }, { 'url': 'http://www.funimation.com/shows/hacksign/videos/official/role-play', 'info_dict': { @@ -37,6 +42,7 @@ class FunimationIE(InfoExtractor): 'description': 'md5:b602bdc15eef4c9bbb201bb6e6a4a2dd', 'thumbnail': 're:https?://.*\.jpg', }, + 'skip': 'Access without user interaction is forbidden by CloudFlare', }, { 'url': 'http://www.funimation.com/shows/attack-on-titan-junior-high/videos/promotional/broadcast-dub-preview', 'info_dict': { @@ -47,8 +53,36 @@ class FunimationIE(InfoExtractor): 'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803', 'thumbnail': 're:https?://.*\.(?:jpg|png)', }, + 'skip': 'Access without user interaction is forbidden by CloudFlare', }] + _LOGIN_URL = 'http://www.funimation.com/login' + + def _download_webpage(self, *args, **kwargs): + try: + return 
super(FunimationIE, self)._download_webpage(*args, **kwargs) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: + response = ee.cause.read() + if b'>Please complete the security check to access<' in response: + raise ExtractorError( + 'Access to funimation.com is blocked by CloudFlare. ' + 'Please browse to http://www.funimation.com/, solve ' + 'the reCAPTCHA, export browser cookies to a text file,' + ' and then try again with --cookies YOUR_COOKIE_FILE.', + expected=True) + raise + + def _extract_cloudflare_session_ua(self, url): + ci_session_cookie = self._get_cookies(url).get('ci_session') + if ci_session_cookie: + ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value) + # ci_session is a string serialized by PHP function serialize() + # This case is simple enough to use regular expressions only + return self._search_regex( + r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent', + default=None) + def _login(self): (username, password) = self._get_login_info() if username is None: @@ -57,8 +91,11 @@ class FunimationIE(InfoExtractor): 'email_field': username, 'password_field': password, }) - login_request = sanitized_Request('http://www.funimation.com/login', data, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0', + user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL) + if not user_agent: + user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0' + login_request = sanitized_Request(self._LOGIN_URL, data, headers={ + 'User-Agent': user_agent, 'Content-Type': 'application/x-www-form-urlencoded' }) login_page = self._download_webpage( @@ -103,11 +140,16 @@ class FunimationIE(InfoExtractor): ('mobile', 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36'), ) + user_agent = self._extract_cloudflare_session_ua(url) + if 
user_agent: + USER_AGENTS = ((None, user_agent),) + for kind, user_agent in USER_AGENTS: request = sanitized_Request(url) request.add_header('User-Agent', user_agent) webpage = self._download_webpage( - request, display_id, 'Downloading %s webpage' % kind) + request, display_id, + 'Downloading %s webpage' % kind if kind else 'Downloading webpage') playlist = self._parse_json( self._search_regex( From 65a3bfb379c9d5e53cac874af097d2071ee4ac4d Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 29 Apr 2016 19:21:17 +0100 Subject: [PATCH 0363/3599] [dfb] extract m3u8 formats --- youtube_dl/extractor/dfb.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py index cdfeccacb..a4d0448c2 100644 --- a/youtube_dl/extractor/dfb.py +++ b/youtube_dl/extractor/dfb.py @@ -12,39 +12,46 @@ class DFBIE(InfoExtractor): _TEST = { 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/', - # The md5 is different each time + 'md5': 'ac0f98a52a330f700b4b3034ad240649', 'info_dict': { 'id': '11633', 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland', 'upload_date': '20150714', }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + display_id, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage(url, display_id) player_info = self._download_xml( 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, display_id) video_info = player_info.find('video') + stream_access_url = self._proto_relative_url(video_info.find('url').text.strip()) - f4m_info = self._download_xml( - self._proto_relative_url(video_info.find('url').text.strip()), display_id) - token_el = f4m_info.find('token') - manifest_url = token_el.attrib['url'] + '?' 
+ 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' - formats = self._extract_f4m_formats(manifest_url, display_id) + formats = [] + # see http://tv.dfb.de/player/js/ajax.js for the method to extract m3u8 formats + for sa_url in (stream_access_url, stream_access_url + '&area=&format=iphone'): + stream_access_info = self._download_xml(sa_url, display_id) + token_el = stream_access_info.find('token') + manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + if '.f4m' in manifest_url: + formats.extend(self._extract_f4m_formats( + manifest_url + '&hdcore=3.2.0', + display_id, f4m_id='hds', fatal=False)) + else: + formats.extend(self._extract_m3u8_formats( + manifest_url, display_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) return { 'id': video_id, 'display_id': display_id, 'title': video_info.find('title').text, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id, 'upload_date': unified_strdate(video_info.find('time_date').text), 'formats': formats, } From 5556047465e0601d2bdee0e5a436cee64b745851 Mon Sep 17 00:00:00 2001 From: Reino17 Date: Wed, 27 Apr 2016 13:11:38 +0200 Subject: [PATCH 0364/3599] [rtlnl] Update 720p PG_URL_TEMPLATE - Fixed the format_id for the 720p progressive videostream and added the video's resolution. - The adaptive videostreams have the m3u8-extension, so I removed the confusing mp4-extension in order to make a better distinction between these and the progressive videostreams.
--- youtube_dl/extractor/rtlnl.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index 543d94417..e8b55ea25 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -94,19 +94,30 @@ class RtlNlIE(InfoExtractor): videopath = material['videopath'] m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath - formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4') + formats = self._extract_m3u8_formats(m3u8_url, uuid) video_urlpart = videopath.split('/adaptive/')[1][:-5] PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4' formats.extend([ { - 'url': PG_URL_TEMPLATE % ('a2m', video_urlpart), - 'format_id': 'pg-sd', + 'url': PG_URL_TEMPLATE % ('a2t', video_urlpart), + 'format_id': 'a2t', + 'width': 512, + 'height': 288, }, { - 'url': PG_URL_TEMPLATE % ('a3m', video_urlpart), - 'format_id': 'pg-hd', + 'url': PG_URL_TEMPLATE % ('a3t', video_urlpart), + 'format_id': 'a3t', + 'width': 704, + 'height': 400, + 'quality': 0, + }, + { + 'url': PG_URL_TEMPLATE % ('nettv', video_urlpart), + 'format_id': 'nettv', + 'width': 1280, + 'height': 720, 'quality': 0, } ]) From 0571ffda7dd12fc1067c0344f3ce4ce47b39edb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Apr 2016 01:43:39 +0600 Subject: [PATCH 0365/3599] [rtlnl] Improve extraction (Closes #9329) * Make hls extraction non fatal and revert ext * Extract progressive formats' metadata from corresponding hls formats --- youtube_dl/extractor/rtlnl.py | 55 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index e8b55ea25..c95bcf035 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -94,33 +94,44 @@ class RtlNlIE(InfoExtractor): videopath = material['videopath'] m3u8_url = meta.get('videohost', 
'http://manifest.us.rtl.nl') + videopath - formats = self._extract_m3u8_formats(m3u8_url, uuid) + formats = self._extract_m3u8_formats( + m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False) video_urlpart = videopath.split('/adaptive/')[1][:-5] PG_URL_TEMPLATE = 'http://pg.us.rtl.nl/rtlxl/network/%s/progressive/%s.mp4' - formats.extend([ - { - 'url': PG_URL_TEMPLATE % ('a2t', video_urlpart), - 'format_id': 'a2t', - 'width': 512, - 'height': 288, - }, - { - 'url': PG_URL_TEMPLATE % ('a3t', video_urlpart), - 'format_id': 'a3t', - 'width': 704, - 'height': 400, - 'quality': 0, - }, - { - 'url': PG_URL_TEMPLATE % ('nettv', video_urlpart), - 'format_id': 'nettv', - 'width': 1280, - 'height': 720, - 'quality': 0, + PG_FORMATS = ( + ('a2t', 512, 288), + ('a3t', 704, 400), + ('nettv', 1280, 720), + ) + + def pg_format(format_id, width, height): + return { + 'url': PG_URL_TEMPLATE % (format_id, video_urlpart), + 'format_id': 'pg-%s' % format_id, + 'protocol': 'http', + 'width': width, + 'height': height, } - ]) + + if not formats: + formats = [pg_format(*pg_tuple) for pg_tuple in PG_FORMATS] + else: + pg_formats = [] + for format_id, width, height in PG_FORMATS: + try: + # Find hls format with the same width and height corresponding + # to progressive format and copy metadata from it. + f = next(f for f in formats + if f.get('width') == width and f.get('height') == height).copy() + f.update(pg_format(format_id, width, height)) + pg_formats.append(f) + except StopIteration: + # Missing hls format does mean that no progressive format with + # such width and height exists either. 
+ pass + formats.extend(pg_formats) self._sort_formats(formats) thumbnails = [] From cd63d091cecd8a85a2080035051205b00f3454d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Apr 2016 01:48:14 +0600 Subject: [PATCH 0366/3599] [rtlnl] Fix tests --- youtube_dl/extractor/rtlnl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index c95bcf035..e4411054a 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -39,7 +39,7 @@ class RtlNlIE(InfoExtractor): 'ext': 'mp4', 'timestamp': 1424039400, 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag', - 'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', + 'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', 'upload_date': '20150215', 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. 
Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', } @@ -50,7 +50,7 @@ class RtlNlIE(InfoExtractor): 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a', 'ext': 'mp4', 'title': 'RTL Nieuws - Meer beelden van overval juwelier', - 'thumbnail': 're:^https?://screenshots\.rtl\.nl/system/thumb/sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$', + 'thumbnail': 're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$', 'timestamp': 1437233400, 'upload_date': '20150718', 'duration': 30.474, From 373e1230e4a3b934ddc59c212773d36a7e998dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Apr 2016 01:50:26 +0600 Subject: [PATCH 0367/3599] [rtlnl] Clarify tests --- youtube_dl/extractor/rtlnl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index e4411054a..5e916c4ab 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -32,6 +32,7 @@ class RtlNlIE(InfoExtractor): 'duration': 576.880, }, }, { + # best format avaialble a3t 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', 'md5': 'dea7474214af1271d91ef332fb8be7ea', 'info_dict': { @@ -45,6 +46,7 @@ class RtlNlIE(InfoExtractor): } }, { # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275) + # best format available nettv 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false', 'info_dict': { 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a', From ca278a182b9331201e058f9f4d46b3b6114a1518 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 30 Apr 2016 02:07:29 +0600 Subject: [PATCH 0368/3599] [rtlnl] Replace test --- youtube_dl/extractor/rtlnl.py | 16 
++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index 5e916c4ab..8598b5840 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -20,16 +20,16 @@ class RtlNlIE(InfoExtractor): (?P[0-9a-f-]+)''' _TESTS = [{ - 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677', - 'md5': 'cc16baa36a6c169391f0764fa6b16654', + 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416', + 'md5': '473d1946c1fdd050b2c0161a4b13c373', 'info_dict': { - 'id': '6e4203a6-0a5e-3596-8424-c599a59e0677', + 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416', 'ext': 'mp4', - 'title': 'RTL Nieuws - Laat', - 'description': 'md5:6b61f66510c8889923b11f2778c72dc5', - 'timestamp': 1408051800, - 'upload_date': '20140814', - 'duration': 576.880, + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1461951000, + 'upload_date': '20160429', + 'duration': 1167.96, }, }, { # best format avaialble a3t From 69c4cde4ba6a4c7dfb8a46d1713cbb46d6f1d623 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 29 Apr 2016 21:35:09 +0100 Subject: [PATCH 0369/3599] [wsj] improve extraction --- youtube_dl/extractor/wsj.py | 95 +++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index 5a897371d..a83e68b17 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -4,16 +4,22 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, + float_or_none, unified_strdate, ) class WSJIE(InfoExtractor): - _VALID_URL = r'https?://video-api\.wsj\.com/api-video/player/iframe\.html\?guid=(?P[a-zA-Z0-9-]+)' + _VALID_URL = r'''(?x)https?:// + (?: + video-api\.wsj\.com/api-video/player/iframe\.html\?guid=| + (?:www\.)?wsj\.com/video/[^/]+/ + ) + 
(?P[a-zA-Z0-9-]+)''' IE_DESC = 'Wall Street Journal' - _TEST = { + _TESTS = [{ 'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', - 'md5': '9747d7a6ebc2f4df64b981e1dde9efa9', + 'md5': 'e230a5bb249075e40793b655a54a02e4', 'info_dict': { 'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', 'ext': 'mp4', @@ -24,65 +30,60 @@ class WSJIE(InfoExtractor): 'duration': 90, 'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo', }, - } + }, { + 'url': 'http://www.wsj.com/video/can-alphabet-build-a-smarter-city/359DDAA8-9AC1-489C-82E6-0429C1E430E0.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - bitrates = [128, 174, 264, 320, 464, 664, 1264] api_url = ( 'http://video-api.wsj.com/api-video/find_all_videos.asp?' - 'type=guid&count=1&query=%s&' - 'fields=hls,adZone,thumbnailList,guid,state,secondsUntilStartTime,' - 'author,description,name,linkURL,videoStillURL,duration,videoURL,' - 'adCategory,catastrophic,linkShortURL,doctypeID,youtubeID,' - 'titletag,rssURL,wsj-section,wsj-subsection,allthingsd-section,' - 'allthingsd-subsection,sm-section,sm-subsection,provider,' - 'formattedCreationDate,keywords,keywordsOmniture,column,editor,' - 'emailURL,emailPartnerID,showName,omnitureProgramName,' - 'omnitureVideoFormat,linkRelativeURL,touchCastID,' - 'omniturePublishDate,%s') % ( - video_id, ','.join('video%dkMP4Url' % br for br in bitrates)) + 'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,' + 'thumbnailList,author,description,name,duration,videoURL,' + 'titletag,formattedCreationDate,keywords,editor' % video_id) info = self._download_json(api_url, video_id)['items'][0] - - # Thumbnails are conveniently in the correct format already - thumbnails = info.get('thumbnailList') - creator = info.get('author') - uploader_id = info.get('editor') - categories = info.get('keywords') - duration = int_or_none(info.get('duration')) - upload_date = unified_strdate( - 
info.get('formattedCreationDate'), day_first=False) title = info.get('name', info.get('titletag')) - formats = [{ - 'format_id': 'f4m', - 'format_note': 'f4m (meta URL)', - 'url': info['videoURL'], - }] - if info.get('hls'): + formats = [] + + f4m_url = info.get('videoURL') + if f4m_url: + formats.extend(self._extract_f4m_formats( + f4m_url, video_id, f4m_id='hds', fatal=False)) + + m3u8_url = info.get('hls') + if m3u8_url: formats.extend(self._extract_m3u8_formats( info['hls'], video_id, ext='mp4', - preference=0, entry_protocol='m3u8_native')) - for br in bitrates: - field = 'video%dkMP4Url' % br - if info.get(field): - formats.append({ - 'format_id': 'mp4-%d' % br, - 'container': 'mp4', - 'tbr': br, - 'url': info[field], - }) + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + + for v in info.get('videoMP4List', []): + mp4_url = v.get('url') + if not mp4_url: + continue + tbr = int_or_none(v.get('bitrate')) + formats.append({ + 'url': mp4_url, + 'format_id': 'http' + ('-%d' % tbr if tbr else ''), + 'tbr': tbr, + 'width': int_or_none(v.get('width')), + 'height': int_or_none(v.get('height')), + 'fps': float_or_none(v.get('fps')), + }) self._sort_formats(formats) return { 'id': video_id, 'formats': formats, - 'thumbnails': thumbnails, - 'creator': creator, - 'uploader_id': uploader_id, - 'duration': duration, - 'upload_date': upload_date, + # Thumbnails are conveniently in the correct format already + 'thumbnails': info.get('thumbnailList'), + 'creator': info.get('author'), + 'uploader_id': info.get('editor'), + 'duration': int_or_none(info.get('duration')), + 'upload_date': unified_strdate(info.get( + 'formattedCreationDate'), day_first=False), 'title': title, - 'categories': categories, + 'categories': info.get('keywords'), } From cbc032c8b70a038a69259378c92b4ba97b42d491 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 30 Apr 2016 01:24:36 +0100 Subject: [PATCH 0370/3599] [pbs] extract all http formats --- youtube_dl/extractor/pbs.py | 48 
++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index f43e3a146..38cdb9975 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -196,7 +196,7 @@ class PBSIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', - 'md5': 'ce1888486f0908d555a8093cac9a7362', + 'md5': '173dc391afd361fa72eab5d3d918968d', 'info_dict': { 'id': '2365006249', 'ext': 'mp4', @@ -204,13 +204,10 @@ class PBSIE(InfoExtractor): 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071', 'duration': 3190, }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, }, { 'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/', - 'md5': '143c98aa54a346738a3d78f54c925321', + 'md5': '6f722cb3c3982186d34b0f13374499c7', 'info_dict': { 'id': '2365297690', 'ext': 'mp4', @@ -218,9 +215,6 @@ class PBSIE(InfoExtractor): 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9', 'duration': 5050, }, - 'params': { - 'skip_download': True, # requires ffmpeg - } }, { 'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/', @@ -244,9 +238,6 @@ class PBSIE(InfoExtractor): 'duration': 6559, 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, }, { 'url': 'http://www.pbs.org/wgbh/nova/earth/killer-typhoon.html', @@ -262,9 +253,6 @@ class PBSIE(InfoExtractor): 'upload_date': '20140122', 'age_limit': 10, }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, }, { 'url': 'http://www.pbs.org/wgbh/pages/frontline/united-states-of-secrets/', @@ -290,6 +278,7 @@ class PBSIE(InfoExtractor): }, { 'url': 'http://www.pbs.org/video/2365245528/', + 'md5': '115223d41bd55cda8ae5cd5ed4e11497', 'info_dict': { 'id': '2365245528', 'display_id': '2365245528', @@ -299,15 +288,13 @@ class PBSIE(InfoExtractor): 'duration': 6851, 'thumbnail': 
're:^https?://.*\.jpg$', }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, }, { # Video embedded in iframe containing angle brackets as attribute's value (e.g. # "', webpage, 'embed url')) + + return { + '_type': 'url_transparent', + 'url': embed_url, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c9d1422e5..14b4f245f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -75,6 +75,7 @@ from .bigflix import BigflixIE from .bild import BildIE from .bilibili import BiliBiliIE from .biobiochiletv import BioBioChileTVIE +from .biqle import BIQLEIE from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 67220f1b7..041d93629 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -26,12 +26,16 @@ class VKIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)| + (?: + (?:m\.)?vk\.com/video_| + (?:www\.)?daxab.com/ + ) + ext\.php\?(?P.*?\boid=(?P-?\d+).*?\bid=(?P\d+).*)| (?: (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| - (?:www\.)?biqle\.ru/watch/ + (?:www\.)?daxab.com/embed/ ) - (?P[^s].*?)(?:\?(?:.*\blist=(?P[\da-f]+))?|%2F|$) + (?P-?\d+_\d+)(?:.*\blist=(?P[\da-f]+))? ) ''' _NETRC_MACHINE = 'vk' @@ -75,7 +79,8 @@ class VKIE(InfoExtractor): 'duration': 101, 'upload_date': '20120730', 'view_count': int, - } + }, + 'skip': 'This video has been removed from public access.', }, { # VIDEO NOW REMOVED @@ -142,7 +147,7 @@ class VKIE(InfoExtractor): 'id': 'V3K4mi0SYkc', 'ext': 'webm', 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' 
Certificate of Registration and License to Operate", - 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', + 'description': 'md5:d9903938abdc74c738af77f527ca0596', 'duration': 178, 'upload_date': '20130116', 'uploader': "Children's Joy Foundation", @@ -173,11 +178,6 @@ class VKIE(InfoExtractor): 'url': 'https://vk.com/video205387401_164765225', 'only_matching': True, }, - { - # vk wrapper - 'url': 'http://www.biqle.ru/watch/847655_160197695', - 'only_matching': True, - }, { # pladform embed 'url': 'https://vk.com/video-76116461_171554880', @@ -217,20 +217,22 @@ class VKIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - if not video_id: + info_url = url + if video_id: + info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id + # Some videos (removed?) can only be downloaded with list id specified + list_id = mobj.group('list_id') + if list_id: + info_url += '&list=%s' % list_id + else: + info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query') video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) - info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id - - # Some videos (removed?) can only be downloaded with list id specified - list_id = mobj.group('list_id') - if list_id: - info_url += '&list=%s' % list_id - info_page = self._download_webpage(info_url, video_id) error_message = self._html_search_regex( - r'(?s)]+class="video_layer_message"[^>]*>(.+?)
', + [r'(?s)]+class="video_layer_message"[^>]*>(.+?)

', + r'(?s)]+id="video_ext_msg"[^>]*>(.+?)
'], info_page, 'error message', default=None) if error_message: raise ExtractorError(error_message, expected=True) @@ -305,17 +307,17 @@ class VKIE(InfoExtractor): view_count = None views = self._html_search_regex( r'"mv_views_count_number"[^>]*>(.+?\bviews?)<', - info_page, 'view count', fatal=False) + info_page, 'view count', default=None) if views: view_count = str_to_int(self._search_regex( r'([\d,.]+)', views, 'view count', fatal=False)) formats = [] for k, v in data.items(): - if not k.startswith('url') and k != 'extra_data' or not v: + if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v: continue height = int_or_none(self._search_regex( - r'^url(\d+)', k, 'height', default=None)) + r'^(?:url|cache)(\d+)', k, 'height', default=None)) formats.append({ 'format_id': k, 'url': v, From abc97b5eda4ed4b36cec29e9966eb1bb7bcd97ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 22:07:30 +0600 Subject: [PATCH 0446/3599] [utils] Allow empty attribute values in get_element_by_attribute (Closes #9415) --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a5922b2b5..6e4573784 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -256,9 +256,9 @@ def get_element_by_attribute(attribute, value, html): m = re.search(r'''(?xs) <([a-zA-Z0-9:._-]+) - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? \s+%s=['"]?%s['"]? - (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*? \s*> (?P.*?) 
From 25cb7a0eebae0093a81fa1c930480fafa13feb25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 22:11:18 +0600 Subject: [PATCH 0447/3599] [youtube] Allow empty attribute values in description regex --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b7c3cb63f..f3f102c30 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1326,9 +1326,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if video_description: video_description = re.sub(r'''(?x) ]*> [^<]+\.{3}\s* From 3e80e6f40d6ef76142340a2292ef2445dc79594b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 23:35:58 +0600 Subject: [PATCH 0448/3599] [vevo] Allow request to api.vevo.com to fail (Closes #9417) I don't know whether this it's tempopary or api has just gone --- youtube_dl/extractor/vevo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c0ef08c02..30b3a9e7e 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -201,9 +201,10 @@ class VevoIE(VevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id response = self._download_json( - json_url, video_id, 'Downloading video info', 'Unable to download info') + json_url, video_id, 'Downloading video info', + 'Unable to download info', fatal=False) or {} video_info = response.get('video') or {} artist = None featured_artist = None @@ -212,7 +213,7 @@ class VevoIE(VevoBaseIE): formats = [] if not video_info: - if response.get('statusCode') != 909: + if response and response.get('statusCode') != 909: ytid = response.get('errorInfo', 
{}).get('ytid') if ytid: self.report_warning( From f745403b5b448c170710256a61b8505e09e77674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 6 May 2016 23:37:17 +0600 Subject: [PATCH 0449/3599] [vevo] Revert videoplayer.vevo.com to api.vevo.com --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 30b3a9e7e..c0632cd6a 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -201,7 +201,7 @@ class VevoIE(VevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - json_url = 'http://videoplayer.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id + json_url = 'http://api.vevo.com/VideoService/AuthenticateVideo?isrc=%s' % video_id response = self._download_json( json_url, video_id, 'Downloading video info', 'Unable to download info', fatal=False) or {} From e2ee97dcd5c55e1c2aceae0d93fbfd64d0cc5ba3 Mon Sep 17 00:00:00 2001 From: inondle Date: Fri, 6 May 2016 12:05:37 -0700 Subject: [PATCH 0450/3599] [liveleak] Adds support for thumbnails, updates tests --- youtube_dl/extractor/liveleak.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 29fba5f30..ea0565ac0 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -17,7 +17,8 @@ class LiveLeakIE(InfoExtractor): 'ext': 'flv', 'description': 'extremely bad day for this guy..!', 'uploader': 'ljfriel2', - 'title': 'Most unlucky car accident' + 'title': 'Most unlucky car accident', + 'thumbnail': 're:^https?://.*\.jpg$' } }, { 'url': 'http://www.liveleak.com/view?i=f93_1390833151', @@ -28,6 +29,7 @@ class LiveLeakIE(InfoExtractor): 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t 
intereseted in Edward Snowden.', 'uploader': 'ARD_Stinkt', 'title': 'German Television does first Edward Snowden Interview (ENGLISH)', + 'thumbnail': 're:^https?://.*\.jpg$' } }, { 'url': 'http://www.liveleak.com/view?i=4f7_1392687779', @@ -49,7 +51,8 @@ class LiveLeakIE(InfoExtractor): 'ext': 'mp4', 'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.', 'uploader': 'bony333', - 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia' + 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', + 'thumbnail': 're:^https?://.*\.jpg$' } }] @@ -72,6 +75,7 @@ class LiveLeakIE(InfoExtractor): age_limit = int_or_none(self._search_regex( r'you confirm that you are ([0-9]+) years and over.', webpage, 'age limit', default=None)) + video_thumbnail = self._og_search_thumbnail(webpage) sources_raw = self._search_regex( r'(?s)sources:\s*(\[.*?\]),', webpage, 'video URLs', default=None) @@ -124,4 +128,5 @@ class LiveLeakIE(InfoExtractor): 'uploader': video_uploader, 'formats': formats, 'age_limit': age_limit, + 'thumbnail': video_thumbnail, } From 3fd6332c056115e5de37b0789d907e9344c2ff5c Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 7 May 2016 15:12:20 +0100 Subject: [PATCH 0451/3599] [flickr] extract license field(closes #9425) --- youtube_dl/extractor/flickr.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 0a3de1498..73ae3adee 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -27,10 +27,24 @@ class FlickrIE(InfoExtractor): 'comment_count': int, 'view_count': int, 'tags': list, + 'license': 'Attribution-ShareAlike', } } - _API_BASE_URL = 'https://api.flickr.com/services/rest?' 
+ # https://help.yahoo.com/kb/flickr/SLN25525.html + _LICENSES = { + '0': 'All Rights Reserved', + '1': 'Attribution-NonCommercial-ShareAlike', + '2': 'Attribution-NonCommercial', + '3': 'Attribution-NonCommercial-NoDerivs', + '4': 'Attribution', + '5': 'Attribution-ShareAlike', + '6': 'Attribution-NoDerivs', + '7': 'No known copyright restrictions', + '8': 'United States government work', + '9': 'Public Domain Dedication (CC0)', + '10': 'Public Domain Work', + } def _call_api(self, method, video_id, api_key, note, secret=None): query = { @@ -87,7 +101,8 @@ class FlickrIE(InfoExtractor): 'uploader': owner.get('realname'), 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')), 'view_count': int_or_none(video_info.get('views')), - 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])] + 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])], + 'license': self._LICENSES.get(video_info.get('license')), } else: raise ExtractorError('not a video', expected=True) From cb1fa5881315ed998a366f47511b7a4b4ea067b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 7 May 2016 20:15:40 +0600 Subject: [PATCH 0452/3599] [flickr] Extract uploader URL (Closes #9426) --- youtube_dl/extractor/flickr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 73ae3adee..a8e1bf42a 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -24,6 +24,7 @@ class FlickrIE(InfoExtractor): 'upload_date': '20110423', 'uploader_id': '10922353@N03', 'uploader': 'Forest Wander', + 'uploader_url': 'https://www.flickr.com/photos/forestwander-nature-pictures/', 'comment_count': int, 'view_count': int, 'tags': list, @@ -89,6 +90,9 @@ class FlickrIE(InfoExtractor): self._sort_formats(formats) owner = video_info.get('owner', {}) + uploader_id = owner.get('nsid') + uploader_path = 
owner.get('path_alias') or uploader_id + uploader_url = 'https://www.flickr.com/photos/%s/' % uploader_path if uploader_path else None return { 'id': video_id, @@ -97,8 +101,9 @@ class FlickrIE(InfoExtractor): 'formats': formats, 'timestamp': int_or_none(video_info.get('dateuploaded')), 'duration': int_or_none(video_info.get('video', {}).get('duration')), - 'uploader_id': owner.get('nsid'), + 'uploader_id': uploader_id, 'uploader': owner.get('realname'), + 'uploader_url': uploader_url, 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')), 'view_count': int_or_none(video_info.get('views')), 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])], From a0904c5d8024c12b7f95b1126a6b8152a4e1021f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 8 May 2016 00:56:31 +0800 Subject: [PATCH 0453/3599] [telegraaf] Fix extractor (closes #9318) --- youtube_dl/extractor/telegraaf.py | 58 +++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py index 6f8333cfc..9092e9b85 100644 --- a/youtube_dl/extractor/telegraaf.py +++ b/youtube_dl/extractor/telegraaf.py @@ -2,14 +2,16 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( + determine_ext, + remove_end, +) class TelegraafIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P\d+)/[^/]+\.html' _TEST = { 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', - 'md5': '83245a9779bcc4a24454bfd53c65b6dc', 'info_dict': { 'id': '24353229', 'ext': 'mp4', @@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 33, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): - playlist_id = self._match_id(url) + video_id = self._match_id(url) 
- webpage = self._download_webpage(url, playlist_id) + webpage = self._download_webpage(url, video_id) + player_url = self._html_search_regex( + r']+src="([^"]+")', webpage, 'player URL') + player_page = self._download_webpage( + player_url, video_id, note='Download player webpage') playlist_url = self._search_regex( - r"iframe\.loadPlayer\('([^']+)'", webpage, 'player') + r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL') + playlist_data = self._download_json(playlist_url, video_id) + + item = playlist_data['items'][0] + formats = [] + locations = item['locations'] + for location in locations.get('adaptive', []): + manifest_url = location['src'] + ext = determine_ext(manifest_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, ext='mp4', m3u8_id='hls')) + elif ext == 'mpd': + # TODO: Current DASH formats are broken - $Time$ pattern in + # not implemented yet + continue + else: + self.report_warning('Unknown adaptive format %s' % ext) + for location in locations.get('progressive', []): + formats.append({ + 'url': location['sources'][0]['src'], + 'width': location.get('width'), + 'height': location.get('height'), + 'format_id': 'http-%s' % location['label'], + }) + + self._sort_formats(formats) - entries = self._extract_xspf_playlist(playlist_url, playlist_id) title = remove_end(self._og_search_title(webpage), ' - VIDEO') description = self._og_search_description(webpage) + duration = item.get('duration') + thumbnail = item.get('poster') - return self.playlist_result(entries, playlist_id, title, description) + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'duration': duration, + 'thumbnail': thumbnail, + } From e2eca6f65e9969c31b3374bd3688321f3e471cd7 Mon Sep 17 00:00:00 2001 From: Kevin Deldycke Date: Sat, 7 May 2016 20:03:25 +0200 Subject: [PATCH 0454/3599] Expand user's home in batch file path. 
--- youtube_dl/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 737f6545d..7a0466077 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -86,7 +86,9 @@ def _real_main(argv=None): if opts.batchfile == '-': batchfd = sys.stdin else: - batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') + batchfd = io.open( + compat_expanduser(opts.batchfile), + 'r', encoding='utf-8', errors='ignore') batch_urls = read_batch_urls(batchfd) if opts.verbose: write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') From 00c21c225decf648199013f2fa3385a1332037bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 00:11:44 +0600 Subject: [PATCH 0455/3599] Credit @kdeldycke for #9430 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 814fe9ec3..5f668338b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -169,3 +169,4 @@ Viťas Strádal Kagami Hiiragi Philip Huppert blahgeek +Kevin Deldycke From 5c24873a9e6a47e58b10eb0c0825e165604796f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 02:04:34 +0600 Subject: [PATCH 0456/3599] Credit @inondle for #9400 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 5f668338b..bf860b7f7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -170,3 +170,4 @@ Kagami Hiiragi Philip Huppert blahgeek Kevin Deldycke +inondle From f5436c5d9e4e65790440ada40476712ff430651b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 02:29:26 +0600 Subject: [PATCH 0457/3599] [downloader/external] Add temp fix ffmpeg m3u8 downloads (Closes #9394) --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 8d642fc3e..45f49c350 100644 --- a/youtube_dl/downloader/external.py +++ 
b/youtube_dl/downloader/external.py @@ -224,7 +224,7 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_live', 'live'] args += ['-i', url, '-c', 'copy'] - if protocol == 'm3u8': + if protocol in ('m3u8', 'm3u8_native'): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] else: From 3e169233daf76cd7585ebac12504f8e624b7693b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 04:36:57 +0600 Subject: [PATCH 0458/3599] Expanduser for more options with input files --- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2187dcc8f..a96482e68 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -2018,6 +2018,7 @@ class YoutubeDL(object): if opts_cookiefile is None: self.cookiejar = compat_cookiejar.CookieJar() else: + opts_cookiefile = compat_expanduser(opts_cookiefile) self.cookiejar = compat_cookiejar.MozillaCookieJar( opts_cookiefile) if os.access(opts_cookiefile, os.R_OK): diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 7a0466077..cbd84c3af 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -406,7 +406,7 @@ def _real_main(argv=None): try: if opts.load_info_filename is not None: - retcode = ydl.download_with_info_file(opts.load_info_filename) + retcode = ydl.download_with_info_file(compat_expanduser(opts.load_info_filename)) else: retcode = ydl.download(all_urls) except MaxDownloadsReached: From 0fdbe3146c2b3825cc26aca7e918df041b0f9adf Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 08:56:22 +0900 Subject: [PATCH 0459/3599] use dict.get in case upload_date does not exist --- youtube_dl/extractor/afreecatv.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index aa5847677..4ebc61bae 100644 --- 
a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -66,7 +66,7 @@ class AfreecaTVIE(InfoExtractor): @staticmethod def parse_video_key(key): - video_key = {'upload_date': None, 'part': '0'} + video_key = {} m = re.match(r'^(?P\d{8})_\w+_(?P\d+)$', key) if m: video_key['upload_date'] = m.group('upload_date') @@ -92,12 +92,12 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for video_file in video_xml.findall('./track/video/file'): + for i, video_file in enumerate(video_xml.findall('./track/video/file')): video_key = self.parse_video_key(video_file.get('key')) entries.append({ - 'id': '%s_%s' % (video_id, video_key['part']), + 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), 'title': title, - 'upload_date': video_key['upload_date'], + 'upload_date': video_key.get('upload_date'), 'duration': int_or_none(video_file.get('duration')), 'url': video_file.text, }) From 81f35fee2fd2b58d909887aaa7667310a4d65759 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 08:56:44 +0900 Subject: [PATCH 0460/3599] fix extractors.py import order --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f85d75933..1f95530a5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -16,8 +16,8 @@ from .adobetv import ( AdobeTVVideoIE, ) from .adultswim import AdultSwimIE -from .afreecatv import AfreecaTVIE from .aenetworks import AENetworksIE +from .afreecatv import AfreecaTVIE from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE From 9c072d38c6b0361d91e92c50cd0c753dc8ce3101 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 06:52:42 +0600 Subject: [PATCH 0461/3599] [arte] Improve language preference (Closes #9401, closes 
#9162) --- youtube_dl/extractor/arte.py | 58 ++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 881cacfab..e37fdae13 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -161,24 +161,53 @@ class ArteTVPlus7IE(InfoExtractor): 'es': 'E[ESP]', } + langcode = LANGS.get(lang, lang) + formats = [] for format_id, format_dict in player_info['VSR'].items(): f = dict(format_dict) versionCode = f.get('versionCode') - langcode = LANGS.get(lang, lang) - lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)] - lang_pref = None - if versionCode: - matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)] - lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs) - source_pref = 0 - if versionCode is not None: - # The original version with subtitles has lower relevance - if re.match(r'VO-ST(F|A|E)', versionCode): - source_pref -= 10 - # The version with sourds/mal subtitles has also lower relevance - elif re.match(r'VO?(F|A|E)-STM\1', versionCode): - source_pref -= 9 + l = re.escape(langcode) + + # Language preference from most to least priority + # Reference: section 5.6.3 of + # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf + PREFERENCES = ( + # original version in requested language, without subtitles + r'VO{0}$'.format(l), + # original version in requested language, with partial subtitles in requested language + r'VO{0}-ST{0}$'.format(l), + # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language + r'VO{0}-STM{0}$'.format(l), + # non-original (dubbed) version in requested language, without subtitles + r'V{0}$'.format(l), + # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language + r'V{0}-ST{0}$'.format(l), + # non-original 
(dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language + r'V{0}-STM{0}$'.format(l), + # original version in requested language, with partial subtitles in different language + r'VO{0}-ST(?!{0}).+?$'.format(l), + # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language + r'VO{0}-STM(?!{0}).+?$'.format(l), + # original version in different language, with partial subtitles in requested language + r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l), + # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language + r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l), + # original version in different language, without subtitles + r'VO(?:(?!{0}))?$'.format(l), + # original version in different language, with partial subtitles in different language + r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l), + # original version in different language, with subtitles for the deaf and hard-of-hearing in different language + r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l), + ) + + for pref, p in enumerate(PREFERENCES): + if re.match(p, versionCode): + lang_pref = len(PREFERENCES) - pref + break + else: + lang_pref = -1 + format = { 'format_id': format_id, 'preference': -10 if f.get('videoFormat') == 'M3U8' else None, @@ -188,7 +217,6 @@ class ArteTVPlus7IE(InfoExtractor): 'height': int_or_none(f.get('height')), 'tbr': int_or_none(f.get('bitrate')), 'quality': qfunc(f.get('quality')), - 'source_preference': source_pref, } if f.get('mediaType') == 'rtmp': From 3452c3a27c2bfd278746314cda4247c2226a35f3 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:02:19 +0900 Subject: [PATCH 0462/3599] update tests --- youtube_dl/extractor/afreecatv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 4ebc61bae..b90095881 100644 --- 
a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -30,7 +30,7 @@ class AfreecaTVIE(InfoExtractor): 'id': '36164052', 'ext': 'mp4', 'title': '데일리 에이프릴 요정들의 시상식!', - 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', 'upload_date': '20160503', @@ -40,7 +40,7 @@ class AfreecaTVIE(InfoExtractor): 'info_dict': { 'id': '36153164', 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'thumbnail': 're:^https?://videoimg.afreecatv.com/.*$', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'uploader': 'dailyapril', 'uploader_id': 'dailyapril', }, @@ -62,6 +62,9 @@ class AfreecaTVIE(InfoExtractor): 'upload_date': '20160502', }, }], + }, { + 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', + 'only_matching': True, }] @staticmethod From 370d4eb8ad3d9d092fc5eb116509eaf4a3e83177 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:02:48 +0900 Subject: [PATCH 0463/3599] use stricter file selector in case of empty in case of empty ./track/video/file entries --- youtube_dl/extractor/afreecatv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index b90095881..527386be3 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -95,7 +95,7 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file')): + for i, video_file in enumerate(video_xml.findall('./track/video/file[@key]')): video_key = self.parse_video_key(video_file.get('key')) entries.append({ 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), @@ -119,7 +119,7 @@ class AfreecaTVIE(InfoExtractor): info['entries'] = 
entries elif len(entries) == 1: info['url'] = entries[0]['url'] - info['upload_date'] = entries[0]['upload_date'] + info['upload_date'] = entries[0].get('upload_date') else: raise ExtractorError( 'No files found for the specified AfreecaTV video, either' From 93fdb1417766015ddadcd13a709cdfae4de5e246 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Sun, 8 May 2016 10:33:17 +0900 Subject: [PATCH 0464/3599] don't use selection by attribute --- youtube_dl/extractor/afreecatv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 527386be3..0fcbea0d1 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -95,8 +95,10 @@ class AfreecaTVIE(InfoExtractor): thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') entries = [] - for i, video_file in enumerate(video_xml.findall('./track/video/file[@key]')): - video_key = self.parse_video_key(video_file.get('key')) + for i, video_file in enumerate(video_xml.findall('./track/video/file')): + video_key = self.parse_video_key(video_file.get('key', '')) + if not video_key: + continue entries.append({ 'id': '%s_%s' % (video_id, video_key.get('part', i + 1)), 'title': title, From 3b01a9fbb63e33325fa979db8a846d3e655e79e6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 8 May 2016 14:34:38 +0800 Subject: [PATCH 0465/3599] [litv] Add new extractor LiTV is a streaming platform providing free and paid legal contents in Taiwan. 
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/litv.py | 137 +++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 youtube_dl/extractor/litv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 14b4f245f..7bacef184 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -384,6 +384,7 @@ from .limelight import ( LimelightChannelIE, LimelightChannelListIE, ) +from .litv import LiTVIE from .liveleak import LiveLeakIE from .livestream import ( LivestreamIE, diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py new file mode 100644 index 000000000..3356d015d --- /dev/null +++ b/youtube_dl/extractor/litv.py @@ -0,0 +1,137 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + smuggle_url, + unsmuggle_url, +) + + +class LiTVIE(InfoExtractor): + _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P[^&]+)' + + _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' + + _TESTS = [{ + 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'info_dict': { + 'id': 'VOD00041606', + 'title': '花千骨', + }, + 'playlist_count': 50, + }, { + 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'info_dict': { + 'id': 'VOD00041610', + 'ext': 'mp4', + 'title': '花千骨第1集', + 'thumbnail': 're:https?://.*\.jpg$', + 'description': 'md5:c7017aa144c87467c4fb2909c4b05d6f', + 'episode_number': 1, + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, # m3u8 download + }, + 'skip': 'Georestricted to Taiwan', + }] + + def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True): + episode_title = view_data['title'] + content_id = season_list['contentId'] + + if prompt: 
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (content_id, video_id)) + + all_episodes = [ + self.url_result(smuggle_url( + self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']), + {'force_noplaylist': True})) # To prevent infinite recursion + for episode in season_list['episode']] + + return self.playlist_result(all_episodes, content_id, episode_title) + + def _real_extract(self, url): + url, data = unsmuggle_url(url, {}) + + video_id = self._match_id(url) + + noplaylist = self._downloader.params.get('noplaylist') + noplaylist_prompt = True + if 'force_noplaylist' in data: + noplaylist = data['force_noplaylist'] + noplaylist_prompt = False + + webpage = self._download_webpage(url, video_id) + + view_data = dict(map(lambda t: (t[0], t[2]), re.findall( + r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2', + webpage))) + + vod_data = self._parse_json(self._search_regex( + 'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), + video_id) + + season_list = list(vod_data.get('seasonList', {}).values()) + if season_list: + if not noplaylist: + return self._extract_playlist( + season_list[0], video_id, vod_data, view_data, + prompt=noplaylist_prompt) + + if noplaylist_prompt: + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + + # In browsers `getMainUrl` request is always issued. Usually this + # endpoint gives the same result as the data embedded in the webpage. 
+ # If georestricted, there are no embedded data, so an extra request is + # necessary to get the error code + video_data = self._parse_json(self._search_regex( + r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', + webpage, 'video data', default='{}'), video_id) + if not video_data: + payload = { + 'assetId': view_data['assetId'], + 'watchDevices': vod_data['watchDevices'], + 'contentType': view_data['contentType'], + } + video_data = self._download_json( + 'https://www.litv.tv/vod/getMainUrl', video_id, + data=json.dumps(payload).encode('utf-8'), + headers={'Content-Type': 'application/json'}) + + if not video_data.get('fullpath'): + error_msg = video_data.get('errorMessage') + if error_msg == 'vod.error.outsideregionerror': + self.raise_geo_restricted('This video is available in Taiwan only') + if error_msg: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True) + raise ExtractorError('Unexpected result from %s' % self.IE_NAME) + + formats = self._extract_m3u8_formats( + video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls') + for a_format in formats: + # LiTV HLS segments doesn't like compressions + a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True + + title = view_data['title'] + view_data.get('secondaryMark', '') + description = view_data.get('description') + thumbnail = view_data.get('imageFile') + categories = [item['name'] for item in vod_data.get('category', [])] + episode = int_or_none(view_data.get('episode')) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'categories': categories, + 'episode_number': episode, + } From f23a92a0cecac0d4db60e086e429793556347271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 May 2016 20:02:54 +0600 Subject: [PATCH 0466/3599] [mva] Add extractor (Closes #6667) --- youtube_dl/extractor/extractors.py | 4 + .../extractor/microsoftvirtualacademy.py | 192 
++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 youtube_dl/extractor/microsoftvirtualacademy.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7bacef184..a0bb3d4c2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -409,6 +409,10 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE +from .microsoftvirtualacademy import ( + MicrosoftVirtualAcademyIE, + MicrosoftVirtualAcademyCourseIE, +) from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .minoto import MinotoIE diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py new file mode 100644 index 000000000..b7fea47ee --- /dev/null +++ b/youtube_dl/extractor/microsoftvirtualacademy.py @@ -0,0 +1,192 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_xpath, +) +from ..utils import ( + int_or_none, + parse_duration, + smuggle_url, + unsmuggle_url, + xpath_text, +) + + +class MicrosoftVirtualAcademyBaseIE(InfoExtractor): + def _extract_base_url(self, course_id, display_id): + return self._download_json( + 'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id, + display_id, 'Downloading course base URL') + + def _extract_chapter_and_title(self, title): + if not title: + return None, None + m = re.search(r'(?P\d+)\s*\|\s*(?P.+)', title) + return (int(m.group('chapter')), m.group('title')) if m else (None, title) + + +class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): + IE_NAME = 'mva' + IE_DESC = 'Microsoft Virtual Academy videos' + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME + + _TESTS = [{ + 'url': 
'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382', + 'md5': '7826c44fc31678b12ad8db11f6b5abb9', + 'info_dict': { + 'id': 'gfVXISmEB_6804984382', + 'ext': 'mp4', + 'title': 'Course Introduction', + 'formats': 'mincount:3', + 'subtitles': { + 'en': [{ + 'ext': 'ttml', + }], + }, + } + }, { + 'url': 'mva:11788:gfVXISmEB_6804984382', + 'only_matching': True, + }] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + mobj = re.match(self._VALID_URL, url) + course_id = mobj.group('course_id') + video_id = mobj.group('id') + + base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id) + + settings = self._download_xml( + '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id), + video_id, 'Downloading video settings XML') + + _, title = self._extract_chapter_and_title(xpath_text( + settings, './/Title', 'title', fatal=True)) + + formats = [] + + for sources in settings.findall(compat_xpath('.//MediaSources')): + if sources.get('videoType') == 'smoothstreaming': + continue + for source in sources.findall(compat_xpath('./MediaSource')): + video_url = source.text + if not video_url or not video_url.startswith('http'): + continue + video_mode = source.get('videoMode') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', video_mode or '', 'height', default=None)) + codec = source.get('codec') + acodec, vcodec = [None] * 2 + if codec: + codecs = codec.split(',') + if len(codecs) == 2: + acodec, vcodec = codecs + elif len(codecs) == 1: + vcodec = codecs[0] + formats.append({ + 'url': video_url, + 'format_id': video_mode, + 'height': height, + 'acodec': acodec, + 'vcodec': vcodec, + }) + self._sort_formats(formats) + + subtitles = {} + for source in settings.findall(compat_xpath('.//MarkerResourceSource')): + subtitle_url = source.text + if not subtitle_url: + continue + subtitles.setdefault('en', []).append({ + 'url': '%s/%s' 
% (base_url, subtitle_url), + 'ext': source.get('type'), + }) + + return { + 'id': video_id, + 'title': title, + 'subtitles': subtitles, + 'formats': formats + } + + +class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE): + IE_NAME = 'mva:course' + IE_DESC = 'Microsoft Virtual Academy courses' + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME + + _TESTS = [{ + 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', + 'info_dict': { + 'id': '11788', + 'title': 'Microsoft Azure Fundamentals: Virtual Machines', + }, + 'playlist_count': 36, + }, { + # with emphasized chapters + 'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335', + 'info_dict': { + 'id': '16335', + 'title': 'Developing Windows 10 Games with Construct 2', + }, + 'playlist_count': 10, + }, { + 'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', + 'only_matching': True, + }, { + 'url': 'mva:course:11788', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if MicrosoftVirtualAcademyIE.suitable(url) else super( + MicrosoftVirtualAcademyCourseIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + course_id = mobj.group('id') + display_id = mobj.group('display_id') + + base_url = self._extract_base_url(course_id, display_id) + + manifest = self._download_json( + '%s/imsmanifestlite.json' % base_url, + display_id, 'Downloading course manifest JSON')['manifest'] + + organization = manifest['organizations']['organization'][0] + + entries = [] + for chapter in organization['item']: + chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title')) + chapter_id = chapter.get('@identifier') + for item in 
chapter.get('item', []): + item_id = item.get('@identifier') + if not item_id: + continue + metadata = item.get('resource', {}).get('metadata') or {} + if metadata.get('learningresourcetype') != 'Video': + continue + _, title = self._extract_chapter_and_title(item.get('title')) + duration = parse_duration(metadata.get('duration')) + description = metadata.get('description') + entries.append({ + '_type': 'url_transparent', + 'url': smuggle_url( + 'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}), + 'title': title, + 'description': description, + 'duration': duration, + 'chapter': chapter_title, + 'chapter_number': chapter_number, + 'chapter_id': chapter_id, + }) + + title = organization.get('title') or manifest.get('metadata', {}).get('title') + + return self.playlist_result(entries, course_id, title) From c52f4efaee2386a72c3f6b694fb4f4c3132ced55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 May 2016 20:10:20 +0600 Subject: [PATCH 0467/3599] [mva] Improve _VALID_URLs --- youtube_dl/extractor/microsoftvirtualacademy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dl/extractor/microsoftvirtualacademy.py index b7fea47ee..afd3e98ec 100644 --- a/youtube_dl/extractor/microsoftvirtualacademy.py +++ b/youtube_dl/extractor/microsoftvirtualacademy.py @@ -31,7 +31,7 @@ class MicrosoftVirtualAcademyBaseIE(InfoExtractor): class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): IE_NAME = 'mva' IE_DESC = 'Microsoft Virtual Academy videos' - _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME _TESTS = [{ 'url': 
'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382', @@ -118,7 +118,7 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE): IE_NAME = 'mva:course' IE_DESC = 'Microsoft Virtual Academy courses' - _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME + _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME _TESTS = [{ 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', From f1f6f5aa5e2a6d66fa54d35bf3e8b3626e85ee73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Cech?= <sleep_walker@suse.cz> Date: Sat, 7 May 2016 20:15:49 +0200 Subject: [PATCH 0468/3599] [ceskatelevize] Add support for live streams Live streams has no playlist title, use title of the stream containing TV channel name. Internal m3u8 handler doesn't seem to handle well continuous streams. Add test for live stream. Remove no longer reachable test. 
--- youtube_dl/extractor/ceskatelevize.py | 35 +++++++++++++++++++-------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 6652c8e42..b41888531 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -33,14 +33,13 @@ class CeskaTelevizeIE(InfoExtractor): 'skip_download': True, }, }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', + # live stream + 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'info_dict': { - 'id': '61924494876844374', + 'id': 402, 'ext': 'mp4', - 'title': 'První republika: Zpěvačka z Dupárny Bobina', - 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', - 'thumbnail': 're:^https?://.*\.jpg', - 'duration': 88.4, + 'title': 're:ČT Sport.*', + 'is_live': True, }, 'params': { # m3u8 download @@ -118,19 +117,21 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage) - playlist_description = self._og_search_description(webpage) + playlist_title = self._og_search_title(webpage, default=None) + playlist_description = self._og_search_description(webpage, default=None) playlist = self._download_json(req, playlist_id)['playlist'] playlist_len = len(playlist) entries = [] for item in playlist: + is_live = item['type'] == 'LIVE' formats = [] for format_id, stream_url in item['streamUrls'].items(): formats.extend(self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', - entry_protocol='m3u8_native', fatal=False)) + entry_protocol='m3u8' if is_live else 'm3u8_native', + fatal=False)) self._sort_formats(formats) item_id = item.get('id') or item['assetId'] @@ -145,14 +146,28 @@ class CeskaTelevizeIE(InfoExtractor): if subs: subtitles = self.extract_subtitles(episode_id, subs) + if 
playlist_len == 1: + if is_live: + # live streams has channel name in title + final_title = self._live_title(title) + elif playlist_title: + # title is always set (no KeyError caught) + # and gives good fallback + final_title = title + else: + final_title = playlist_title + else: + final_title = '%s (%s)' % (playlist_title, title) + entries.append({ 'id': item_id, - 'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title), + 'title': final_title, 'description': playlist_description if playlist_len == 1 else None, 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, }) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) From 3951e7eb9305448aab6395f4303ed7ab19248c52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:37:20 +0600 Subject: [PATCH 0469/3599] [ceskatelevize] Simplify, restore bonus video test and skip georestricted test (Closes #9431) --- youtube_dl/extractor/ceskatelevize.py | 29 ++++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index b41888531..5a58d1777 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -32,19 +32,34 @@ class CeskaTelevizeIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', + 'info_dict': { + 'id': '61924494877028507', + 'ext': 'mp4', + 'title': 'Hyde Park Civilizace: Bonus 01 - En', + 'description': 'English Subtittles', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 81.3, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # live stream 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'info_dict': { 'id': 402, 'ext': 'mp4', 
- 'title': 're:ČT Sport.*', + 'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, + 'skip': 'Georestricted to Czech Republic', }, { # video with 18+ caution trailer 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', @@ -125,7 +140,7 @@ class CeskaTelevizeIE(InfoExtractor): entries = [] for item in playlist: - is_live = item['type'] == 'LIVE' + is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item['streamUrls'].items(): formats.extend(self._extract_m3u8_formats( @@ -147,15 +162,9 @@ class CeskaTelevizeIE(InfoExtractor): subtitles = self.extract_subtitles(episode_id, subs) if playlist_len == 1: + final_title = playlist_title or title if is_live: - # live streams has channel name in title - final_title = self._live_title(title) - elif playlist_title: - # title is always set (no KeyError caught) - # and gives good fallback - final_title = title - else: - final_title = playlist_title + final_title = self._live_title(final_title) else: final_title = '%s (%s)' % (playlist_title, title) From 965fefdcd879405c3e4b5604513719353ba8474a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:38:33 +0600 Subject: [PATCH 0470/3599] Credit @sleep-walker for #9431 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index bf860b7f7..5ca71ace7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -171,3 +171,4 @@ Philip Huppert blahgeek Kevin Deldycke inondle +Tomáš Čech From c15c47d19bfeeacd42f44dd7736f175711a91346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:45:03 +0600 Subject: [PATCH 0471/3599] [downloader/hls] Remove EXT-X-MEDIA-SEQUENCE from unsupported features for hlsnative --- youtube_dl/downloader/hls.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py 
b/youtube_dl/downloader/hls.py index d7b34bde3..dcedc9a64 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,7 +23,9 @@ class HlsFD(FragmentFD): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] - r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + # Live streams heuristic does not always work (e.g. geo restricted to Germany + # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) + #r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 From 6104cc2985c36e996df1aae7cfcc686f3bae0b82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 20:55:37 +0600 Subject: [PATCH 0472/3599] [downloader/hls] Add event media playlists to unsupported features of hlsnative --- youtube_dl/downloader/hls.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index dcedc9a64..a8279718b 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -26,9 +26,12 @@ class HlsFD(FragmentFD): # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) #r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 + # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 ) return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) From fe40f9eef2483748ed83c9749f35220143d8cc9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 21:55:03 +0600 Subject: [PATCH 0473/3599] [compat] Add compat_setenv --- test/test_compat.py | 8 ++++++++ youtube_dl/compat.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/test/test_compat.py b/test/test_compat.py index 618668210..0d751a856 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.utils import get_filesystem_encoding from youtube_dl.compat import ( compat_getenv, + compat_setenv, compat_etree_fromstring, compat_expanduser, compat_shlex_split, @@ -31,6 +32,13 @@ class TestCompat(unittest.TestCase): else test_str.encode(get_filesystem_encoding())) self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) + def test_compat_setenv(self): + test_var = 'YOUTUBE-DL-TEST' + test_str = 'тест' + compat_setenv(test_var, test_str) + compat_getenv(test_var) + self.assertEqual(compat_getenv(test_var), test_str) + def test_compat_expanduser(self): old_home = os.environ.get('HOME') test_str = 'C:\Documents and Settings\тест\Application Data' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0b6c5ca7a..12b53cdc8 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -373,6 +373,9 @@ compat_os_name = os._name if os.name == 'java' else os.name if sys.version_info >= (3, 0): compat_getenv = os.getenv compat_expanduser = os.path.expanduser + + def compat_setenv(key, value, env=os.environ): + env[key] = value else: # Environment variables 
should be decoded with filesystem encoding. # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) @@ -384,6 +387,12 @@ else: env = env.decode(get_filesystem_encoding()) return env + def compat_setenv(key, value, env=os.environ): + def encode(v): + from .utils import get_filesystem_encoding + return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v + env[encode(key)] = encode(value) + # HACK: The default implementations of os.path.expanduser from cpython do not decode # environment variables with filesystem encoding. We will work around this by # providing adjusted implementations. @@ -604,6 +613,7 @@ __all__ = [ 'compat_os_name', 'compat_parse_qs', 'compat_print', + 'compat_setenv', 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', From 129263875403841da485ac74b09960d862d23f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 21:58:38 +0600 Subject: [PATCH 0474/3599] [test_compat] Use compat_setenv --- test/test_compat.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 0d751a856..afe6bd528 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -27,9 +27,7 @@ from youtube_dl.compat import ( class TestCompat(unittest.TestCase): def test_compat_getenv(self): test_str = 'тест' - os.environ['YOUTUBE-DL-TEST'] = ( - test_str if sys.version_info >= (3, 0) - else test_str.encode(get_filesystem_encoding())) + compat_setenv('YOUTUBE-DL-TEST', test_str) self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) def test_compat_setenv(self): @@ -42,11 +40,9 @@ class TestCompat(unittest.TestCase): def test_compat_expanduser(self): old_home = os.environ.get('HOME') test_str = 'C:\Documents and Settings\тест\Application Data' - os.environ['HOME'] = ( - test_str if sys.version_info >= (3, 0) - else test_str.encode(get_filesystem_encoding())) + compat_setenv('HOME', 
test_str) self.assertEqual(compat_expanduser('~'), test_str) - os.environ['HOME'] = old_home + compat_setenv('HOME', old_home) def test_all_present(self): import youtube_dl.compat From 20cfdcc910d0bc2ee4b0ee38bdf5e6ecb67e5731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:00:14 +0600 Subject: [PATCH 0475/3599] [test_compat] Avoid None values for compat_setenv --- test/test_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index afe6bd528..b20814249 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -42,7 +42,7 @@ class TestCompat(unittest.TestCase): test_str = 'C:\Documents and Settings\тест\Application Data' compat_setenv('HOME', test_str) self.assertEqual(compat_expanduser('~'), test_str) - compat_setenv('HOME', old_home) + compat_setenv('HOME', old_home or '') def test_all_present(self): import youtube_dl.compat From e62d9c5caaa972ef4b1ed5d6ab5ee4a087a4ba95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:05:12 +0600 Subject: [PATCH 0476/3599] [downloader/external] Call ffmpeg with with HTTP_PROXY env variable set (#9437) --- youtube_dl/downloader/external.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 45f49c350..3a73cee1c 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -6,6 +6,7 @@ import sys import re from .common import FileDownloader +from ..compat import compat_setenv from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( cli_option, @@ -198,6 +199,18 @@ class FFmpegFD(ExternalFD): '-headers', ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + env = None + proxy = self.params.get('proxy') + if proxy: + if not re.match(r'^[\da-zA-Z]+://', proxy): + 
proxy = 'http://%s' % proxy + # Since December 2015 ffmpeg supports -http_proxy option (see + # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) + # We could switch to the following code if we are able to detect version properly + # args += ['-http_proxy', proxy] + env = os.environ.copy() + compat_setenv('HTTP_PROXY', proxy, env=env) + protocol = info_dict.get('protocol') if protocol == 'rtmp': @@ -239,7 +252,7 @@ class FFmpegFD(ExternalFD): self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE) + proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) try: retval = proc.wait() except KeyboardInterrupt: From fad7bbec3a1fb62964c8e6637dfd535fabe9c133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:15:55 +0600 Subject: [PATCH 0477/3599] [test_compat] Remove unused import --- test/test_compat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_compat.py b/test/test_compat.py index b20814249..9adf75763 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -10,7 +10,6 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.utils import get_filesystem_encoding from youtube_dl.compat import ( compat_getenv, compat_setenv, From 2937590e8b70384ef91bdadbb56a55897aab0837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 May 2016 22:16:33 +0600 Subject: [PATCH 0478/3599] [downloader/hls] PEP 8 --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index a8279718b..62136ee54 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -25,7 +25,7 @@ class HlsFD(FragmentFD): r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. 
geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) - #r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 From 4350b74545ea3d3ce072444655613bc6974d5516 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 23 Apr 2016 15:44:34 +0800 Subject: [PATCH 0479/3599] [socks] Add socks.py from @bluec0re's public domain implementation https://gist.github.com/bluec0re/cafd3764412967417fd3 --- youtube_dl/socks.py | 336 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100644 youtube_dl/socks.py diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py new file mode 100644 index 000000000..b0c36a189 --- /dev/null +++ b/youtube_dl/socks.py @@ -0,0 +1,336 @@ +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to <http://unlicense.org/> +# +# Example: +# import socks +# import ftplib +# import socket +# +# socks.patch_socket() +# +# f = ftplib.FTP('ftp.kernel.org') +# f.login() +# print f.retrlines('LIST') +# f.quit() +# +# s = socket.create_connection(('www.google.com', 80)) +# s.sendall('HEAD / HTTP/1.0\r\n\r\n') +# print s.recv(1024) +# s.close() +from __future__ import unicode_literals +import os +import struct +import socket +import time + +__author__ = 'Timo Schmid <coding@timoschmid.de>' + +_orig_socket = socket.socket + +try: + from collections import namedtuple +except ImportError: + from Collections import namedtuple + +try: + from urllib.parse import urlparse +except: + from urlparse import urlparse + +try: + from enum import Enum +except ImportError: + Enum = object + + +class ProxyError(IOError): pass +class Socks4Error(ProxyError): + CODES = { + 0x5B: 'request rejected or failed', + 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 0x5D: 'request rejected because the client program and identd report different user-ids' + } + def __init__(self, code=None, msg=None): + if code is not None and msg is None: + msg = self.CODES.get(code) + if msg is None: + msg = 'unknown error' + super(Socks4Error, self).__init__(code, msg) + +class Socks5Error(Socks4Error): + CODES = { + 0x01: 'general SOCKS server failure', + 0x02: 'connection not allowed by ruleset', + 0x03: 'Network unreachable', + 0x04: 'Host unreachable', + 0x05: 
'Connection refused', + 0x06: 'TTL expired', + 0x07: 'Command not supported', + 0x08: 'Address type not supported', + 0xFE: 'unknown username or invalid password', + 0xFF: 'all offered authentication methods were rejected' + } + +class ProxyType(Enum): + SOCKS4 = 0 + SOCKS4A = 1 + SOCKS5 = 2 + +Proxy = namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) + +_default_proxy = None + +def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None, allow_env_override=True): + global _default_proxy + if allow_env_override: + all_proxy = os.environ.get('ALL_PROXY', os.environ.get('all_proxy')) + if all_proxy: + all_proxy = urlparse(all_proxy) + if all_proxy.scheme.startswith('socks'): + if all_proxy.scheme == 'socks' or all_proxy.scheme == 'socks4': + proxytype = ProxyType.SOCKS4 + elif all_proxy.scheme == 'socks4a': + proxytype = ProxyType.SOCKS4A + elif all_proxy.scheme == 'socks5': + proxytype = ProxyType.SOCKS5 + addr = all_proxy.hostname + port = all_proxy.port + username = all_proxy.username + password = all_proxy.password + + if proxytype is not None: + _default_proxy = Proxy(proxytype, addr, port, username, password, rdns) + + +def wrap_socket(sock): + return socksocket(_sock=sock._sock) + +def wrap_module(module): + if hasattr(module, 'socket'): + sock = module.socket + if isinstance(sock, socket.socket): + module.socket = sockssocket + elif hasattr(socket, 'socket'): + socket.socket = sockssocket + +def patch_socket(): + import sys + if 'socket' not in sys.modules: + import socket + sys.modules['socket'].socket = sockssocket + + +class sockssocket(socket.socket): + def __init__(self, *args, **kwargs): + self.__proxy = None + if 'proxy' in kwargs: + self.__proxy = kwargs['proxy'] + del kwargs['proxy'] + super(sockssocket, self).__init__(*args, **kwargs) + + @property + def _proxy(self): + if self.__proxy: + return self.__proxy + return _default_proxy + + @property + def _proxy_port(self): + if 
self._proxy: + if self._proxy.port: + return self._proxy.port + return 1080 + return None + + def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): + if proxytype is None: + self.__proxy = None + else: + self.__proxy = Proxy(proxytype, addr, port, username, password, rdns) + + def recvall(self, cnt): + data = b'' + while len(data) < cnt: + cur = self.recv(cnt - len(data)) + if not cur: + raise IOError("{0} bytes missing".format(cnt-len(data))) + data += cur + return data + + def _setup_socks4(self, address, is_4a=False): + destaddr, port = address + + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + if is_4a and self._proxy.remote_dns: + ipaddr = struct.pack('!BBBB', 0, 0, 0, 0xFF) + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + + packet = struct.pack('!BBH', 0x4, 0x1, port) + ipaddr + if self._proxy.username: + username = self._proxy.username + if hasattr(username, 'encode'): + username = username.encode() + packet += struct.pack('!{0}s'.format(len(username)+1), username) + else: + packet += b'\x00' + + if is_4a and self._proxy.remote_dns: + if hasattr(destaddr, 'encode'): + destaddr = destaddr.encode() + packet += struct.pack('!{0}s'.format(len(destaddr)+1), destaddr) + + self.sendall(packet) + + packet = self.recvall(8) + nbyte, resp_code, dstport, dsthost = struct.unpack('!BBHI', packet) + + # check valid response + if nbyte != 0x00: + self.close() + raise ProxyError(0, "Invalid response from server. 
Expected {0:02x} got {1:02x}".format(0, nbyte)) + + # access granted + if resp_code != 0x5a: + self.close() + raise Socks4Error(resp_code) + + def _setup_socks5(self, address): + destaddr, port = address + + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + if self._proxy.remote_dns: + ipaddr = None + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + + auth_methods = 1 + if self._proxy.username and self._proxy.password: + # two auth methods available + auth_methods = 2 + packet = struct.pack('!BBB', 0x5, auth_methods, 0x00) # no auth + if self._proxy.username and self._proxy.password: + packet += struct.pack('!B', 0x02) # user/pass auth + + self.sendall(packet) + + packet = self.recvall(2) + version, method = struct.unpack('!BB', packet) + + # check valid response + if version != 0x05: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + + # no auth methods + if method == 0xFF: + self.close() + raise Socks5Error(method) + + # user/pass auth + if method == 0x01: + username = self._proxy.username + if hasattr(username, 'encode'): + username = username.encode() + password = self._proxy.password + if hasattr(password, 'encode'): + password = password.encode() + packet = struct.pack('!BB', 1, len(username)) + username + packet += struct.pack('!B', len(password)) + password + self.sendall(packet) + + packet = self.recvall(2) + version, status = struct.unpack('!BB', packet) + + if version != 0x01: + self.close() + raise ProxyError(0, "Invalid response from server. 
Expected {0:02x} got {1:02x}".format(1, version)) + + if status != 0x00: + self.close() + raise Socks5Error(1) + elif method == 0x00: # no auth + pass + + + packet = struct.pack('!BBB', 5, 1, 0) + if ipaddr is None: + if hasattr(destaddr, 'encode'): + destaddr = destaddr.encode() + packet += struct.pack('!BB', 3, len(destaddr)) + destaddr + else: + packet += struct.pack('!B', 1) + ipaddr + packet += struct.pack('!H', port) + + self.sendall(packet) + + packet = self.recvall(4) + version, status, _, atype = struct.unpack('!BBBB', packet) + + if version != 0x05: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + + if status != 0x00: + self.close() + raise Socks5Error(status) + + if atype == 0x01: + destaddr = self.recvall(4) + elif atype == 0x03: + alen = struct.unpack('!B', self.recv(1))[0] + destaddr = self.recvall(alen) + elif atype == 0x04: + destaddr = self.recvall(16) + destport = struct.unpack('!H', self.recvall(2))[0] + + def _make_proxy(self, connect_func, address): + if self._proxy.type == ProxyType.SOCKS4: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks4(address) + elif self._proxy.type == ProxyType.SOCKS4A: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks4(address, is_4a=True) + elif self._proxy.type == ProxyType.SOCKS5: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks5(address) + else: + return connect_func(self, address) + + def connect(self, address): + self._make_proxy(_orig_socket.connect, address) + + def connect_ex(self, address): + return self._make_proxy(_orig_socket.connect_ex, address) From dab0daeeb0929b9b560d2b9a5f39c1e2e6dfa449 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Sat, 23 Apr 2016 18:28:49 +0800 Subject: [PATCH 0480/3599] [utils,compat] Move struct_pack and struct_unpack to compat.py --- test/test_compat.py | 5 +++++ test/test_utils.py | 4 ---- youtube_dl/compat.py | 23 +++++++++++++++++++++++ youtube_dl/downloader/f4m.py | 4 ++-- youtube_dl/extractor/rtve.py | 4 +++- youtube_dl/swfinterp.py | 6 ++++-- youtube_dl/utils.py | 20 +------------------- 7 files changed, 38 insertions(+), 28 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 9adf75763..dd62a5d6b 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -20,6 +20,7 @@ from youtube_dl.compat import ( compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, + struct_unpack, ) @@ -102,5 +103,9 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_struct_unpack(self): + self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 00ada95ec..5702ffa97 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -55,7 +55,6 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, - struct_unpack, timeconvert, unescapeHTML, unified_strdate, @@ -457,9 +456,6 @@ class TestUtil(unittest.TestCase): testPL(5, 2, (2, 99), [2, 3, 4]) testPL(5, 2, (20, 99), []) - def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) - def test_read_batch_urls(self): f = io.StringIO('''\xef\xbb\xbf foo bar\r diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 12b53cdc8..f697bee7e 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -11,6 +11,7 @@ import re import shlex import shutil import socket +import struct import subprocess import sys import itertools @@ -592,6 +593,26 @@ if sys.version_info >= (3, 0): else: from 
tokenize import generate_tokens as compat_tokenize_tokenize + +try: + struct.pack('!I', 0) +except TypeError: + # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument + # See https://bugs.python.org/issue19099 + def struct_pack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.pack(spec, *args) + + def struct_unpack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.unpack(spec, *args) +else: + struct_pack = struct.pack + struct_unpack = struct.unpack + + __all__ = [ 'compat_HTMLParser', 'compat_HTTPError', @@ -634,6 +655,8 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'shlex_quote', + 'struct_pack', + 'struct_unpack', 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 664d87543..b282fe3d6 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,13 +12,13 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, + struct_pack, + struct_unpack, ) from ..utils import ( encodeFilename, fix_xml_ampersands, sanitize_open, - struct_pack, - struct_unpack, xpath_text, ) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 79af47715..f59040877 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,6 +6,9 @@ import re import time from .common import InfoExtractor +from ..compat import ( + struct_unpack, +) from ..utils import ( ExtractorError, float_or_none, @@ -13,7 +16,6 @@ from ..utils import ( remove_start, sanitized_Request, std_headers, - struct_unpack, ) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 06c1d6cc1..86b28716c 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,10 +4,12 @@ import collections import io import zlib -from .compat import compat_str +from .compat import ( + compat_str, + struct_unpack, +) 
from .utils import ( ExtractorError, - struct_unpack, ) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6e4573784..fa16a42ad 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,7 +26,6 @@ import platform import re import socket import ssl -import struct import subprocess import sys import tempfile @@ -53,6 +52,7 @@ from .compat import ( compat_urlparse, compat_xpath, shlex_quote, + struct_pack, ) @@ -1761,24 +1761,6 @@ def escape_url(url): fragment=escape_rfc3986(url_parsed.fragment) ).geturl() -try: - struct.pack('!I', 0) -except TypeError: - # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument - # See https://bugs.python.org/issue19099 - def struct_pack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.pack(spec, *args) - - def struct_unpack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.unpack(spec, *args) -else: - struct_pack = struct.pack - struct_unpack = struct.unpack - def read_batch_urls(batch_fd): def fixup(url): From 71aff18809a70b7fa32d8fd07f4fb2f64641aea5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 23 Apr 2016 21:30:06 +0800 Subject: [PATCH 0481/3599] [socks] Support SOCKS proxies --- youtube_dl/socks.py | 196 ++++++++++++-------------------------------- youtube_dl/utils.py | 63 +++++++++++++- 2 files changed, 115 insertions(+), 144 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index b0c36a189..95795b5a9 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -1,77 +1,30 @@ -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. 
-# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# For more information, please refer to <http://unlicense.org/> -# -# Example: -# import socks -# import ftplib -# import socket -# -# socks.patch_socket() -# -# f = ftplib.FTP('ftp.kernel.org') -# f.login() -# print f.retrlines('LIST') -# f.quit() -# -# s = socket.create_connection(('www.google.com', 80)) -# s.sendall('HEAD / HTTP/1.0\r\n\r\n') -# print s.recv(1024) -# s.close() +# Public Domain SOCKS proxy protocol implementation +# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 + from __future__ import unicode_literals -import os -import struct + +import collections import socket -import time + +from .compat import ( + struct_pack, + struct_unpack, +) __author__ = 'Timo Schmid <coding@timoschmid.de>' -_orig_socket = socket.socket -try: - from collections import namedtuple -except ImportError: - from Collections import namedtuple - -try: - from urllib.parse import urlparse -except: - from urlparse import urlparse - -try: - from enum import Enum -except ImportError: - Enum = object +class ProxyError(IOError): + pass -class ProxyError(IOError): 
pass class Socks4Error(ProxyError): CODES = { 0x5B: 'request rejected or failed', 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', 0x5D: 'request rejected because the client program and identd report different user-ids' } + def __init__(self, code=None, msg=None): if code is not None and msg is None: msg = self.CODES.get(code) @@ -79,6 +32,7 @@ class Socks4Error(ProxyError): msg = 'unknown error' super(Socks4Error, self).__init__(code, msg) + class Socks5Error(Socks4Error): CODES = { 0x01: 'general SOCKS server failure', @@ -93,68 +47,19 @@ class Socks5Error(Socks4Error): 0xFF: 'all offered authentication methods were rejected' } -class ProxyType(Enum): - SOCKS4 = 0 + +class ProxyType(object): + SOCKS4 = 0 SOCKS4A = 1 - SOCKS5 = 2 + SOCKS5 = 2 -Proxy = namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) - -_default_proxy = None - -def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None, allow_env_override=True): - global _default_proxy - if allow_env_override: - all_proxy = os.environ.get('ALL_PROXY', os.environ.get('all_proxy')) - if all_proxy: - all_proxy = urlparse(all_proxy) - if all_proxy.scheme.startswith('socks'): - if all_proxy.scheme == 'socks' or all_proxy.scheme == 'socks4': - proxytype = ProxyType.SOCKS4 - elif all_proxy.scheme == 'socks4a': - proxytype = ProxyType.SOCKS4A - elif all_proxy.scheme == 'socks5': - proxytype = ProxyType.SOCKS5 - addr = all_proxy.hostname - port = all_proxy.port - username = all_proxy.username - password = all_proxy.password - - if proxytype is not None: - _default_proxy = Proxy(proxytype, addr, port, username, password, rdns) - - -def wrap_socket(sock): - return socksocket(_sock=sock._sock) - -def wrap_module(module): - if hasattr(module, 'socket'): - sock = module.socket - if isinstance(sock, socket.socket): - module.socket = sockssocket - elif hasattr(socket, 'socket'): - socket.socket = sockssocket - -def 
patch_socket(): - import sys - if 'socket' not in sys.modules: - import socket - sys.modules['socket'].socket = sockssocket +Proxy = collections.namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) class sockssocket(socket.socket): - def __init__(self, *args, **kwargs): - self.__proxy = None - if 'proxy' in kwargs: - self.__proxy = kwargs['proxy'] - del kwargs['proxy'] - super(sockssocket, self).__init__(*args, **kwargs) - @property def _proxy(self): - if self.__proxy: - return self.__proxy - return _default_proxy + return self.__proxy @property def _proxy_port(self): @@ -175,7 +80,7 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise IOError("{0} bytes missing".format(cnt-len(data))) + raise IOError('{0} bytes missing'.format(cnt - len(data))) data += cur return data @@ -186,39 +91,42 @@ class sockssocket(socket.socket): ipaddr = socket.inet_aton(destaddr) except socket.error: if is_4a and self._proxy.remote_dns: - ipaddr = struct.pack('!BBBB', 0, 0, 0, 0xFF) + ipaddr = struct_pack('!BBBB', 0, 0, 0, 0xFF) else: ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - packet = struct.pack('!BBH', 0x4, 0x1, port) + ipaddr + packet = struct_pack('!BBH', 0x4, 0x1, port) + ipaddr if self._proxy.username: username = self._proxy.username if hasattr(username, 'encode'): username = username.encode() - packet += struct.pack('!{0}s'.format(len(username)+1), username) + packet += struct_pack('!{0}s'.format(len(username) + 1), username) else: packet += b'\x00' if is_4a and self._proxy.remote_dns: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!{0}s'.format(len(destaddr)+1), destaddr) + packet += struct_pack('!{0}s'.format(len(destaddr) + 1), destaddr) self.sendall(packet) packet = self.recvall(8) - nbyte, resp_code, dstport, dsthost = struct.unpack('!BBHI', packet) + nbyte, resp_code, dstport, dsthost = struct_unpack('!BBHI', packet) # check 
valid response if nbyte != 0x00: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(0, nbyte)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(0, nbyte)) # access granted if resp_code != 0x5a: self.close() raise Socks4Error(resp_code) + return (dsthost, dstport) + def _setup_socks5(self, address): destaddr, port = address @@ -234,19 +142,20 @@ class sockssocket(socket.socket): if self._proxy.username and self._proxy.password: # two auth methods available auth_methods = 2 - packet = struct.pack('!BBB', 0x5, auth_methods, 0x00) # no auth + packet = struct_pack('!BBB', 0x5, auth_methods, 0x00) # no auth if self._proxy.username and self._proxy.password: - packet += struct.pack('!B', 0x02) # user/pass auth + packet += struct_pack('!B', 0x02) # user/pass auth self.sendall(packet) packet = self.recvall(2) - version, method = struct.unpack('!BB', packet) + version, method = struct_unpack('!BB', packet) # check valid response if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) # no auth methods if method == 0xFF: @@ -261,41 +170,42 @@ class sockssocket(socket.socket): password = self._proxy.password if hasattr(password, 'encode'): password = password.encode() - packet = struct.pack('!BB', 1, len(username)) + username - packet += struct.pack('!B', len(password)) + password + packet = struct_pack('!BB', 1, len(username)) + username + packet += struct_pack('!B', len(password)) + password self.sendall(packet) packet = self.recvall(2) - version, status = struct.unpack('!BB', packet) + version, status = struct_unpack('!BB', packet) if version != 0x01: self.close() - raise ProxyError(0, "Invalid response from server. 
Expected {0:02x} got {1:02x}".format(1, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(1, version)) if status != 0x00: self.close() raise Socks5Error(1) - elif method == 0x00: # no auth + elif method == 0x00: # no auth pass - - packet = struct.pack('!BBB', 5, 1, 0) + packet = struct_pack('!BBB', 5, 1, 0) if ipaddr is None: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!BB', 3, len(destaddr)) + destaddr + packet += struct_pack('!BB', 3, len(destaddr)) + destaddr else: - packet += struct.pack('!B', 1) + ipaddr - packet += struct.pack('!H', port) + packet += struct_pack('!B', 1) + ipaddr + packet += struct_pack('!H', port) self.sendall(packet) packet = self.recvall(4) - version, status, _, atype = struct.unpack('!BBBB', packet) + version, status, _, atype = struct_unpack('!BBBB', packet) if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(5, version)) if status != 0x00: self.close() @@ -304,11 +214,13 @@ class sockssocket(socket.socket): if atype == 0x01: destaddr = self.recvall(4) elif atype == 0x03: - alen = struct.unpack('!B', self.recv(1))[0] + alen = struct_unpack('!B', self.recv(1))[0] destaddr = self.recvall(alen) elif atype == 0x04: destaddr = self.recvall(16) - destport = struct.unpack('!H', self.recvall(2))[0] + destport = struct_unpack('!H', self.recvall(2))[0] + + return (destaddr, destport) def _make_proxy(self, connect_func, address): if self._proxy.type == ProxyType.SOCKS4: @@ -330,7 +242,7 @@ class sockssocket(socket.socket): return connect_func(self, address) def connect(self, address): - self._make_proxy(_orig_socket.connect, address) + self._make_proxy(socket.socket.connect, address) def connect_ex(self, address): - return self._make_proxy(_orig_socket.connect_ex, address) + return self._make_proxy(socket.socket.connect_ex, address) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fa16a42ad..b2e4a2dfb 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -55,6 +55,11 @@ from .compat import ( struct_pack, ) +from .socks import ( + ProxyType, + sockssocket, +) + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -752,8 +757,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): self._params = params def http_open(self, req): + conn_class = compat_http_client.HTTPConnection + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, compat_http_client.HTTPConnection, False), + _create_http_connection, self, conn_class, False), req) @staticmethod @@ -849,6 +861,41 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response +def make_socks_conn_class(base_class, 
socks_proxy): + assert issubclass(base_class, ( + compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection)) + + url_components = compat_urlparse.urlparse(socks_proxy) + if url_components.scheme.lower() == 'socks5': + socks_type = ProxyType.SOCKS5 + elif url_components.scheme.lower() in ('socks', 'socks4'): + socks_type = ProxyType.SOCKS4 + + proxy_args = ( + socks_type, + url_components.hostname, url_components.port or 1080, + True, # Remote DNS + url_components.username, url_components.password + ) + + class SocksConnection(base_class): + def connect(self): + self.sock = sockssocket() + self.sock.setproxy(*proxy_args) + if type(self.timeout) in (int, float): + self.sock.settimeout(self.timeout) + self.sock.connect((self.host, self.port)) + + if isinstance(self, compat_http_client.HTTPSConnection): + if hasattr(self, '_context'): # Python > 2.6 + self.sock = self._context.wrap_socket( + self.sock, server_hostname=self.host) + else: + self.sock = ssl.wrap_socket(self.sock) + + return SocksConnection + + class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def __init__(self, params, https_conn_class=None, *args, **kwargs): compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs) @@ -857,12 +904,20 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def https_open(self, req): kwargs = {} + conn_class = self._https_conn_class + if hasattr(self, '_context'): # python > 2.6 kwargs['context'] = self._context if hasattr(self, '_check_hostname'): # python 3.x kwargs['check_hostname'] = self._check_hostname + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, self._https_conn_class, True), + _create_http_connection, self, conn_class, True), req, **kwargs) @@ -2683,6 +2738,10 @@ class 
PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'): + req.add_header('Ytdl-socks-proxy', proxy) + # youtube-dl's http/https handlers do wrapping the socket with socks + return None return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type) From 72f3289ac48d8dbfe1ee3fd2d82a23f1bff045df Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 23 Apr 2016 21:30:44 +0800 Subject: [PATCH 0482/3599] [test/test_socks] Add tests for SOCKS proxies --- .gitignore | 1 + Makefile | 2 +- test/helper.py | 5 ++++ test/test_socks.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++ tox.ini | 1 + 5 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 test/test_socks.py diff --git a/.gitignore b/.gitignore index 72c10425d..0e7128551 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ updates_key.pem *.part *.swp test/testdata +test/local_parameters.json .tox youtube-dl.zsh .idea diff --git a/Makefile b/Makefile index c9ce216d1..5d7cd5a7e 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test: ot: offlinetest offlinetest: codetest - $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py tar: youtube-dl.tar.gz diff --git a/test/helper.py b/test/helper.py index b8e22c5cb..dfee217a9 100644 --- a/test/helper.py +++ b/test/helper.py @@ -24,8 +24,13 @@ from youtube_dl.utils import ( def get_params(override=None): PARAMETERS_FILE = 
os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") + LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "local_parameters.json") with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) + if os.path.exists(LOCAL_PARAMETERS_FILE): + with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + parameters.update(json.load(pf)) if override: parameters.update(override) return parameters diff --git a/test/test_socks.py b/test/test_socks.py new file mode 100644 index 000000000..92574c6fd --- /dev/null +++ b/test/test_socks.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import (FakeYDL, get_params) +from youtube_dl.compat import compat_urllib_request + + +class TestSocks(unittest.TestCase): + @staticmethod + def _check_params(attrs): + params = get_params() + for attr in attrs: + if attr not in params: + print('Missing %s. Skipping.' 
% attr) + return + return params + + def test_proxy_http(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_proxy_https(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_secondary_proxy_http(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('http://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + def test_secondary_proxy_https(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('https://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index 2d7134005..9c4e4a3d1 100644 --- a/tox.ini +++ b/tox.ini @@ -9,5 +9,6 @@ passenv = HOME defaultargs = test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + --exclude test_socks.py commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html # test.test_download:TestDownload.test_NowVideo 
From 9e9cd7248d387954d1009087ac300ee3ff6a9766 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 15:11:05 +0800 Subject: [PATCH 0483/3599] [socks] Eliminate magic constants and improve --- youtube_dl/socks.py | 289 ++++++++++++++++++++++++-------------------- 1 file changed, 157 insertions(+), 132 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 95795b5a9..0e3dd7893 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -3,37 +3,87 @@ from __future__ import unicode_literals +# References: +# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol +# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol +# SOCKS5 protocol https://tools.ietf.org/html/rfc1928 +# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929 + import collections import socket from .compat import ( + compat_ord, struct_pack, struct_unpack, ) __author__ = 'Timo Schmid <coding@timoschmid.de>' +SOCKS4_VERSION = 4 +SOCKS4_REPLY_VERSION = 0x00 +# Excerpt from SOCKS4A protocol: +# if the client cannot resolve the destination host's domain name to find its +# IP address, it should set the first three bytes of DSTIP to NULL and the last +# byte to a non-zero value. 
+SOCKS4_DEFAULT_DSTIP = struct_pack('!BBBB', 0, 0, 0, 0xFF) + +SOCKS5_VERSION = 5 +SOCKS5_USER_AUTH_VERSION = 0x01 +SOCKS5_USER_AUTH_SUCCESS = 0x00 + + +class Socks4Command(object): + CMD_CONNECT = 0x01 + CMD_BIND = 0x02 + + +class Socks5Command(Socks4Command): + CMD_UDP_ASSOCIATE = 0x03 + + +class Socks5Auth(object): + AUTH_NONE = 0x00 + AUTH_GSSAPI = 0x01 + AUTH_USER_PASS = 0x02 + AUTH_NO_ACCEPTABLE = 0xFF # For server response + + +class Socks5AddressType(object): + ATYP_IPV4 = 0x01 + ATYP_DOMAINNAME = 0x03 + ATYP_IPV6 = 0x04 + class ProxyError(IOError): - pass - - -class Socks4Error(ProxyError): - CODES = { - 0x5B: 'request rejected or failed', - 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', - 0x5D: 'request rejected because the client program and identd report different user-ids' - } + ERR_SUCCESS = 0x00 def __init__(self, code=None, msg=None): if code is not None and msg is None: - msg = self.CODES.get(code) - if msg is None: - msg = 'unknown error' - super(Socks4Error, self).__init__(code, msg) + msg = self.CODES.get(code) and 'unknown error' + super(ProxyError, self).__init__(code, msg) -class Socks5Error(Socks4Error): +class InvalidVersionError(ProxyError): + def __init__(self, expected_version, got_version): + msg = ('Invalid response version from server. 
Expected {0:02x} got ' + '{1:02x}'.format(expected_version, got_version)) + super(InvalidVersionError, self).__init__(0, msg) + + +class Socks4Error(ProxyError): + ERR_SUCCESS = 90 + + CODES = { + 91: 'request rejected or failed', + 92: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 93: 'request rejected because the client program and identd report different user-ids' + } + + +class Socks5Error(ProxyError): + ERR_GENERAL_FAILURE = 0x01 + CODES = { 0x01: 'general SOCKS server failure', 0x02: 'connection not allowed by ruleset', @@ -53,27 +103,19 @@ class ProxyType(object): SOCKS4A = 1 SOCKS5 = 2 -Proxy = collections.namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) +Proxy = collections.namedtuple('Proxy', ( + 'type', 'host', 'port', 'username', 'password', 'remote_dns')) class sockssocket(socket.socket): - @property - def _proxy(self): - return self.__proxy + def __init__(self, *args, **kwargs): + self._proxy = None + super(sockssocket, self).__init__(*args, **kwargs) - @property - def _proxy_port(self): - if self._proxy: - if self._proxy.port: - return self._proxy.port - return 1080 - return None + def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None): + assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5) - def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): - if proxytype is None: - self.__proxy = None - else: - self.__proxy = Proxy(proxytype, addr, port, username, password, rdns) + self._proxy = Proxy(proxytype, addr, port, username, password, rdns) def recvall(self, cnt): data = b'' @@ -84,163 +126,146 @@ class sockssocket(socket.socket): data += cur return data + def _recv_bytes(self, cnt): + data = self.recvall(cnt) + return struct_unpack('!{0}B'.format(cnt), data) + + @staticmethod + def _len_and_data(data): + return struct_pack('!B', len(data)) + data + + def _check_response_version(self, 
expected_version, got_version): + if got_version != expected_version: + self.close() + raise InvalidVersionError(expected_version, got_version) + + def _resolve_address(self, destaddr, default, use_remote_dns): + try: + return socket.inet_aton(destaddr) + except socket.error: + if use_remote_dns and self._proxy.remote_dns: + return default + else: + return socket.inet_aton(socket.gethostbyname(destaddr)) + def _setup_socks4(self, address, is_4a=False): destaddr, port = address - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - if is_4a and self._proxy.remote_dns: - ipaddr = struct_pack('!BBBB', 0, 0, 0, 0xFF) - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) - packet = struct_pack('!BBH', 0x4, 0x1, port) + ipaddr - if self._proxy.username: - username = self._proxy.username - if hasattr(username, 'encode'): - username = username.encode() - packet += struct_pack('!{0}s'.format(len(username) + 1), username) - else: - packet += b'\x00' + packet = struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + + username = (self._proxy.username or '').encode('utf-8') + packet += username + b'\x00' if is_4a and self._proxy.remote_dns: - if hasattr(destaddr, 'encode'): - destaddr = destaddr.encode() - packet += struct_pack('!{0}s'.format(len(destaddr) + 1), destaddr) + packet += destaddr.encode('utf-8') + b'\x00' self.sendall(packet) - packet = self.recvall(8) - nbyte, resp_code, dstport, dsthost = struct_unpack('!BBHI', packet) + version, resp_code, dstport, dsthost = struct_unpack('!BBHI', self.recvall(8)) - # check valid response - if nbyte != 0x00: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(0, nbyte)) + self._check_response_version(SOCKS4_REPLY_VERSION, version) - # access granted - if resp_code != 0x5a: + if resp_code != Socks4Error.ERR_SUCCESS: self.close() raise Socks4Error(resp_code) return (dsthost, dstport) - def _setup_socks5(self, address): - destaddr, port = address + def _setup_socks4a(self, address): + self._setup_socks4(address, is_4a=True) - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - if self._proxy.remote_dns: - ipaddr = None - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + def _socks5_auth(self): + packet = struct_pack('!B', SOCKS5_VERSION) - auth_methods = 1 + auth_methods = [Socks5Auth.AUTH_NONE] if self._proxy.username and self._proxy.password: - # two auth methods available - auth_methods = 2 - packet = struct_pack('!BBB', 0x5, auth_methods, 0x00) # no auth - if self._proxy.username and self._proxy.password: - packet += struct_pack('!B', 0x02) # user/pass auth + auth_methods.append(Socks5Auth.AUTH_USER_PASS) + + packet += struct_pack('!B', len(auth_methods)) + packet += struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) self.sendall(packet) - packet = self.recvall(2) - version, method = struct_unpack('!BB', packet) + version, method = self._recv_bytes(2) - # check valid response - if version != 0x05: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(5, version)) + self._check_response_version(SOCKS5_VERSION, version) - # no auth methods - if method == 0xFF: + if method == Socks5Auth.AUTH_NO_ACCEPTABLE: self.close() raise Socks5Error(method) - # user/pass auth - if method == 0x01: - username = self._proxy.username - if hasattr(username, 'encode'): - username = username.encode() - password = self._proxy.password - if hasattr(password, 'encode'): - password = password.encode() - packet = struct_pack('!BB', 1, len(username)) + username - packet += struct_pack('!B', len(password)) + password + if method == Socks5Auth.AUTH_USER_PASS: + username = self._proxy.username.encode('utf-8') + password = self._proxy.password.encode('utf-8') + packet = struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) - packet = self.recvall(2) - version, status = struct_unpack('!BB', packet) + version, status = self._recv_bytes(2) - if version != 0x01: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(1, version)) + self._check_response_version(SOCKS5_USER_AUTH_VERSION, version) - if status != 0x00: + if status != SOCKS5_USER_AUTH_SUCCESS: self.close() - raise Socks5Error(1) - elif method == 0x00: # no auth + raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) + elif method == Socks5Auth.AUTH_NONE: pass - packet = struct_pack('!BBB', 5, 1, 0) + def _setup_socks5(self, address): + destaddr, port = address + + ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) + + self._socks5_auth() + + reserved = 0 + packet = struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: - if hasattr(destaddr, 'encode'): - destaddr = destaddr.encode() - packet += struct_pack('!BB', 3, len(destaddr)) + destaddr + destaddr = destaddr.encode('utf-8') + packet += struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += self._len_and_data(destaddr) else: - packet += struct_pack('!B', 1) + ipaddr + packet += struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr packet += struct_pack('!H', port) self.sendall(packet) - packet = self.recvall(4) - version, status, _, atype = struct_unpack('!BBBB', packet) + version, status, reserved, atype = self._recv_bytes(4) - if version != 0x05: - self.close() - raise ProxyError( - 0, 'Invalid response from server. 
Expected {0:02x} got {1:02x}'.format(5, version)) + self._check_response_version(SOCKS5_VERSION, version) - if status != 0x00: + if status != Socks5Error.ERR_SUCCESS: self.close() raise Socks5Error(status) - if atype == 0x01: + if atype == Socks5AddressType.ATYP_IPV4: destaddr = self.recvall(4) - elif atype == 0x03: - alen = struct_unpack('!B', self.recv(1))[0] + elif atype == Socks5AddressType.ATYP_DOMAINNAME: + alen = compat_ord(self.recv(1)) destaddr = self.recvall(alen) - elif atype == 0x04: + elif atype == Socks5AddressType.ATYP_IPV6: destaddr = self.recvall(16) destport = struct_unpack('!H', self.recvall(2))[0] return (destaddr, destport) def _make_proxy(self, connect_func, address): - if self._proxy.type == ProxyType.SOCKS4: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks4(address) - elif self._proxy.type == ProxyType.SOCKS4A: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks4(address, is_4a=True) - elif self._proxy.type == ProxyType.SOCKS5: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks5(address) - else: + if not self._proxy: return connect_func(self, address) + result = connect_func(self, (self._proxy.host, self._proxy.port)) + if result != 0 and result is not None: + return result + setup_funcs = { + ProxyType.SOCKS4: self._setup_socks4, + ProxyType.SOCKS4A: self._setup_socks4a, + ProxyType.SOCKS5: self._setup_socks5, + } + setup_funcs[self._proxy.type](address) + return result + def connect(self, address): self._make_proxy(socket.socket.connect, address) From 51fb4995a5242c0edca09167cf8c4b050cf5a186 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 15:15:32 +0800 Subject: [PATCH 0484/3599] [utils] Register SOCKS protocols in urllib 
and support SOCKS4A --- youtube_dl/YoutubeDL.py | 3 +++ youtube_dl/utils.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a96482e68..34eeb77c5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -64,6 +64,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + register_socks_protocols, render_table, replace_extension, SameFileError, @@ -361,6 +362,8 @@ class YoutubeDL(object): for ph in self.params.get('progress_hooks', []): self.add_progress_hook(ph) + register_socks_protocols() + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b2e4a2dfb..c9702fd93 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -61,6 +61,13 @@ from .socks import ( ) +def register_socks_protocols(): + # "Register" SOCKS protocols + for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): + if scheme not in compat_urlparse.uses_netloc: + compat_urlparse.uses_netloc.append(scheme) + + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -870,6 +877,8 @@ def make_socks_conn_class(base_class, socks_proxy): socks_type = ProxyType.SOCKS5 elif url_components.scheme.lower() in ('socks', 'socks4'): socks_type = ProxyType.SOCKS4 + elif url_components.scheme.lower() == 'socks4a': + socks_type = ProxyType.SOCKS4A proxy_args = ( socks_type, @@ -2738,7 +2747,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy - if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'): + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): req.add_header('Ytdl-socks-proxy', proxy) # youtube-dl's http/https handlers do wrapping the socket with socks return None From d5ae6bb50124f8320f2b492380480038c487a6d2 Mon Sep 17 
00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 15:37:30 +0800 Subject: [PATCH 0485/3599] [utils] Add rationale for register_socks_protocols --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c9702fd93..dc73f3407 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -63,6 +63,8 @@ from .socks import ( def register_socks_protocols(): # "Register" SOCKS protocols + # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 + # URLs with protocols not in urlparse.uses_netloc are not handled correctly for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): if scheme not in compat_urlparse.uses_netloc: compat_urlparse.uses_netloc.append(scheme) From edaa23f822a1e4a62771422fb598c7bd8ae0a152 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 16:50:16 +0800 Subject: [PATCH 0486/3599] [compat] Rename struct_(un)pack to compat_struct_(un)pack --- test/test_compat.py | 4 ++-- youtube_dl/compat.py | 12 ++++++------ youtube_dl/downloader/f4m.py | 14 +++++++------- youtube_dl/extractor/rtve.py | 4 ++-- youtube_dl/socks.py | 32 ++++++++++++++++---------------- youtube_dl/swfinterp.py | 14 +++++++------- youtube_dl/utils.py | 4 ++-- 7 files changed, 42 insertions(+), 42 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index dd62a5d6b..539b30540 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -17,10 +17,10 @@ from youtube_dl.compat import ( compat_expanduser, compat_shlex_split, compat_str, + compat_struct_unpack, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, - struct_unpack, ) @@ -104,7 +104,7 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) + self.assertEqual(compat_struct_unpack('!B', 
b'\x00'), (0,)) if __name__ == '__main__': diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index f697bee7e..e48c761a6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -599,18 +599,18 @@ try: except TypeError: # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument # See https://bugs.python.org/issue19099 - def struct_pack(spec, *args): + def compat_struct_pack(spec, *args): if isinstance(spec, compat_str): spec = spec.encode('ascii') return struct.pack(spec, *args) - def struct_unpack(spec, *args): + def compat_struct_unpack(spec, *args): if isinstance(spec, compat_str): spec = spec.encode('ascii') return struct.unpack(spec, *args) else: - struct_pack = struct.pack - struct_unpack = struct.unpack + compat_struct_pack = struct.pack + compat_struct_unpack = struct.unpack __all__ = [ @@ -638,6 +638,8 @@ __all__ = [ 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', + 'compat_struct_pack', + 'compat_struct_unpack', 'compat_subprocess_get_DEVNULL', 'compat_tokenize_tokenize', 'compat_urllib_error', @@ -655,8 +657,6 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'shlex_quote', - 'struct_pack', - 'struct_unpack', 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index b282fe3d6..3d9337afa 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,8 +12,8 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, - struct_pack, - struct_unpack, + compat_struct_pack, + compat_struct_unpack, ) from ..utils import ( encodeFilename, @@ -31,13 +31,13 @@ class FlvReader(io.BytesIO): # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read(8))[0] def read_unsigned_int(self): - return struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', 
self.read(4))[0] def read_unsigned_char(self): - return struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read(1))[0] def read_string(self): res = b'' @@ -194,11 +194,11 @@ def build_fragments_list(boot_info): def write_unsigned_int(stream, val): - stream.write(struct_pack('!I', val)) + stream.write(compat_struct_pack('!I', val)) def write_unsigned_int_24(stream, val): - stream.write(struct_pack('!I', val)[1:]) + stream.write(compat_struct_pack('!I', val)[1:]) def write_flv_header(stream): diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f59040877..edd0d108e 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -7,7 +7,7 @@ import time from .common import InfoExtractor from ..compat import ( - struct_unpack, + compat_struct_unpack, ) from ..utils import ( ExtractorError, @@ -23,7 +23,7 @@ def _decrypt_url(png): encrypted_data = base64.b64decode(png.encode('utf-8')) text_index = encrypted_data.find(b'tEXt') text_chunk = encrypted_data[text_index - 4:] - length = struct_unpack('!I', text_chunk[:4])[0] + length = compat_struct_unpack('!I', text_chunk[:4])[0] # Use bytearray to get integers when iterating in both python 2.x and 3.x data = bytearray(text_chunk[8:8 + length]) data = [chr(b) for b in data if b != 0] diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 0e3dd7893..a5b27fea7 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -14,8 +14,8 @@ import socket from .compat import ( compat_ord, - struct_pack, - struct_unpack, + compat_struct_pack, + compat_struct_unpack, ) __author__ = 'Timo Schmid <coding@timoschmid.de>' @@ -26,7 +26,7 @@ SOCKS4_REPLY_VERSION = 0x00 # if the client cannot resolve the destination host's domain name to find its # IP address, it should set the first three bytes of DSTIP to NULL and the last # byte to a non-zero value. 
-SOCKS4_DEFAULT_DSTIP = struct_pack('!BBBB', 0, 0, 0, 0xFF) +SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF) SOCKS5_VERSION = 5 SOCKS5_USER_AUTH_VERSION = 0x01 @@ -128,11 +128,11 @@ class sockssocket(socket.socket): def _recv_bytes(self, cnt): data = self.recvall(cnt) - return struct_unpack('!{0}B'.format(cnt), data) + return compat_struct_unpack('!{0}B'.format(cnt), data) @staticmethod def _len_and_data(data): - return struct_pack('!B', len(data)) + data + return compat_struct_pack('!B', len(data)) + data def _check_response_version(self, expected_version, got_version): if got_version != expected_version: @@ -153,7 +153,7 @@ class sockssocket(socket.socket): ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) - packet = struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr username = (self._proxy.username or '').encode('utf-8') packet += username + b'\x00' @@ -163,7 +163,7 @@ class sockssocket(socket.socket): self.sendall(packet) - version, resp_code, dstport, dsthost = struct_unpack('!BBHI', self.recvall(8)) + version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8)) self._check_response_version(SOCKS4_REPLY_VERSION, version) @@ -177,14 +177,14 @@ class sockssocket(socket.socket): self._setup_socks4(address, is_4a=True) def _socks5_auth(self): - packet = struct_pack('!B', SOCKS5_VERSION) + packet = compat_struct_pack('!B', SOCKS5_VERSION) auth_methods = [Socks5Auth.AUTH_NONE] if self._proxy.username and self._proxy.password: auth_methods.append(Socks5Auth.AUTH_USER_PASS) - packet += struct_pack('!B', len(auth_methods)) - packet += struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) + packet += compat_struct_pack('!B', len(auth_methods)) + packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) self.sendall(packet) @@ -199,7 +199,7 @@ 
class sockssocket(socket.socket): if method == Socks5Auth.AUTH_USER_PASS: username = self._proxy.username.encode('utf-8') password = self._proxy.password.encode('utf-8') - packet = struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) @@ -221,14 +221,14 @@ class sockssocket(socket.socket): self._socks5_auth() reserved = 0 - packet = struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) + packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: destaddr = destaddr.encode('utf-8') - packet += struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) packet += self._len_and_data(destaddr) else: - packet += struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr - packet += struct_pack('!H', port) + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr + packet += compat_struct_pack('!H', port) self.sendall(packet) @@ -247,7 +247,7 @@ class sockssocket(socket.socket): destaddr = self.recvall(alen) elif atype == Socks5AddressType.ATYP_IPV6: destaddr = self.recvall(16) - destport = struct_unpack('!H', self.recvall(2))[0] + destport = compat_struct_unpack('!H', self.recvall(2))[0] return (destaddr, destport) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 86b28716c..7cf490aa4 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -6,7 +6,7 @@ import zlib from .compat import ( compat_str, - struct_unpack, + compat_struct_unpack, ) from .utils import ( ExtractorError, @@ -25,17 +25,17 @@ def _extract_tags(file_contents): file_contents[:1]) # Determine number of bits in framesize rectangle - framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3 + framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3 framesize_len = (5 + 4 * 
framesize_nbits + 7) // 8 pos = framesize_len + 2 + 2 while pos < len(content): - header16 = struct_unpack('<H', content[pos:pos + 2])[0] + header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0] pos += 2 tag_code = header16 >> 6 tag_len = header16 & 0x3f if tag_len == 0x3f: - tag_len = struct_unpack('<I', content[pos:pos + 4])[0] + tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0] pos += 4 assert pos + tag_len <= len(content), \ ('Tag %d ends at %d+%d - that\'s longer than the file (%d)' @@ -103,7 +103,7 @@ def _read_int(reader): for _ in range(5): buf = reader.read(1) assert len(buf) == 1 - b = struct_unpack('<B', buf)[0] + b = compat_struct_unpack('<B', buf)[0] res = res | ((b & 0x7f) << shift) if b & 0x80 == 0: break @@ -129,7 +129,7 @@ def _s24(reader): bs = reader.read(3) assert len(bs) == 3 last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00' - return struct_unpack('<i', bs + last_byte)[0] + return compat_struct_unpack('<i', bs + last_byte)[0] def _read_string(reader): @@ -148,7 +148,7 @@ def _read_bytes(count, reader): def _read_byte(reader): resb = _read_bytes(1, reader=reader) - res = struct_unpack('<B', resb)[0] + res = compat_struct_unpack('<B', resb)[0] return res diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dc73f3407..dbac38b55 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -44,6 +44,7 @@ from .compat import ( compat_parse_qs, compat_socket_create_connection, compat_str, + compat_struct_pack, compat_urllib_error, compat_urllib_parse, compat_urllib_parse_urlencode, @@ -52,7 +53,6 @@ from .compat import ( compat_urlparse, compat_xpath, shlex_quote, - struct_pack, ) from .socks import ( @@ -1259,7 +1259,7 @@ def bytes_to_intlist(bs): def intlist_to_bytes(xs): if not xs: return b'' - return struct_pack('%dB' % len(xs), *xs) + return compat_struct_pack('%dB' % len(xs), *xs) # Cross-platform file locking From e21f17fc86aab0ac7f1f4cee28f64e7b9b954f71 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Thu, 5 May 2016 17:09:13 +0800 Subject: [PATCH 0487/3599] [test/test_socks] Test with local SOCKS servers --- .gitignore | 1 + .travis.yml | 3 +++ devscripts/install_srelay.sh | 8 +++++++ test/test_socks.py | 42 +++++++++++++++++++++++++++++++++--- 4 files changed, 51 insertions(+), 3 deletions(-) create mode 100755 devscripts/install_srelay.sh diff --git a/.gitignore b/.gitignore index 0e7128551..d5f216b5f 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ test/local_parameters.json youtube-dl.zsh .idea .idea/* +tmp/ diff --git a/.travis.yml b/.travis.yml index cc21fae8f..998995845 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,9 @@ python: - "3.4" - "3.5" sudo: false +install: + - bash ./devscripts/install_srelay.sh + - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6 script: nosetests test --verbose notifications: email: diff --git a/devscripts/install_srelay.sh b/devscripts/install_srelay.sh new file mode 100755 index 000000000..33ce8a3f7 --- /dev/null +++ b/devscripts/install_srelay.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +mkdir -p tmp && cd tmp +wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz +tar zxvf srelay-0.4.8b6.tar.gz +cd srelay-0.4.8b6 +./configure +make diff --git a/test/test_socks.py b/test/test_socks.py index 92574c6fd..dc9b8d276 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -8,11 +8,20 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import (FakeYDL, get_params) -from youtube_dl.compat import compat_urllib_request +import random +import subprocess + +from test.helper import ( + FakeYDL, + get_params, +) +from youtube_dl.compat import ( + compat_str, + compat_urllib_request, +) -class TestSocks(unittest.TestCase): +class TestMultipleSocks(unittest.TestCase): @staticmethod def _check_params(attrs): params = get_params() @@ -67,5 +76,32 @@ class 
TestSocks(unittest.TestCase): params['secondary_server_ip']) +class TestSocks(unittest.TestCase): + def setUp(self): + self.port = random.randint(49152, 65535) + self.server_process = subprocess.Popen([ + 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def tearDown(self): + self.server_process.terminate() + self.server_process.communicate() + + def _get_ip(self, protocol): + ydl = FakeYDL({ + 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), + }) + return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8') + + def test_socks4(self): + self.assertTrue(isinstance(self._get_ip('socks4'), compat_str)) + + def test_socks4a(self): + self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str)) + + def test_socks5(self): + self.assertTrue(isinstance(self._get_ip('socks5'), compat_str)) + + if __name__ == '__main__': unittest.main() From fa5cb8d0212918657cb58b4d5791ed3de831bd74 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 8 May 2016 15:14:56 +0800 Subject: [PATCH 0488/3599] [socks] Remove a superfluous clause --- youtube_dl/socks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index a5b27fea7..fd49d7435 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -210,8 +210,6 @@ class sockssocket(socket.socket): if status != SOCKS5_USER_AUTH_SUCCESS: self.close() raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) - elif method == Socks5Auth.AUTH_NONE: - pass def _setup_socks5(self, address): destaddr, port = address From 6ddb4888d2610df3bbb5024440caddde50fe9ad8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 8 May 2016 15:15:58 +0800 Subject: [PATCH 0489/3599] [options] Update --proxy description for SOCKS proxies --- youtube_dl/options.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 
d1f8d1331..38efd292d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -188,7 +188,10 @@ def parseOpts(overrideArguments=None): network.add_option( '--proxy', dest='proxy', default=None, metavar='URL', - help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') + help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental ' + 'SOCKS proxy, specify a proper scheme. For example ' + 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") ' + 'for direct connection') network.add_option( '--socket-timeout', dest='socket_timeout', type=float, default=None, metavar='SECONDS', From c2876afafef392220cdb2baebace1d6d533f8d63 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 8 May 2016 15:16:32 +0800 Subject: [PATCH 0490/3599] [test/test_socks] Use a different port range Seems on Travis CI, ports in the original range are often used. --- test/test_socks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_socks.py b/test/test_socks.py index dc9b8d276..d07003ceb 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -78,7 +78,7 @@ class TestMultipleSocks(unittest.TestCase): class TestSocks(unittest.TestCase): def setUp(self): - self.port = random.randint(49152, 65535) + self.port = random.randint(20000, 30000) self.server_process = subprocess.Popen([ 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) From 28b4f73620c82e7007b3154e4d5f437cf6fb2608 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 10 May 2016 09:08:08 +0200 Subject: [PATCH 0491/3599] release 2016.05.10 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 8 +++++--- docs/supportedsites.md | 11 +++++++++-- youtube_dl/version.py | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a26ff1de4..1fb878b59 
100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.01 +[debug] youtube-dl version 2016.05.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 50acb26a0..4ef6b6d5a 100644 --- a/README.md +++ b/README.md @@ -85,9 +85,11 @@ which means you can modify it, redistribute it or use it however you like. --no-color Do not emit color codes in output ## Network Options: - --proxy URL Use the specified HTTP/HTTPS proxy. 
Pass in - an empty string (--proxy "") for direct - connection + --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. + To enable experimental SOCKS proxy, specify + a proper scheme. For example + socks5://127.0.0.1:1080/. Pass in an empty + string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds --source-address IP Client-side IP address to bind to (experimental) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9fb43671f..de84e5c84 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -77,6 +77,7 @@ - **Bild**: Bild.de - **BiliBili** - **BioBioChileTV** + - **BIQLE** - **BleacherReport** - **BleacherReportCMS** - **blinkx** @@ -145,6 +146,7 @@ - **culturebox.francetvinfo.fr** - **CultureUnplugged** - **CWTV** + - **DailyMail** - **dailymotion** - **dailymotion:playlist** - **dailymotion:user** @@ -325,6 +327,7 @@ - **limelight** - **limelight:channel** - **limelight:channel_list** + - **LiTV** - **LiveLeak** - **livestream** - **livestream:original** @@ -374,6 +377,8 @@ - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** + - **mva**: Microsoft Virtual Academy videos + - **mva:course**: Microsoft Virtual Academy courses - **Mwave** - **MwaveMeetGreet** - **MySpace** @@ -463,7 +468,8 @@ - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE 
Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), 
Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pcmag** - **People** - - **Periscope**: Periscope + - **periscope**: Periscope + - **periscope:user**: Periscope user videos - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** @@ -700,6 +706,7 @@ - **Vessel** - **Vesti**: Вести.Ru - **Vevo** + - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **Vice** @@ -772,7 +779,7 @@ - **WSJ**: Wall Street Journal - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 551160897..45e40c0d1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.01' +__version__ = '2016.05.10' From 702ccf2dc08603fed98d2672f86af1a0e300d83e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 10 May 2016 15:58:25 +0800 Subject: [PATCH 0492/3599] [compat] Rename shlex_quote and remove unused subprocess_check_output --- youtube_dl/compat.py | 19 +++---------------- youtube_dl/postprocessor/execafterdownload.py | 4 ++-- youtube_dl/utils.py | 4 ++-- 3 files changed, 7 insertions(+), 20 
deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e48c761a6..1392361a1 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -341,9 +341,9 @@ except ImportError: # Python 2 return parsed_result try: - from shlex import quote as shlex_quote + from shlex import quote as compat_shlex_quote except ImportError: # Python < 3.3 - def shlex_quote(s): + def compat_shlex_quote(s): if re.match(r'^[-_\w./]+$', s): return s else: @@ -466,18 +466,6 @@ else: print(s) -try: - subprocess_check_output = subprocess.check_output -except AttributeError: - def subprocess_check_output(*args, **kwargs): - assert 'input' not in kwargs - p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs) - output, _ = p.communicate() - ret = p.poll() - if ret: - raise subprocess.CalledProcessError(ret, p.args, output=output) - return output - if sys.version_info < (3, 0) and sys.platform == 'win32': def compat_getpass(prompt, *args, **kwargs): if isinstance(prompt, compat_str): @@ -635,6 +623,7 @@ __all__ = [ 'compat_parse_qs', 'compat_print', 'compat_setenv', + 'compat_shlex_quote', 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', @@ -656,7 +645,5 @@ __all__ = [ 'compat_urlretrieve', 'compat_xml_parse_error', 'compat_xpath', - 'shlex_quote', - 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 74f66d669..90630c2d7 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import subprocess from .common import PostProcessor -from ..compat import shlex_quote +from ..compat import compat_shlex_quote from ..utils import PostProcessingError @@ -17,7 +17,7 @@ class ExecAfterDownloadPP(PostProcessor): if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', shlex_quote(information['filepath'])) + cmd = 
cmd.replace('{}', compat_shlex_quote(information['filepath'])) self._downloader.to_screen('[exec] Executing command: %s' % cmd) retCode = subprocess.call(cmd, shell=True) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index dbac38b55..e8b09e9db 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -42,6 +42,7 @@ from .compat import ( compat_http_client, compat_kwargs, compat_parse_qs, + compat_shlex_quote, compat_socket_create_connection, compat_str, compat_struct_pack, @@ -52,7 +53,6 @@ from .compat import ( compat_urllib_request, compat_urlparse, compat_xpath, - shlex_quote, ) from .socks import ( @@ -1977,7 +1977,7 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command - return ' '.join(shlex_quote(a) for a in args) + return ' '.join(compat_shlex_quote(a) for a in args) def error_to_compat_str(err): From e73b9c65e279f283b28d14be5b7173eae46d4364 Mon Sep 17 00:00:00 2001 From: teemuy <z0rs4m37tAlL> Date: Wed, 11 May 2016 18:10:30 +0300 Subject: [PATCH 0493/3599] Bugfix: Allow colons in custom HTTP header values. 
--- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cbd84c3af..740a1904b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -69,7 +69,7 @@ def _real_main(argv=None): for h in opts.headers: if h.find(':', 1) < 0: parser.error('wrong header formatting, it should be key:value, not "%s"' % h) - key, value = h.split(':', 2) + key, value = h.split(':', 1) if opts.verbose: write_string('[debug] Adding header from command line option %s:%s\n' % (key, value)) std_headers[key] = value From e0741fd4496c85ef447e72df935cb6edd1af53ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 11 May 2016 22:03:30 +0600 Subject: [PATCH 0494/3599] [__init__] Simplify colon presence check --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 740a1904b..5df965191 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -67,7 +67,7 @@ def _real_main(argv=None): # Custom HTTP headers if opts.headers is not None: for h in opts.headers: - if h.find(':', 1) < 0: + if ':' not in h: parser.error('wrong header formatting, it should be key:value, not "%s"' % h) key, value = h.split(':', 1) if opts.verbose: From 4540515cb3daa0716fa94e54cacb566ef1461ab3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 18:48:27 +0800 Subject: [PATCH 0495/3599] [iqiyi] Fix 1080P extraction (closes #9446) --- youtube_dl/extractor/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index ffb8008ce..ddcb3c916 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -505,7 +505,10 @@ class IqiyiIE(InfoExtractor): 'enc': md5_text(enc_key + tail), 'qyid': _uuid, 'tn': random.random(), - 'um': 0, + # In iQiyi's flash 
player, um is set to 1 if there's a logged user + # Some 1080P formats are only available with a logged user. + # Here force um=1 to trick the iQiyi server + 'um': 1, 'authkey': md5_text(md5_text('') + tail), 'k_tag': 1, } From 778a1ccca7d6cce06faf17867f20b87883d84e98 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 19:48:48 +0800 Subject: [PATCH 0496/3599] =?UTF-8?q?[utils]=20Add=20=C5=92=20and=20=C5=93?= =?UTF-8?q?=20found=20in=20French=20to=20ACCENT=5FCHARS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #9463 --- test/test_utils.py | 4 ++-- youtube_dl/utils.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5702ffa97..ca254779f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -155,8 +155,8 @@ class TestUtil(unittest.TestCase): self.assertTrue(sanitize_filename(':', restricted=True) != '') self.assertEqual(sanitize_filename( - 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy') + 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True), + 'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e8b09e9db..6592c8ec2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -104,9 +104,9 @@ KNOWN_EXTENSIONS = ( 'f4f', 'f4m', 'm3u8', 'smil') # needed for sanitizing filenames in restricted mode -ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', - itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'], - 'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy'))) +ACCENT_CHARS = 
dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', + itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'], + 'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy'))) def preferredencoding(): From 7e8ddca1bb10068356d1ec43cf66e7627b76fce7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 19:56:58 +0800 Subject: [PATCH 0497/3599] [vevo] Delay the georestriction check to prevent false alerts Fixes #9408 --- youtube_dl/extractor/vevo.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c0632cd6a..388b4debe 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -213,19 +213,17 @@ class VevoIE(VevoBaseIE): formats = [] if not video_info: - if response and response.get('statusCode') != 909: + try: + self._initialize_api(video_id) + except ExtractorError: ytid = response.get('errorInfo', {}).get('ytid') if ytid: self.report_warning( 'Video is geoblocked, trying with the YouTube video %s' % ytid) return self.url_result(ytid, 'Youtube', ytid) - if 'statusMessage' in response: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, response['statusMessage']), expected=True) - raise ExtractorError('Unable to extract videos') + raise - self._initialize_api(video_id) video_info = self._call_api( 'video/%s' % video_id, video_id, 'Downloading api video info', 'Failed to download video info') From 1b405bb47d91119cc612a90d26f27f2b93f7c7b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 3 May 2016 18:06:50 +0800 Subject: [PATCH 0498/3599] [downloader/f4m] Tolerate truncate segments when testing Replaces #9216 Fixes #9214 and test_Bloomberg partially --- youtube_dl/downloader/f4m.py | 42 +++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py 
index 3d9337afa..314def4cb 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -23,26 +23,38 @@ from ..utils import ( ) +class DataTruncatedError(Exception): + pass + + class FlvReader(io.BytesIO): """ Reader for Flv files The file format is documented in https://www.adobe.com/devnet/f4v.html """ + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return compat_struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): - return compat_struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): - return compat_struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' while True: - char = self.read(1) + char = self.read_bytes(1) if char == b'\x00': break res += char @@ -53,18 +65,18 @@ class FlvReader(io.BytesIO): Read a box and return the info as a tuple: (box_size, box_type, box_data) """ real_size = size = self.read_unsigned_int() - box_type = self.read(4) + box_type = self.read_bytes(4) header_end = 8 if size == 1: real_size = self.read_unsigned_long_long() header_end = 16 - return real_size, box_type, self.read(real_size - header_end) + return real_size, box_type, self.read_bytes(real_size - header_end) def read_asrt(self): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount for i in range(quality_entry_count): @@ -85,7 +97,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) # time scale self.read_unsigned_int() @@ -119,7 +131,7 @@ class 
FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved @@ -374,7 +386,17 @@ class F4mFD(FragmentFD): down.close() reader = FlvReader(down_data) while True: - _, box_type, box_data = reader.read_box_info() + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and thus + # FlvReader may not be able to parse the whole + # chunk. If so, write the segment as is + # See https://github.com/rg3/youtube-dl/issues/9214 + dest_stream.write(down_data) + break + raise if box_type == b'mdat': dest_stream.write(box_data) break From a3fa6024d676ec20a06fe618f5c3d6e064f49336 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 20:05:43 +0800 Subject: [PATCH 0499/3599] [bloomberg] Fix test_Bloomberg In this test case, sometimes HLS is the best format while sometimes HDS is. To prevent occasional test failures, force HDS to be the best format. In the past, testing against HDS formats causes the same error as #9214, which is fixed as #9377 landed. 
--- youtube_dl/extractor/bloomberg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 13343bc25..bd538be50 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -17,6 +17,9 @@ class BloombergIE(InfoExtractor): 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', 'description': 'md5:a8ba0302912d03d246979735c17d2761', }, + 'params': { + 'format': 'best[format_id^=hds]', + }, }, { 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', 'only_matching': True, From f388f616c1f2ad9d2c906c4183cf996c845b2858 Mon Sep 17 00:00:00 2001 From: TRox1972 <archcr8@gmail.com> Date: Thu, 12 May 2016 16:48:12 +0200 Subject: [PATCH 0500/3599] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ef6b6d5a..a2febab2c 100644 --- a/README.md +++ b/README.md @@ -417,7 +417,7 @@ which means you can modify it, redistribute it or use it however you like. # CONFIGURATION -You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. +You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. 
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: ``` From 7581bfc958c8de77adbf8a502564d2263d17479d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 12 May 2016 18:57:53 +0800 Subject: [PATCH 0501/3599] [utils] Unquote crendentials passed to SOCKS proxies Fixes #9450 --- youtube_dl/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6592c8ec2..d6f94f8cd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -50,6 +50,7 @@ from .compat import ( compat_urllib_parse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, + compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, compat_xpath, @@ -886,7 +887,8 @@ def make_socks_conn_class(base_class, socks_proxy): socks_type, url_components.hostname, url_components.port or 1080, True, # Remote DNS - url_components.username, url_components.password + compat_urllib_parse_unquote_plus(url_components.username), + compat_urllib_parse_unquote_plus(url_components.password), ) class SocksConnection(base_class): From 0db3a66162cf1059dbfccd60db350596f7c5b469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 12 May 2016 23:57:52 +0600 Subject: [PATCH 0502/3599] [twitch] Skip dead tests --- youtube_dl/extractor/twitch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 36ee1adff..68f50487b 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -171,6 +171,7 @@ class TwitchVideoIE(TwitchItemBaseIE): 'title': 'Worlds Semifinals - Star Horn Royal Club vs. 
OMG', }, 'playlist_mincount': 12, + 'skip': 'HTTP Error 404: Not Found', } @@ -187,6 +188,7 @@ class TwitchChapterIE(TwitchItemBaseIE): 'title': 'ACRL Off Season - Sports Cars @ Nordschleife', }, 'playlist_mincount': 3, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361', 'only_matching': True, @@ -368,6 +370,7 @@ class TwitchBookmarksIE(TwitchPlaylistBaseIE): 'title': 'Ognos', }, 'playlist_mincount': 3, + 'skip': 'HTTP Error 404: Not Found', } def _extract_playlist_page(self, response): From 0df79d552a6d528ac5bb1a9cce99199aafe79144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 00:14:30 +0600 Subject: [PATCH 0503/3599] [twitch:bookmarks] Remove extractor Bookmarks no longer available --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/twitch.py | 26 -------------------------- 2 files changed, 27 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a0bb3d4c2..f2bd4fe97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -834,7 +834,6 @@ from .twitch import ( TwitchVodIE, TwitchProfileIE, TwitchPastBroadcastsIE, - TwitchBookmarksIE, TwitchStreamIE, ) from .twitter import ( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 68f50487b..f7b98e190 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -357,32 +357,6 @@ class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): } -class TwitchBookmarksIE(TwitchPlaylistBaseIE): - IE_NAME = 'twitch:bookmarks' - _VALID_URL = r'%s/(?P<id>[^/]+)/profile/bookmarks/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_URL = '%s/api/bookmark/?user=%%s&offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE - _PLAYLIST_TYPE = 'bookmarks' - - _TEST = { - 'url': 'http://www.twitch.tv/ognos/profile/bookmarks', - 'info_dict': { - 'id': 'ognos', - 'title': 'Ognos', - }, - 
'playlist_mincount': 3, - 'skip': 'HTTP Error 404: Not Found', - } - - def _extract_playlist_page(self, response): - entries = [] - for bookmark in response.get('bookmarks', []): - video = bookmark.get('video') - if not video: - continue - entries.append(video['url']) - return entries - - class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' _VALID_URL = r'%s/(?P<id>[^/#?]+)/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE From d8d540cf0d11dbf7b3d9de611470fc7114c8d1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 02:07:12 +0600 Subject: [PATCH 0504/3599] [nrk] Rework extractor (Closes #9470) --- youtube_dl/extractor/nrk.py | 435 ++++++++++++++++-------------------- 1 file changed, 196 insertions(+), 239 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 51dfc27ac..f0fbdd8be 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,91 +4,224 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_urllib_parse_unquote, -) +from ..compat import compat_urllib_parse_unquote from ..utils import ( - determine_ext, ExtractorError, - float_or_none, + int_or_none, + parse_age_limit, parse_duration, - unified_strdate, ) -class NRKIE(InfoExtractor): - _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' - - _TESTS = [ - { - 'url': 'http://www.nrk.no/video/PS*150533', - # MD5 is unstable - 'info_dict': { - 'id': '150533', - 'ext': 'flv', - 'title': 'Dompap og andre fugler i Piip-Show', - 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', - 'duration': 263, - } - }, - { - 'url': 'http://www.nrk.no/video/PS*154915', - # MD5 is unstable - 'info_dict': { - 'id': '154915', - 'ext': 'flv', - 'title': 'Slik høres internett ut når du er blind', - 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', - 'duration': 20, - } - }, - ] +class 
NRKBaseIE(InfoExtractor): + def _extract_formats(self, manifest_url, video_id, fatal=True): + return self._extract_f4m_formats( + manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', + video_id, f4m_id='hds', fatal=fatal) def _real_extract(self, url): video_id = self._match_id(url) data = self._download_json( - 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id, - video_id, 'Downloading media JSON') + 'http://%s/mediaelement/%s' % (self._API_HOST, video_id), + video_id, 'Downloading mediaelement JSON') - media_url = data.get('mediaUrl') + title = data.get('fullTitle') or data.get('mainTitle') or data['title'] + video_id = data.get('id') or video_id - if not media_url: - if data['usageRights']['isGeoBlocked']: + entries = [] + + media_assets = data.get('mediaAssets') + if media_assets and isinstance(media_assets, list): + def video_id_and_title(idx): + return ((video_id, title) if len(media_assets) == 1 + else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) + for num, asset in enumerate(media_assets, 1): + asset_url = asset.get('url') + if not asset_url: + continue + formats = self._extract_formats(asset_url, video_id, fatal=False) + if not formats: + continue + self._sort_formats(formats) + entry_id, entry_title = video_id_and_title(num) + duration = parse_duration(asset.get('duration')) + subtitles = {} + for subtitle in ('webVtt', 'timedText'): + subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) + if subtitle_url: + subtitles.setdefault('no', []).append({'url': subtitle_url}) + entries.append({ + 'id': asset.get('carrierId') or entry_id, + 'title': entry_title, + 'duration': duration, + 'subtitles': subtitles, + 'formats': formats, + }) + + if not entries: + media_url = data.get('mediaUrl') + if media_url: + formats = self._extract_formats(media_url, video_id) + self._sort_formats(formats) + duration = parse_duration(data.get('duration')) + entries = [{ + 'id': video_id, + 'title': title, + 'duration': duration, + 'formats': formats, + }] + + 
if not entries: + if data.get('usageRights', {}).get('isGeoBlocked'): raise ExtractorError( 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', expected=True) - if determine_ext(media_url) == 'f4m': - formats = self._extract_f4m_formats( - media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds') - self._sort_formats(formats) - else: - formats = [{ - 'url': media_url, - 'ext': 'flv', - }] - - duration = parse_duration(data.get('duration')) + conviva = data.get('convivaStatistics') or {} + series = conviva.get('seriesName') or data.get('seriesTitle') + episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') + thumbnails = None images = data.get('images') - if images: - thumbnails = images['webImages'] - thumbnails.sort(key=lambda image: image['pixelWidth']) - thumbnail = thumbnails[-1]['imageUrl'] - else: - thumbnail = None + if images and isinstance(images, dict): + web_images = images.get('webImages') + if isinstance(web_images, list): + thumbnails = [{ + 'url': image['imageUrl'], + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + } for image in web_images if image.get('imageUrl')] - return { - 'id': video_id, - 'title': data['title'], - 'description': data['description'], - 'duration': duration, - 'thumbnail': thumbnail, - 'formats': formats, + description = data.get('description') + + common_info = { + 'description': description, + 'series': series, + 'episode': episode, + 'age_limit': parse_age_limit(data.get('legalAge')), + 'thumbnails': thumbnails, } + vcodec = 'none' if data.get('mediaType') == 'Audio' else None + + # TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged + + for entry in entries: + entry.update(common_info) + for f in entry['formats']: + f['vcodec'] = vcodec + + return self.playlist_result(entries, video_id, title, description) + + +class NRKIE(NRKBaseIE): + _VALID_URL = 
r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' + _API_HOST = 'v8.psapi.nrk.no' + _TESTS = [{ + # video + 'url': 'http://www.nrk.no/video/PS*150533', + # MD5 is unstable + 'info_dict': { + 'id': '150533', + 'ext': 'flv', + 'title': 'Dompap og andre fugler i Piip-Show', + 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', + 'duration': 263, + } + }, { + # audio + 'url': 'http://www.nrk.no/video/PS*154915', + # MD5 is unstable + 'info_dict': { + 'id': '154915', + 'ext': 'flv', + 'title': 'Slik høres internett ut når du er blind', + 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', + 'duration': 20, + } + }] + + +class NRKTVIE(NRKBaseIE): + IE_DESC = 'NRK TV and NRK Radio' + _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' + _API_HOST = 'psapi-we.nrk.no' + + _TESTS = [{ + 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', + 'info_dict': { + 'id': 'MUHH48000314', + 'ext': 'mp4', + 'title': '20 spørsmål', + 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', + 'upload_date': '20140523', + 'duration': 1741.52, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://tv.nrk.no/program/mdfp15000514', + 'info_dict': { + 'id': 'mdfp15000514', + 'ext': 'mp4', + 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', + 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', + 'upload_date': '20140524', + 'duration': 4605.08, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # single playlist video + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', + 'md5': 'adbd1dbd813edaf532b0a253780719c2', + 'info_dict': { + 'id': 'MSPO40010515-part2', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + 'skip': 
'Only works from Norway', + }, { + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', + 'playlist': [{ + 'md5': '9480285eff92d64f06e02a5367970a7a', + 'info_dict': { + 'id': 'MSPO40010515-part1', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + }, { + 'md5': 'adbd1dbd813edaf532b0a253780719c2', + 'info_dict': { + 'id': 'MSPO40010515-part2', + 'ext': 'flv', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + }, + }], + 'info_dict': { + 'id': 'MSPO40010515', + 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', + 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', + 'upload_date': '20150106', + 'duration': 6947.52, + }, + 'skip': 'Only works from Norway', + }, { + 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', + 'only_matching': True, + }] + class NRKPlaylistIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)' @@ -159,179 +292,3 @@ class NRKSkoleIE(InfoExtractor): nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id') return self.url_result('nrk:%s' % nrk_id) - - -class NRKTVIE(InfoExtractor): - IE_DESC = 'NRK TV and NRK Radio' - _VALID_URL = r'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' 
- - _TESTS = [ - { - 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', - 'info_dict': { - 'id': 'MUHH48000314', - 'ext': 'mp4', - 'title': '20 spørsmål', - 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'upload_date': '20140523', - 'duration': 1741.52, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'https://tv.nrk.no/program/mdfp15000514', - 'info_dict': { - 'id': 'mdfp15000514', - 'ext': 'mp4', - 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', - 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', - 'upload_date': '20140524', - 'duration': 4605.08, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - # single playlist video - 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', - 'md5': 'adbd1dbd813edaf532b0a253780719c2', - 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - 'skip': 'Only works from Norway', - }, - { - 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', - 'playlist': [ - { - 'md5': '9480285eff92d64f06e02a5367970a7a', - 'info_dict': { - 'id': 'MSPO40010515-part1', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - }, - { - 'md5': 'adbd1dbd813edaf532b0a253780719c2', - 'info_dict': { - 'id': 'MSPO40010515-part2', - 'ext': 'flv', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', - }, - }, - ], - 'info_dict': { - 'id': 'MSPO40010515', - 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', - 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 
'upload_date': '20150106', - 'duration': 6947.5199999999995, - }, - 'skip': 'Only works from Norway', - }, - { - 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', - 'only_matching': True, - } - ] - - def _extract_f4m(self, manifest_url, video_id): - return self._extract_f4m_formats( - manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id, f4m_id='hds') - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - part_id = mobj.group('part_id') - base_url = mobj.group('baseurl') - - webpage = self._download_webpage(url, video_id) - - title = self._html_search_meta( - 'title', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') - - thumbnail = self._html_search_regex( - r'data-posterimage="([^"]+)"', - webpage, 'thumbnail', fatal=False) - upload_date = unified_strdate(self._html_search_meta( - 'rightsfrom', webpage, 'upload date', fatal=False)) - duration = float_or_none(self._html_search_regex( - r'data-duration="([^"]+)"', - webpage, 'duration', fatal=False)) - - # playlist - parts = re.findall( - r'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage) - if parts: - entries = [] - for current_part_id, stream_url, part_title in parts: - if part_id and current_part_id != part_id: - continue - video_part_id = '%s-part%s' % (video_id, current_part_id) - formats = self._extract_f4m(stream_url, video_part_id) - entries.append({ - 'id': video_part_id, - 'title': part_title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'formats': formats, - }) - if part_id: - if entries: - return entries[0] - else: - playlist = self.playlist_result(entries, video_id, title, description) - playlist.update({ - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - }) - return playlist - - formats = [] - - f4m_url = re.search(r'data-media="([^"]+)"', webpage) - if f4m_url: - 
formats.extend(self._extract_f4m(f4m_url.group(1), video_id)) - - m3u8_url = re.search(r'data-hls-media="([^"]+)"', webpage) - if m3u8_url: - formats.extend(self._extract_m3u8_formats(m3u8_url.group(1), video_id, 'mp4', m3u8_id='hls')) - self._sort_formats(formats) - - subtitles_url = self._html_search_regex( - r'data-subtitlesurl\s*=\s*(["\'])(?P<url>.+?)\1', - webpage, 'subtitle URL', default=None, group='url') - subtitles = {} - if subtitles_url: - subtitles['no'] = [{ - 'ext': 'ttml', - 'url': compat_urlparse.urljoin(base_url, subtitles_url), - }] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } From b9e7bc55da1c1275737b356efadc06435b8bfa2c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 12 May 2016 22:45:54 +0100 Subject: [PATCH 0505/3599] [mgtv] extract http formats --- youtube_dl/extractor/mgtv.py | 43 ++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index a14d176a5..9fbc74f5d 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -11,7 +11,7 @@ class MGTVIE(InfoExtractor): _TEST = { 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', - 'md5': '', + 'md5': '1bdadcf760a0b90946ca68ee9a2db41a', 'info_dict': { 'id': '3116640', 'ext': 'mp4', @@ -20,15 +20,6 @@ class MGTVIE(InfoExtractor): 'duration': 7461, 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - 'skip_download': True, # m3u8 download - }, - } - - _FORMAT_MAP = { - '标清': ('Standard', 0), - '高清': ('High', 1), - '超清': ('SuperHigh', 2), } def _real_extract(self, url): @@ -40,17 +31,27 @@ class MGTVIE(InfoExtractor): formats = [] for idx, stream in enumerate(api_data['stream']): - format_name = stream.get('name') - format_id, preference = self._FORMAT_MAP.get(format_name, 
(None, None)) - format_info = self._download_json( - stream['url'], video_id, - note='Download video info for format %s' % format_id or '#%d' % idx) - formats.append({ - 'format_id': format_id, - 'url': format_info['info'], - 'ext': 'mp4', # These are m3u8 playlists - 'preference': preference, - }) + stream_url = stream.get('url') + if not stream_url: + continue + tbr = int_or_none(self._search_regex( + r'(\d+)\.mp4', stream_url, 'tbr', default=None)) + + def extract_format(stream_url, format_id, idx, query={}): + format_info = self._download_json( + stream_url, video_id, + note='Download video info for format %s' % format_id or '#%d' % idx, query=query) + return { + 'format_id': format_id, + 'url': format_info['info'], + 'ext': 'mp4', + 'tbr': tbr, + } + + formats.append(extract_format( + stream_url, 'hls-%d' % tbr if tbr else None, idx * 2)) + formats.append(extract_format(stream_url.replace( + '/playlist.m3u8', ''), 'http-%d' % tbr if tbr else None, idx * 2 + 1, {'pno': 1031})) self._sort_formats(formats) return { From 99d79b8692ae8981aff91cf5b1475516b60eb765 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 05:21:45 +0100 Subject: [PATCH 0506/3599] [ustudio] add support ustudio app/embed urls --- youtube_dl/extractor/ustudio.py | 66 +++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ustudio.py b/youtube_dl/extractor/ustudio.py index cafc082b6..3484a2046 100644 --- a/youtube_dl/extractor/ustudio.py +++ b/youtube_dl/extractor/ustudio.py @@ -6,10 +6,12 @@ from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + unescapeHTML, ) class UstudioIE(InfoExtractor): + IE_NAME = 'ustudio' _VALID_URL = r'https?://(?:(?:www|v1)\.)?ustudio\.com/video/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)' _TEST = { 'url': 'http://ustudio.com/video/Uxu2my9bgSph/san_francisco_golden_gate_bridge', @@ -27,9 +29,7 @@ class UstudioIE(InfoExtractor): } def 
_real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + video_id, display_id = re.match(self._VALID_URL, url).groups() config = self._download_xml( 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id, @@ -37,7 +37,7 @@ class UstudioIE(InfoExtractor): def extract(kind): return [{ - 'url': item.attrib['url'], + 'url': unescapeHTML(item.attrib['url']), 'width': int_or_none(item.get('width')), 'height': int_or_none(item.get('height')), } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')] @@ -65,3 +65,61 @@ class UstudioIE(InfoExtractor): 'uploader': uploader, 'formats': formats, } + + +class UstudioEmbedIE(InfoExtractor): + IE_NAME = 'ustudio:embed' + _VALID_URL = r'https?://(?:(?:app|embed)\.)?ustudio\.com/embed/(?P<uid>[^/]+)/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://app.ustudio.com/embed/DeN7VdYRDKhP/Uw7G1kMCe65T', + 'md5': '47c0be52a09b23a7f40de9469cec58f4', + 'info_dict': { + 'id': 'Uw7G1kMCe65T', + 'ext': 'mp4', + 'title': '5 Things IT Should Know About Video', + 'description': 'md5:93d32650884b500115e158c5677d25ad', + 'uploader_id': 'DeN7VdYRDKhP', + } + } + + def _real_extract(self, url): + uploader_id, video_id = re.match(self._VALID_URL, url).groups() + video_data = self._download_json( + 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id), + video_id)['videos'][0] + title = video_data['name'] + + formats = [] + for ext, qualities in video_data.get('transcodes', {}).items(): + for quality in qualities: + quality_url = quality.get('url') + if not quality_url: + continue + height = int_or_none(quality.get('height')) + formats.append({ + 'format_id': '%s-%dp' % (ext, height) if height else ext, + 'url': quality_url, + 'width': int_or_none(quality.get('width')), + 'height': height, + }) + self._sort_formats(formats) + + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('url') + if not 
image_url: + continue + thumbnails.append({ + 'url': image_url, + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'uploader_id': uploader_id, + 'tags': video_data.get('keywords'), + 'thumbnails': thumbnails, + 'formats': formats, + } From cdf32ff15d6fc9d1902bfb3ed10a582070d20cd9 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 05:25:32 +0100 Subject: [PATCH 0507/3599] [extractors] add import for UstudioEmbedIE --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f2bd4fe97..50d2204f2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -851,7 +851,10 @@ from .unistra import UnistraIE from .urort import UrortIE from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE -from .ustudio import UstudioIE +from .ustudio import ( + UstudioIE, + UstudioEmbedIE, +) from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE From 18cf6381f6b140431f3a747fc2d222be08ab2e23 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 08:05:28 +0100 Subject: [PATCH 0508/3599] [nrk] extract m3u8 formats --- youtube_dl/extractor/nrk.py | 39 +++++++++++++++---------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index f0fbdd8be..7532f40c1 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -15,9 +15,14 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): def _extract_formats(self, manifest_url, video_id, fatal=True): - return self._extract_f4m_formats( + formats = [] + formats.extend(self._extract_f4m_formats( manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', - video_id, 
f4m_id='hds', fatal=fatal) + video_id, f4m_id='hds', fatal=fatal)) + formats.extend(self._extract_m3u8_formats(manifest_url.replace( + 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal)) + return formats def _real_extract(self, url): video_id = self._match_id(url) @@ -121,10 +126,10 @@ class NRKIE(NRKBaseIE): _TESTS = [{ # video 'url': 'http://www.nrk.no/video/PS*150533', - # MD5 is unstable + 'md5': '2f7f6eeb2aacdd99885f355428715cfa', 'info_dict': { 'id': '150533', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Dompap og andre fugler i Piip-Show', 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 'duration': 263, @@ -150,32 +155,24 @@ class NRKTVIE(NRKBaseIE): _TESTS = [{ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', + 'md5': '4e9ca6629f09e588ed240fb11619922a', 'info_dict': { - 'id': 'MUHH48000314', + 'id': 'MUHH48000314AA', 'ext': 'mp4', - 'title': '20 spørsmål', + 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'upload_date': '20140523', 'duration': 1741.52, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', + 'md5': '43d0be26663d380603a9cf0c24366531', 'info_dict': { - 'id': 'mdfp15000514', + 'id': 'MDFP15000514CA', 'ext': 'mp4', - 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', - 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', - 'upload_date': '20140524', + 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', + 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', 'duration': 4605.08, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { # single playlist video 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', @@ -185,7 +182,6 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, 'skip': 'Only works from Norway', }, { @@ -197,7 +193,6 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, }, { 'md5': 'adbd1dbd813edaf532b0a253780719c2', @@ -206,14 +201,12 @@ class NRKTVIE(NRKBaseIE): 'ext': 'flv', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', }, }], 'info_dict': { 'id': 'MSPO40010515', 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', - 'upload_date': '20150106', 'duration': 6947.52, }, 'skip': 'Only works from Norway', From ad55e101651edc732acac22cfb25d276d6c8bdca Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 08:35:38 +0100 Subject: [PATCH 0509/3599] [brightcove] change the protocol for m3u8 formats to m3u8_native --- youtube_dl/extractor/brightcove.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f0781fc27..fc7fc5b16 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -307,9 +307,10 @@ class BrightcoveLegacyIE(InfoExtractor): playlist_title=playlist_info['mediaCollectionDTO']['displayName']) def _extract_video_info(self, video_info): + video_id = compat_str(video_info['id']) publisher_id = video_info.get('publisherId') info = { - 'id': compat_str(video_info['id']), + 'id': video_id, 'title': video_info['displayName'].strip(), 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), @@ -331,7 +332,8 @@ class BrightcoveLegacyIE(InfoExtractor): url_comp = 
compat_urllib_parse_urlparse(url) if url_comp.path.endswith('.m3u8'): formats.extend( - self._extract_m3u8_formats(url, info['id'], 'mp4')) + self._extract_m3u8_formats( + url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) continue elif 'akamaihd.net' in url_comp.netloc: # This type of renditions are served through @@ -365,7 +367,7 @@ class BrightcoveLegacyIE(InfoExtractor): a_format.update({ 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''), 'ext': 'mp4', - 'protocol': 'm3u8', + 'protocol': 'm3u8_native', }) formats.append(a_format) @@ -395,7 +397,7 @@ class BrightcoveLegacyIE(InfoExtractor): return ad_info if 'url' not in info and not info.get('formats'): - raise ExtractorError('Unable to extract video url for %s' % info['id']) + raise ExtractorError('Unable to extract video url for %s' % video_id) return info @@ -527,7 +529,7 @@ class BrightcoveNewIE(InfoExtractor): if not src: continue formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id='hls', fatal=False)) + src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif source_type == 'application/dash+xml': if not src: continue From cc1028aa6d27aeec39617d1ff8d2edcf1ee989d7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 13 May 2016 18:11:08 +0800 Subject: [PATCH 0510/3599] [openload] Fix extraction (closes #9472) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 456561bcc..5049b870e 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) code = self._search_regex( - r'<video[^>]+>\s*<script[^>]+>([^<]+)</script>', + r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>', webpage, 'JS code') decoded = self.openload_decode(code) From f196508f7b872963d13bcff94c0105d743322f71 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 22:19:00 +0600 Subject: [PATCH 0511/3599] [imdb] Relax _VALID_URL (Closes #9481) --- youtube_dl/extractor/imdb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 8bed8ccd0..203156229 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -12,9 +12,9 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'info_dict': { 'id': '2524815897', @@ -22,7 +22,10 @@ class ImdbIE(InfoExtractor): 'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb', 'description': 'md5:9061c2219254e5d14e03c25c98e96a81', } - } + }, { + 'url': 'http://www.imdb.com/video/_/vi2524815897', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 96c2e3e909171d103beafd1fd88e9d6e215681c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 May 2016 23:25:05 +0600 Subject: [PATCH 0512/3599] [imdb] Improve extraction --- youtube_dl/extractor/imdb.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 203156229..3a2b7cec5 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( + mimetype2ext, qualities, ) @@ -51,13 +51,27 @@ class ImdbIE(InfoExtractor): json_data = self._search_regex( r'<script[^>]+class="imdb-player-data"[^>]*?>(.*?)</script>', 
format_page, 'json data', flags=re.DOTALL) - info = json.loads(json_data) - format_info = info['videoPlayerObject']['video'] - f_id = format_info['ffname'] + info = self._parse_json(json_data, video_id, fatal=False) + if not info: + continue + format_info = info.get('videoPlayerObject', {}).get('video', {}) + if not format_info: + continue + video_info_list = format_info.get('videoInfoList') + if not video_info_list or not isinstance(video_info_list, list): + continue + video_info = video_info_list[0] + if not video_info or not isinstance(video_info, dict): + continue + video_url = video_info.get('videoUrl') + if not video_url: + continue + format_id = format_info.get('ffname') formats.append({ - 'format_id': f_id, - 'url': format_info['videoInfoList'][0]['videoUrl'], - 'quality': quality(f_id), + 'format_id': format_id, + 'url': video_url, + 'ext': mimetype2ext(video_info.get('videoMimeType')), + 'quality': quality(format_id), }) self._sort_formats(formats) From 0730be9022b415738e917c4cf72c2347ff0008e0 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 13 May 2016 20:24:36 +0100 Subject: [PATCH 0513/3599] [sina] fix extraction(fixes #1146) --- youtube_dl/extractor/sina.py | 124 ++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index d03f1b1d4..8fc66732a 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -4,28 +4,35 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import sanitized_Request +from ..utils import ( + HEADRequest, + ExtractorError, + int_or_none, + update_url_query, + qualities, + get_element_by_attribute, + clean_html, +) class SinaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/ - ( - (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-)))) 
- | + _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/ + (?: + (?:view/|.*\#)(?P<video_id>\d+)| + .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)| # This is used by external sites like Weibo - (api/sinawebApi/outplay.php/(?P<token>.+?)\.swf) + api/sinawebApi/outplay.php/(?P<token>.+?)\.swf ) ''' _TESTS = [ { - 'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', - 'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f', + 'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622', + 'md5': 'd38433e2fc886007729735650ae4b3e9', 'info_dict': { - 'id': '110028898', - 'ext': 'flv', - 'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员', + 'id': '250576622', + 'ext': 'mp4', + 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名', } }, { @@ -35,37 +42,74 @@ class SinaIE(InfoExtractor): 'ext': 'flv', 'title': '军方提高对朝情报监视级别', }, + 'skip': 'the page does not exist or has been deleted', + }, + { + 'url': 'http://video.sina.com.cn/view/250587748.html', + 'md5': '3d1807a25c775092aab3bc157fff49b4', + 'info_dict': { + 'id': '250587748', + 'ext': 'mp4', + 'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光', + }, }, ] - def _extract_video(self, video_id): - data = compat_urllib_parse_urlencode({'vid': video_id}) - url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, - video_id, 'Downloading video url') - image_page = self._download_webpage( - 'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data, - video_id, 'Downloading thumbnail info') - - return {'id': video_id, - 'url': url_doc.find('./durl/url').text, - 'ext': 'flv', - 'title': url_doc.find('./vname').text, - 'thumbnail': image_page.split('=')[1], - } - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - if mobj.group('token') is not None: - # The video id is in the redirected url - self.to_screen('Getting video id') - request = sanitized_Request(url) - request.get_method = lambda: 'HEAD' - (_, urlh) = 
self._download_webpage_handle(request, 'NA', False) - return self._real_extract(urlh.geturl()) - elif video_id is None: - pseudo_id = mobj.group('pseudo_id') - webpage = self._download_webpage(url, pseudo_id) - video_id = self._search_regex(r'vid:\'(\d+?)\'', webpage, 'video id') - return self._extract_video(video_id) + video_id = mobj.group('video_id') + if not video_id: + if mobj.group('token') is not None: + # The video id is in the redirected url + self.to_screen('Getting video id') + request = HEADRequest(url) + (_, urlh) = self._download_webpage_handle(request, 'NA', False) + return self._real_extract(urlh.geturl()) + else: + pseudo_id = mobj.group('pseudo_id') + webpage = self._download_webpage(url, pseudo_id) + error = get_element_by_attribute('class', 'errtitle', webpage) + if error: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, clean_html(error)), expected=True) + video_id = self._search_regex( + r"video_id\s*:\s*'(\d+)'", webpage, 'video id') + + video_data = self._download_json( + 'http://s.video.sina.com.cn/video/h5play', + video_id, query={'video_id': video_id}) + if video_data['code'] != 1: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, video_data['message']), expected=True) + else: + video_data = video_data['data'] + title = video_data['title'] + description = video_data.get('description') + if description: + description = description.strip() + + preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd']) + formats = [] + for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items(): + file_api = quality.get('file_api') + file_id = quality.get('file_id') + if not file_api or not file_id: + continue + formats.append({ + 'format_id': quality_id, + 'url': update_url_query(file_api, {'vid': file_id}), + 'preference': preference(quality_id), + 'ext': 'mp4', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': video_data.get('image'), + 
'duration': int_or_none(video_data.get('length')), + 'timestamp': int_or_none(video_data.get('create_time')), + 'formats': formats, + } From 134c6ea856be472f253bffbe99b88546fe417806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 04:46:38 +0600 Subject: [PATCH 0514/3599] [YoutubeDL] Sanitize url for url and url_transparent extraction results --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 34eeb77c5..03a6a1890 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -720,6 +720,7 @@ class YoutubeDL(object): result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): + ie_result['url'] = sanitize_url(ie_result['url']) extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): From b5abf8614898cc728488d7ecc7a55a4c5c92758f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 04:53:14 +0600 Subject: [PATCH 0515/3599] [cinemassacre] Remove extractor (Closes #9457) It now uses jwplatform --- youtube_dl/extractor/cinemassacre.py | 119 --------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 120 deletions(-) delete mode 100644 youtube_dl/extractor/cinemassacre.py diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py deleted file mode 100644 index 042c4f2f1..000000000 --- a/youtube_dl/extractor/cinemassacre.py +++ /dev/null @@ -1,119 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ExtractorError -from .screenwavemedia import ScreenwaveMediaIE - - -class CinemassacreIE(InfoExtractor): - _VALID_URL = 
'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' - _TESTS = [ - { - 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', - 'md5': 'fde81fbafaee331785f58cd6c0d46190', - 'info_dict': { - 'id': 'Cinemassacre-19911', - 'ext': 'mp4', - 'upload_date': '20121110', - 'title': '“Angry Video Game Nerd: The Movie” – Trailer', - 'description': 'md5:fb87405fcb42a331742a0dce2708560b', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', - 'md5': 'd72f10cd39eac4215048f62ab477a511', - 'info_dict': { - 'id': 'Cinemassacre-521be8ef82b16', - 'ext': 'mp4', - 'upload_date': '20131002', - 'title': 'The Mummy’s Hand (1940)', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - # Youtube embedded video - 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', - 'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9', - 'info_dict': { - 'id': 'OEVzPCY2T-g', - 'ext': 'webm', - 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', - 'upload_date': '20061207', - 'uploader': 'Cinemassacre', - 'uploader_id': 'JamesNintendoNerd', - 'description': 'md5:784734696c2b8b7f4b8625cc799e07f6', - } - }, - { - # Youtube embedded video - 'url': 'http://cinemassacre.com/2006/09/01/mckids/', - 'md5': '7393c4e0f54602ad110c793eb7a6513a', - 'info_dict': { - 'id': 'FnxsNhuikpo', - 'ext': 'webm', - 'upload_date': '20060901', - 'uploader': 'Cinemassacre Extra', - 'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53', - 'uploader_id': 'Cinemassacre', - 'title': 'AVGN: McKids', - } - }, - { - 'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/', - 'md5': '1376908e49572389e7b06251a53cdd08', - 'info_dict': { - 'id': 'Cinemassacre-555779690c440', - 'ext': 'mp4', - 'description': 'Let’s Play Mario 
Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!', - 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays', - 'upload_date': '20150525', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') - - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - [ - ScreenwaveMediaIE.EMBED_PATTERN, - r'<iframe[^>]+src="(?P<url>(?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', - ], - webpage, 'player data URL', default=None, group='url') - if not playerdata_url: - raise ExtractorError('Unable to find player data') - - video_title = self._html_search_regex( - r'<title>(?P<title>.+?)\|', webpage, 'title') - video_description = self._html_search_regex( - r'<div class="entry-content">(?P<description>.+?)</div>', - webpage, 'description', flags=re.DOTALL, fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 50d2204f2..b6f4ccc5d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -124,7 +124,6 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE -from .cinemassacre import CinemassacreIE from .cliprs import ClipRsIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE From 98d560f205e6aeddc767844d142b00525a9eaff9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 14 May 2016 18:48:36 +0800 
Subject: [PATCH 0516/3599] [test/test_socks] Skip SOCKS tests They occasional trigger errors or blocks (https://travis-ci.org/rg3/youtube-dl/jobs/130184883) --- test/test_socks.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_socks.py b/test/test_socks.py index d07003ceb..1e68eb0da 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -77,17 +77,28 @@ class TestMultipleSocks(unittest.TestCase): class TestSocks(unittest.TestCase): + _SKIP_SOCKS_TEST = True + def setUp(self): + if self._SKIP_SOCKS_TEST: + return + self.port = random.randint(20000, 30000) self.server_process = subprocess.Popen([ 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) def tearDown(self): + if self._SKIP_SOCKS_TEST: + return + self.server_process.terminate() self.server_process.communicate() def _get_ip(self, protocol): + if self._SKIP_SOCKS_TEST: + return '127.0.0.1' + ydl = FakeYDL({ 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), }) From 791ff52f753ee123426766aaa5320eb63a874b7b Mon Sep 17 00:00:00 2001 From: Jakub Wilk <jwilk@jwilk.net> Date: Sat, 14 May 2016 13:19:54 +0200 Subject: [PATCH 0517/3599] [teamcoco] Fix base64 regexp --- youtube_dl/extractor/teamcoco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index b49ab5f5b..79a778920 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -88,7 +88,7 @@ class TeamcocoIE(InfoExtractor): preload_codes = self._html_search_regex( r'(function.+)setTimeout\(function\(\)\{playlist', webpage, 'preload codes') - base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes) + base64_fragments = re.findall(r'"([a-zA-Z0-9+/=]+)"', preload_codes) base64_fragments.remove('init') def _check_sequence(cur_fragments): From 66e7ace17a36ed0f761ae620801e9e27d5c3cb3f Mon Sep 17 00:00:00 2001 From: Jakub Wilk 
<jwilk@jwilk.net> Date: Sat, 14 May 2016 13:41:41 +0200 Subject: [PATCH 0518/3599] Don't hardcode errno constant The value of ENOENT is architecture-dependent, so don't assume it's always 2. --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 03a6a1890..3917ca9dc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -326,7 +326,7 @@ class YoutubeDL(object): ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: - if ose.errno == 2: + if ose.errno == errno.ENOENT: self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise From bd1e484448c84904ce0d99fe05c3721053aa3c00 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Sun, 13 Mar 2016 12:29:15 +0100 Subject: [PATCH 0519/3599] [utils] js_to_json: various improvements now JS object literals like { /* " */ 0: ",]\xaa<\/p>", } will be correctly converted to JSON. 
--- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 30 ++++++++++++++++-------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index ca254779f..ab2842f3b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -640,6 +640,18 @@ class TestUtil(unittest.TestCase): on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) + on = js_to_json('{ 0: /* " \n */ ",]" , }') + self.assertEqual(json.loads(on), {'0': ',]'}) + + on = js_to_json(r'["<p>x<\/p>"]') + self.assertEqual(json.loads(on), ['<p>x</p>']) + + on = js_to_json(r'["\xaa"]') + self.assertEqual(json.loads(on), ['\u00aa']) + + on = js_to_json("['a\\\nb']") + self.assertEqual(json.loads(on), ['ab']) + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d6f94f8cd..52a20632f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1914,24 +1914,26 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - if v.startswith('"'): - v = re.sub(r"\\'", "'", v[1:-1]) - elif v.startswith("'"): - v = v[1:-1] - v = re.sub(r"\\\\|\\'|\"", lambda m: { - '\\\\': '\\\\', - "\\'": "'", + elif v.startswith('/*') or v == ',': + return "" + + if v[0] in ("'", '"'): + v = re.sub(r'(?s)\\.|"', lambda m: { '"': '\\"', - }[m.group(0)], v) + "\\'": "'", + '\\\n': '', + '\\x': '\\u00', + }.get(m.group(0), m.group(0)), v[1:-1]) + return '"%s"' % v - res = re.sub(r'''(?x) - "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| - '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| - [a-zA-Z_][.a-zA-Z_0-9]* + return re.sub(r'''(?sx) + "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| + /\*.*?\*/|,(?=\s*[\]}])| + [a-zA-Z_][.a-zA-Z_0-9]*| + [0-9]+(?=\s*:) ''', fix_kv, code) - res = re.sub(r',(\s*[\]}])', lambda m: 
m.group(1), res) - return res def qualities(quality_ids): From 640eea0a0cf7ae589126f7762e1cfc7bdd2250d9 Mon Sep 17 00:00:00 2001 From: felix <felix.von.s@posteo.de> Date: Sun, 20 Mar 2016 12:17:57 +0100 Subject: [PATCH 0520/3599] [ora] minimise fragile regex shenanigans; recognise unsafespeech.com URLs --- youtube_dl/extractor/ora.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py index 8545fb1b8..cfae71bcc 100644 --- a/youtube_dl/extractor/ora.py +++ b/youtube_dl/extractor/ora.py @@ -6,13 +6,14 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( get_element_by_attribute, + js_to_json, qualities, unescapeHTML, ) class OraTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)' + _VALID_URL = r'https?://(?:www\.)?(ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' _TEST = { 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', 'md5': 'fa33717591c631ec93b04b0e330df786', @@ -28,10 +29,13 @@ class OraTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_data = self._search_regex( - r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video') - m3u8_url = self._search_regex( - r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) + ora_meta = self._parse_json(self._search_regex( + r'(?s);\s*ora_meta = ({.*?});</script>', webpage, 'ora_meta'), display_id, + transform_source=lambda data: js_to_json(re.sub('":(document|\().*?(:false|\(\)),', '":null,', data))) + + video_data = ora_meta.get('video', ora_meta.get('current')) + m3u8_url = video_data['hls_stream'] + if m3u8_url: formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', 'm3u8_native', @@ -60,13 +64,11 @@ class OraTVIE(InfoExtractor): r'"youtube_id"\s*:\s*"([^"]+)', webpage, 
'youtube id'), 'Youtube') return { - 'id': self._search_regex( - r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id), + 'id': video_data.get('id', display_id), 'display_id': display_id, 'title': unescapeHTML(self._og_search_title(webpage)), 'description': get_element_by_attribute( 'class', 'video_txt_decription', webpage), - 'thumbnail': self._proto_relative_url(self._search_regex( - r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), + 'thumbnail': self._proto_relative_url(video_data.get('thumb')), 'formats': formats, } From 89ac4a19e658203db85c6a1d4b267a2eeb47a38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:39:58 +0600 Subject: [PATCH 0521/3599] [utils] Process non-base 10 integers in js_to_json --- test/test_utils.py | 19 +++++++++++++++++++ youtube_dl/utils.py | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index ab2842f3b..26f66bff6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -617,6 +617,15 @@ class TestUtil(unittest.TestCase): json_code = js_to_json(inp) self.assertEqual(json.loads(json_code), json.loads(inp)) + inp = '''{ + 0:{src:'skipped', type: 'application/dash+xml'}, + 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, + }''' + self.assertEqual(js_to_json(inp), '''{ + "0":{"src":"skipped", "type": "application/dash+xml"}, + "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} + }''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) @@ -652,6 +661,16 @@ class TestUtil(unittest.TestCase): on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) + on = js_to_json('{0xff:0xff}') + self.assertEqual(json.loads(on), {'255': 255}) + + on = js_to_json('{077:077}') + self.assertEqual(json.loads(on), {'63': 63}) + + on = js_to_json('{42:42}') + self.assertEqual(json.loads(on), 
{'42': 42}) + + def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52a20632f..25a9f33c0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1925,6 +1925,17 @@ def js_to_json(code): '\\x': '\\u00', }.get(m.group(0), m.group(0)), v[1:-1]) + INTEGER_TABLE = ( + (r'^(0[xX][0-9a-fA-F]+)', 16), + (r'^(0+[0-7]+)', 8), + ) + + for regex, base in INTEGER_TABLE: + im = re.match(regex, v) + if im: + i = int(im.group(1), base) + return '"%d":' % i if v.endswith(':') else '%d' % i + return '"%s"' % v return re.sub(r'''(?sx) @@ -1932,6 +1943,7 @@ def js_to_json(code): '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| /\*.*?\*/|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| + (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From ca950f49e909baf6672034ffc2c1c2ee7133cf23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:45:18 +0600 Subject: [PATCH 0522/3599] [ora] Revert extraction to regexes It's less fragile than using js_to_json with ora js --- youtube_dl/extractor/ora.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py index cfae71bcc..1d42be39b 100644 --- a/youtube_dl/extractor/ora.py +++ b/youtube_dl/extractor/ora.py @@ -6,15 +6,14 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( get_element_by_attribute, - js_to_json, qualities, unescapeHTML, ) class OraTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:ora\.tv|unsafespeech\.com)/([^/]+/)*(?P<id>[^/\?#]+)' + _TESTS = [{ 'url': 
'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', 'md5': 'fa33717591c631ec93b04b0e330df786', 'info_dict': { @@ -23,19 +22,19 @@ class OraTVIE(InfoExtractor): 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!', 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1', } - } + }, { + 'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ora_meta = self._parse_json(self._search_regex( - r'(?s);\s*ora_meta = ({.*?});</script>', webpage, 'ora_meta'), display_id, - transform_source=lambda data: js_to_json(re.sub('":(document|\().*?(:false|\(\)),', '":null,', data))) - - video_data = ora_meta.get('video', ora_meta.get('current')) - m3u8_url = video_data['hls_stream'] - + video_data = self._search_regex( + r'"(?:video|current)"\s*:\s*({[^}]+?})', webpage, 'current video') + m3u8_url = self._search_regex( + r'hls_stream"?\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) if m3u8_url: formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', 'm3u8_native', @@ -64,11 +63,13 @@ class OraTVIE(InfoExtractor): r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube') return { - 'id': video_data.get('id', display_id), + 'id': self._search_regex( + r'"id"\s*:\s*(\d+)', video_data, 'video id', default=display_id), 'display_id': display_id, 'title': unescapeHTML(self._og_search_title(webpage)), 'description': get_element_by_attribute( 'class', 'video_txt_decription', webpage), - 'thumbnail': self._proto_relative_url(video_data.get('thumb')), + 'thumbnail': self._proto_relative_url(self._search_regex( + r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), 'formats': formats, } From 364cf465dd53e8006f5523c348f127f8df657bc3 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 20:46:33 +0600 Subject: [PATCH 0523/3599] [test_utils] PEP 8 --- test/test_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 26f66bff6..520d32ff5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -670,7 +670,6 @@ class TestUtil(unittest.TestCase): on = js_to_json('{42:42}') self.assertEqual(json.loads(on), {'42': 42}) - def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'}) From 5c86bfe70ff0048e59c6e890af14a055522fd3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:35:03 +0600 Subject: [PATCH 0524/3599] [3qsdn] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/threeqsdn.py | 132 +++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 youtube_dl/extractor/threeqsdn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b6f4ccc5d..2db3b3c3f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -766,6 +766,7 @@ from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE +from .threeqsdn import ThreeQSDNIE from .tinypic import TinyPicIE from .tlc import TlcDeIE from .tmz import ( diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py new file mode 100644 index 000000000..27a3de5c4 --- /dev/null +++ b/youtube_dl/extractor/threeqsdn.py @@ -0,0 +1,132 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + js_to_json, + mimetype2ext, +) + + +class ThreeQSDNIE(InfoExtractor): + IE_NAME = '3qsdn' + IE_DESC = '3Q SDN' + _VALID_URL = 
r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TESTS = [{ + # ondemand from http://www.philharmonie.tv/veranstaltung/26/ + 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http', + 'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd', + 'info_dict': { + 'id': '0280d6b9-1215-11e6-b427-0cc47a188158', + 'ext': 'mp4', + 'title': '0280d6b9-1215-11e6-b427-0cc47a188158', + 'is_live': False, + }, + 'expected_warnings': ['Failed to download MPD manifest'], + }, { + # live video stream + 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true', + 'info_dict': { + 'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f', + 'ext': 'mp4', + 'title': 'd755d94b-4ab9-11e3-9162-0025907ad44f', + 'is_live': False, + }, + }, { + # live audio stream + 'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48', + 'only_matching': True, + }, { + # live audio stream with some 404 URLs + 'url': 'http://playout.3qsdn.com/ac5c3186-777a-11e2-9c30-9acf09e2db48', + 'only_matching': True, + }, { + # geo restricted with 'This content is not available in your country' + 'url': 'http://playout.3qsdn.com/d63a3ffe-75e8-11e2-9c30-9acf09e2db48', + 'only_matching': True, + }, { + # geo restricted with 'playout.3qsdn.com/forbidden' + 'url': 'http://playout.3qsdn.com/8e330f26-6ae2-11e2-a16a-9acf09e2db48', + 'only_matching': True, + }, { + # live video with rtmp link + 'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + js = self._download_webpage( + 'http://playout.3qsdn.com/%s' % video_id, video_id, + query={'js': 'true'}) + + if any(p in js for p in ( + '>This content is not available in your country', + 'playout.3qsdn.com/forbidden')): + self.raise_geo_restricted() + + stream_content = self._search_regex( + r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js, + 'stream content', 
default='demand', group='content') + + live = stream_content == 'live' + + stream_type = self._search_regex( + r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js, + 'stream type', default='video', group='type') + + formats = [] + urls = set() + + def extract_formats(item_url, item={}): + if not item_url or item_url in urls: + return + urls.add(item_url) + type_ = item.get('type') + ext = determine_ext(item_url, default_ext=None) + if type_ == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + item_url, video_id, mpd_id='mpd', fatal=False)) + elif type_ in ('application/vnd.apple.mpegURL', 'application/x-mpegurl') or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + item_url, video_id, 'mp4', + entry_protocol='m3u8' if live else 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + item_url, video_id, f4m_id='hds', fatal=False)) + else: + if not self._is_valid_url(item_url, video_id): + return + formats.append({ + 'url': item_url, + 'format_id': item.get('quality'), + 'ext': 'mp4' if item_url.startswith('rtsp') else mimetype2ext(type_) or ext, + 'vcodec': 'none' if stream_type == 'audio' else None, + }) + + for item_js in re.findall(r'({.*?\b(?:src|source)\s*:\s*["\'].+?})', js): + f = self._parse_json( + item_js, video_id, transform_source=js_to_json, fatal=False) + if not f: + continue + extract_formats(f.get('src'), f) + + # More relaxed version to collect additional URLs and acting + # as a future-proof fallback + for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js): + extract_formats(src) + + self._sort_formats(formats) + + title = self._live_title(video_id) if live else video_id + + return { + 'id': video_id, + 'title': title, + 'is_live': live, + 'formats': formats, + } From 5d39176f6de8bab1e019ead7cd497659f3fc1a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 
2016 23:40:34 +0600 Subject: [PATCH 0525/3599] [extractor/generic:3qsdn] Add support for embeds --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/threeqsdn.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0f1eb7fa6..b48ccfc97 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -61,6 +61,7 @@ from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE +from .threeqsdn import ThreeQSDNIE class GenericIE(InfoExtractor): @@ -1983,6 +1984,11 @@ class GenericIE(InfoExtractor): if liveleak_url: return self.url_result(liveleak_url, 'LiveLeak') + # Look for 3Q SDN embeds + threeqsdn_url = ThreeQSDNIE._extract_url(webpage) + if threeqsdn_url: + return self.url_result(self._proto_relative_url(threeqsdn_url), ThreeQSDNIE.ie_key()) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dl/extractor/threeqsdn.py index 27a3de5c4..c77a07989 100644 --- a/youtube_dl/extractor/threeqsdn.py +++ b/youtube_dl/extractor/threeqsdn.py @@ -56,6 +56,13 @@ class ThreeQSDNIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From cda6d47aad106a825f837c7a583fffc783c4b63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:41:57 +0600 Subject: [PATCH 0526/3599] [utils] Simplify integer conversion in js_to_json --- youtube_dl/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 25a9f33c0..a637563cb 100644 --- a/youtube_dl/utils.py +++ 
b/youtube_dl/utils.py @@ -1926,14 +1926,14 @@ def js_to_json(code): }.get(m.group(0), m.group(0)), v[1:-1]) INTEGER_TABLE = ( - (r'^(0[xX][0-9a-fA-F]+)', 16), - (r'^(0+[0-7]+)', 8), + (r'^0[xX][0-9a-fA-F]+', 16), + (r'^0+[0-7]+', 8), ) for regex, base in INTEGER_TABLE: im = re.match(regex, v) if im: - i = int(im.group(1), base) + i = int(im.group(0), base) return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v From 6f41b2bcf16899f8c3f0ea705b2914cf1ae668a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 May 2016 23:58:25 +0600 Subject: [PATCH 0527/3599] [extractor/generic] Improve 3qsdn embeds support (Closes #9453) --- youtube_dl/extractor/generic.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b48ccfc97..a6b1e23e3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1428,7 +1428,8 @@ class GenericIE(InfoExtractor): # Site Name | Video Title # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical - video_title = self._html_search_regex( + video_title = self._og_search_title( + webpage, default=None) or self._html_search_regex( r'(?s)<title>(.*?)', webpage, 'video title', default='video') @@ -1446,6 +1447,9 @@ class GenericIE(InfoExtractor): video_uploader = self._search_regex( r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + video_description = self._og_search_description(webpage, default=None) + video_thumbnail = self._og_search_thumbnail(webpage, default=None) + # Helper method def _playlist_from_matches(matches, getter=None, ie=None): urlrs = orderedSet( @@ -1987,7 +1991,15 @@ class GenericIE(InfoExtractor): # Look for 3Q SDN embeds threeqsdn_url = ThreeQSDNIE._extract_url(webpage) if threeqsdn_url: - return self.url_result(self._proto_relative_url(threeqsdn_url), ThreeQSDNIE.ie_key()) + return { + '_type': 
'url_transparent', + 'ie_key': ThreeQSDNIE.ie_key(), + 'url': self._proto_relative_url(threeqsdn_url), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } def check_video(vurl): if YoutubeIE.suitable(vurl): From ed56f260399728f1975dd30f4c8ee110cf106d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 May 2016 03:34:35 +0600 Subject: [PATCH 0528/3599] [extractor/common] Improve name extraction for m3u8 formats --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0843d89af..8a8c07226 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1139,11 +1139,14 @@ class InfoExtractor(object): if m3u8_id: format_id.append(m3u8_id) last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. 
if not live: - format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) + format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), 'url': format_url(line.strip()), From 69c9cc2716a4d076b023096c23b6f7646627824a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 15 May 2016 03:38:04 +0600 Subject: [PATCH 0529/3599] [xvideos] Extract html5 player formats (Closes #9495) --- youtube_dl/extractor/xvideos.py | 43 ++++++++++++++++----------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 710ad5041..1dfe031ca 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -8,7 +8,6 @@ from ..utils import ( clean_html, ExtractorError, determine_ext, - sanitized_Request, ) @@ -25,8 +24,6 @@ class XVideosIE(InfoExtractor): } } - _ANDROID_USER_AGENT = 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19' - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -35,31 +32,34 @@ class XVideosIE(InfoExtractor): if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - video_url = compat_urllib_parse_unquote( - self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) video_title = self._html_search_regex( r'(.*?)\s+-\s+XVID', webpage, 'title') video_thumbnail = self._search_regex( r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False) - formats = [{ - 'url': video_url, - }] + formats = [] - android_req = sanitized_Request(url) - android_req.add_header('User-Agent', self._ANDROID_USER_AGENT) - android_webpage = self._download_webpage(android_req, video_id, fatal=False) + video_url = compat_urllib_parse_unquote(self._search_regex( + 
r'flv_url=(.+?)&', webpage, 'video URL', default='')) + if video_url: + formats.append({'url': video_url}) - if android_webpage is not None: - player_params_str = self._search_regex( - 'mobileReplacePlayerDivTwoQual\(([^)]+)\)', - android_webpage, 'player parameters', default='') - player_params = list(map(lambda s: s.strip(' \''), player_params_str.split(','))) - if player_params: - formats.extend([{ - 'url': param, - 'preference': -10, - } for param in player_params if determine_ext(param) == 'mp4']) + player_args = self._search_regex( + r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None) + if player_args: + for arg in player_args.split(','): + format_url = self._search_regex( + r'(["\'])(?P<url>https?://.+?)\1', arg, 'url', + default=None, group='url') + if not format_url: + continue + ext = determine_ext(format_url) + if ext == 'mp4': + formats.append({'url': format_url}) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) self._sort_formats(formats) @@ -67,7 +67,6 @@ class XVideosIE(InfoExtractor): 'id': video_id, 'formats': formats, 'title': video_title, - 'ext': 'flv', 'thumbnail': video_thumbnail, 'age_limit': 18, } From 79298173c5a957456cb17b2b26338a657f1aae1e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:32:54 +0800 Subject: [PATCH 0530/3599] [utils] Fix getheader in urlhandle_detect_ext Fixes #7049, related to #9440 --- youtube_dl/utils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a637563cb..24e74428b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2031,11 +2031,7 @@ def mimetype2ext(mt): def urlhandle_detect_ext(url_handle): - try: - url_handle.headers - getheader = lambda h: url_handle.headers[h] - except AttributeError: # Python < 3 - getheader = url_handle.info().getheader + getheader = 
url_handle.headers.get cd = getheader('Content-Disposition') if cd: From cec9727c7f6a0dad8b10a51f0a6581ac5a1dbe86 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:35:31 +0800 Subject: [PATCH 0531/3599] [hearthisat] Detect invalid download links (fixes #9440) --- youtube_dl/extractor/hearthisat.py | 38 +++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py index 7d8698655..ac42ef414 100644 --- a/youtube_dl/extractor/hearthisat.py +++ b/youtube_dl/extractor/hearthisat.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( HEADRequest, + KNOWN_EXTENSIONS, sanitized_Request, str_to_int, urlencode_postdata, @@ -17,7 +18,7 @@ from ..utils import ( class HearThisAtIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$' _PLAYLIST_URL = 'https://hearthis.at/playlist.php' - _TEST = { + _TESTS = [{ 'url': 'https://hearthis.at/moofi/dr-kreep', 'md5': 'ab6ec33c8fed6556029337c7885eb4e0', 'info_dict': { @@ -34,7 +35,25 @@ class HearThisAtIE(InfoExtractor): 'duration': 71, 'categories': ['Experimental'], } - } + }, { + # 'download' link redirects to the original webpage + 'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/', + 'md5': '5980ceb7c461605d30f1f039df160c6e', + 'info_dict': { + 'id': '811296', + 'ext': 'mp3', + 'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!', + 'description': 'Listen to DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix! 
by TwitchSF on hearthis.at - Dance', + 'upload_date': '20160328', + 'timestamp': 1459186146, + 'thumbnail': 're:^https?://.*\.jpg$', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 4360, + 'categories': ['Dance'], + }, + }] def _real_extract(self, url): m = re.match(self._VALID_URL, url) @@ -90,13 +109,14 @@ class HearThisAtIE(InfoExtractor): ext_handle = self._request_webpage( ext_req, display_id, note='Determining extension') ext = urlhandle_detect_ext(ext_handle) - formats.append({ - 'format_id': 'download', - 'vcodec': 'none', - 'ext': ext, - 'url': download_url, - 'preference': 2, # Usually better quality - }) + if ext in KNOWN_EXTENSIONS: + formats.append({ + 'format_id': 'download', + 'vcodec': 'none', + 'ext': ext, + 'url': download_url, + 'preference': 2, # Usually better quality + }) self._sort_formats(formats) return { From 5572d598a537998615c760ca06bd8d3894150c6a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 15 May 2016 15:44:04 +0800 Subject: [PATCH 0532/3599] [hearthisat] Update the first test --- youtube_dl/extractor/hearthisat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dl/extractor/hearthisat.py index ac42ef414..256453882 100644 --- a/youtube_dl/extractor/hearthisat.py +++ b/youtube_dl/extractor/hearthisat.py @@ -27,7 +27,7 @@ class HearThisAtIE(InfoExtractor): 'title': 'Moofi - Dr. Kreep', 'thumbnail': 're:^https?://.*\.jpg$', 'timestamp': 1421564134, - 'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.', + 'description': 'Listen to Dr. 
Kreep by Moofi on hearthis.at - Modular, Eurorack, Mutable Intruments Braids, Valhalla-DSP', 'upload_date': '20150118', 'comment_count': int, 'view_count': int, From a0a81918f18252805b161e4f7d0dc4924b672948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 May 2016 22:07:51 +0600 Subject: [PATCH 0533/3599] [collegehumor] Remove extractor It now uses brightcove --- youtube_dl/extractor/collegehumor.py | 101 --------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 102 deletions(-) delete mode 100644 youtube_dl/extractor/collegehumor.py diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py deleted file mode 100644 index 002b24037..000000000 --- a/youtube_dl/extractor/collegehumor.py +++ /dev/null @@ -1,101 +0,0 @@ -from __future__ import unicode_literals - -import json -import re - -from .common import InfoExtractor -from ..utils import int_or_none - - -class CollegeHumorIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$' - - _TESTS = [ - { - 'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', - 'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', - 'info_dict': { - 'id': '6902724', - 'ext': 'mp4', - 'title': 'Comic-Con Cosplay Catastrophe', - 'description': "Fans get creative this year at San Diego. Too creative. 
And yes, that's really Joss Whedon.", - 'age_limit': 13, - 'duration': 187, - }, - }, { - 'url': 'http://www.collegehumor.com/video/3505939/font-conference', - 'md5': '72fa701d8ef38664a4dbb9e2ab721816', - 'info_dict': { - 'id': '3505939', - 'ext': 'mp4', - 'title': 'Font Conference', - 'description': "This video wasn't long enough, so we made it double-spaced.", - 'age_limit': 10, - 'duration': 179, - }, - }, { - # embedded youtube video - 'url': 'http://www.collegehumor.com/embed/6950306', - 'info_dict': { - 'id': 'Z-bao9fg6Yc', - 'ext': 'mp4', - 'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!', - 'uploader': 'Mark Dice', - 'uploader_id': 'MarkDice', - 'description': 'md5:62c3dab9351fac7bb44b53b69511d87f', - 'upload_date': '20140127', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Youtube'], - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - - jsonUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id + '.json' - data = json.loads(self._download_webpage( - jsonUrl, video_id, 'Downloading info JSON')) - vdata = data['video'] - if vdata.get('youtubeId') is not None: - return { - '_type': 'url', - 'url': vdata['youtubeId'], - 'ie_key': 'Youtube', - } - - AGE_LIMITS = {'nc17': 18, 'r': 18, 'pg13': 13, 'pg': 10, 'g': 0} - rating = vdata.get('rating') - if rating: - age_limit = AGE_LIMITS.get(rating.lower()) - else: - age_limit = None # None = No idea - - PREFS = {'high_quality': 2, 'low_quality': 0} - formats = [] - for format_key in ('mp4', 'webm'): - for qname, qurl in vdata.get(format_key, {}).items(): - formats.append({ - 'format_id': format_key + '_' + qname, - 'url': qurl, - 'format': format_key, - 'preference': PREFS.get(qname), - }) - self._sort_formats(formats) - - duration = int_or_none(vdata.get('duration'), 1000) - like_count = int_or_none(vdata.get('likes')) - - return { - 'id': video_id, - 'title': vdata['title'], 
- 'description': vdata.get('description'), - 'thumbnail': vdata.get('thumbnail'), - 'formats': formats, - 'age_limit': age_limit, - 'duration': duration, - 'like_count': like_count, - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2db3b3c3f..ca9d85e33 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -138,7 +138,6 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) -from .collegehumor import CollegeHumorIE from .collegerama import CollegeRamaIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE From f7199423e542580cf8c30991d122673276113497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 16 May 2016 00:30:13 +0600 Subject: [PATCH 0534/3599] [groupon] Add support for Youtube embeds (Closes #9508) --- youtube_dl/extractor/groupon.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index f6b69662b..1dd0a81cc 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class GrouponIE(InfoExtractor): - _VALID_URL = r'https?://www\.groupon\.com/deals/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?groupon\.com/deals/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://www.groupon.com/deals/bikram-yoga-huntington-beach-2#ooid=tubGNycTo_9Uxg82uESj4i61EYX8nyuf', @@ -15,18 +15,26 @@ class GrouponIE(InfoExtractor): }, 'playlist': [{ 'info_dict': { - 'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf', - 'ext': 'flv', - 'title': 'Bikram Yoga Huntington Beach | Orange County', + 'id': 'fk6OhWpXgIQ', + 'ext': 'mp4', + 'title': 'Bikram Yoga Huntington Beach | Orange County !tubGNycTo@9Uxg82uESj4i61EYX8nyuf', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', - 'duration': 44.961, + 'duration': 45, + 'upload_date': '20160405', + 
'uploader_id': 'groupon', + 'uploader': 'Groupon', }, }], 'params': { - 'skip_download': 'HDS', + 'skip_download': True, } } + _PROVIDERS = { + 'ooyala': ('ooyala:%s', 'Ooyala'), + 'youtube': ('%s', 'Youtube'), + } + def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) @@ -36,12 +44,17 @@ class GrouponIE(InfoExtractor): videos = payload['carousel'].get('dealVideos', []) entries = [] for v in videos: - if v.get('provider') != 'OOYALA': + provider = v.get('provider') + video_id = v.get('media') or v.get('id') or v.get('baseURL') + if not provider or not video_id: + continue + url_pattern, ie_key = self._PROVIDERS.get(provider.lower()) + if not url_pattern: self.report_warning( '%s: Unsupported video provider %s, skipping video' % - (playlist_id, v.get('provider'))) + (playlist_id, provider)) continue - entries.append(self.url_result('ooyala:%s' % v['media'])) + entries.append(self.url_result(url_pattern % video_id, ie_key)) return { '_type': 'playlist', From 36755d9d694f818ce8f367ce7eb41374f194893d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 16 May 2016 17:25:47 +0200 Subject: [PATCH 0535/3599] release 2016.05.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++---- youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1fb878b59..7024fc729 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.10 +[debug] youtube-dl version 2016.05.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index de84e5c84..29db13883 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -6,6 +6,7 @@ - **22tracks:genre** - **22tracks:track** - **24video** + - **3qsdn**: 3Q SDN - **3sat** - **4tube** - **56.com** @@ -114,7 +115,6 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** - - **Cinemassacre** - **Clipfish** - **cliphunter** - **ClipRs** @@ -128,7 +128,6 @@ - **CNN** - **CNNArticle** - **CNNBlogs** - - **CollegeHumor** - **CollegeRama** - **ComCarCoff** - **ComedyCentral** @@ -680,7 +679,6 @@ - **tvp.pl:Series** - **TVPlay**: TV3Play and related services - **Tweakers** - - **twitch:bookmarks** - **twitch:chapter** - **twitch:past_broadcasts** - **twitch:profile** @@ -698,7 +696,8 @@ - 
**USAToday** - **ustream** - **ustream:channel** - - **Ustudio** + - **ustudio** + - **ustudio:embed** - **Varzesh3** - **Vbox7** - **VeeHD** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 45e40c0d1..5a0fdd6ce 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.10' +__version__ = '2016.05.16' From cdd94c2eae6c6f0a627d457c3a73894a62eb86c5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 17 May 2016 14:38:15 +0800 Subject: [PATCH 0536/3599] [utils] Check for None values in SOCKS proxy Originally reported at https://github.com/rg3/youtube-dl/pull/9287#issuecomment-219617864 --- youtube_dl/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 24e74428b..ac60ba18c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -883,12 +883,17 @@ def make_socks_conn_class(base_class, socks_proxy): elif url_components.scheme.lower() == 'socks4a': socks_type = ProxyType.SOCKS4A + def unquote_if_non_empty(s): + if not s: + return s + return compat_urllib_parse_unquote_plus(s) + proxy_args = ( socks_type, url_components.hostname, url_components.port or 1080, True, # Remote DNS - compat_urllib_parse_unquote_plus(url_components.username), - compat_urllib_parse_unquote_plus(url_components.password), + unquote_if_non_empty(url_components.username), + unquote_if_non_empty(url_components.password), ) class SocksConnection(base_class): From 055f0d3d0636e343354a19cd558a3aac3cf31399 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 17 May 2016 15:38:57 +0800 Subject: [PATCH 0537/3599] [abcnews] Added a new extractor (closes #3992) Related: #6108, #8664, #9459 --- youtube_dl/extractor/abcnews.py | 135 +++++++++++++++++++++++++++++ youtube_dl/extractor/amp.py | 4 +- youtube_dl/extractor/extractors.py | 4 + 3 files changed, 141 insertions(+), 2 
deletions(-) create mode 100644 youtube_dl/extractor/abcnews.py diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py new file mode 100644 index 000000000..b61a6327c --- /dev/null +++ b/youtube_dl/extractor/abcnews.py @@ -0,0 +1,135 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import calendar +import re +import time + +from .amp import AMPIE +from .common import InfoExtractor +from ..compat import compat_urlparse + + +class AbcNewsVideoIE(AMPIE): + IE_NAME = 'abcnews:video' + _VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932', + 'info_dict': { + 'id': '20411932', + 'ext': 'mp4', + 'display_id': 'week-exclusive-irans-foreign-minister-zarif', + 'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif', + 'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. 
Javad Zarif.', + 'duration': 180, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_id = mobj.group('id') + info_dict = self._extract_feed_info( + 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) + info_dict.update({ + 'id': video_id, + 'display_id': display_id, + }) + return info_dict + + +class AbcNewsIE(InfoExtractor): + IE_NAME = 'abcnews' + _VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY', + 'info_dict': { + 'id': '10498713', + 'ext': 'flv', + 'display_id': 'dramatic-video-rare-death-job-america', + 'title': 'Occupational Hazards', + 'description': 'Nightline investigates the dangers that lurk at various jobs.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20100428', + 'timestamp': 1272412800, + }, + 'add_ie': ['AbcNewsVideo'], + }, { + 'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818', + 'info_dict': { + 'id': '39125818', + 'ext': 'mp4', + 'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016', + 'title': 'Justin Timberlake Drops Hints For Secret Single', + 'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.', + 'upload_date': '20160515', + 'timestamp': 1463329500, + }, + 'params': { + # m3u8 download + 'skip_download': True, + # The embedded YouTube video is blocked due to copyright issues + 'playlist_items': '1', + }, + 'add_ie': ['AbcNewsVideo'], + }, { + 'url': 
'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex( + r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL') + full_video_url = compat_urlparse.urljoin(url, video_url) + + youtube_url = self._html_search_regex( + r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"', + webpage, 'YouTube URL', default=None) + + timestamp = None + date_str = self._html_search_regex( + r'<span[^>]+class="timestamp">([^<]+)</span>', + webpage, 'timestamp', fatal=False) + if date_str: + tz_offset = 0 + if date_str.endswith(' ET'): # Eastern Time + tz_offset = -5 + date_str = date_str[:-3] + date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p'] + for date_format in date_formats: + try: + timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format)) + except ValueError: + continue + if timestamp is not None: + timestamp -= tz_offset * 3600 + + entry = { + '_type': 'url_transparent', + 'ie_key': AbcNewsVideoIE.ie_key(), + 'url': full_video_url, + 'id': video_id, + 'display_id': display_id, + 'timestamp': timestamp, + } + + if youtube_url: + entries = [entry, self.url_result(youtube_url, 'Youtube')] + return self.playlist_result(entries) + + return entry diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index 138fa0808..8545681be 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -52,7 +52,7 @@ class AMPIE(InfoExtractor): for media_data in media_content: media = media_data['@attributes'] media_type = media['type'] - if media_type == 'video/f4m': + if media_type in ('video/f4m', 'application/f4m+xml'): formats.extend(self._extract_f4m_formats( media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', 
video_id, f4m_id='hds', fatal=False)) @@ -61,7 +61,7 @@ class AMPIE(InfoExtractor): media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)) else: formats.append({ - 'format_id': media_data['media-category']['@attributes']['label'], + 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'), 'url': media['url'], 'tbr': int_or_none(media.get('bitrate')), 'filesize': int_or_none(media.get('fileSize')), diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ca9d85e33..861701f4c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals from .abc import ABCIE from .abc7news import Abc7NewsIE +from .abcnews import ( + AbcNewsIE, + AbcNewsVideoIE, +) from .academicearth import AcademicEarthCourseIE from .acast import ( ACastIE, From 15cda1ef774e9dbc538765f59dff5b10a492eca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 May 2016 23:46:47 +0600 Subject: [PATCH 0538/3599] [nfb] Fix uploader extraction --- youtube_dl/extractor/nfb.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 51e4a34f7..234e49047 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -37,8 +37,7 @@ class NFBIE(InfoExtractor): uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"', page, 'director id', fatal=False) - uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>', - page, 'director name', fatal=False) + uploader = self._og_search_property('video:director', page, 'director name') request = sanitized_Request( 'https://www.nfb.ca/film/%s/player_config' % video_id, From 11e6a0b64130f9b4aea1a6115a3ebaad73f2f5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 18 May 
2016 00:25:15 +0600 Subject: [PATCH 0539/3599] [nfb] Modernize and extract subtitles --- youtube_dl/extractor/nfb.py | 110 +++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 234e49047..adcc636bc 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -2,8 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - sanitized_Request, + clean_html, + determine_ext, + int_or_none, + qualities, urlencode_postdata, + xpath_text, ) @@ -16,12 +20,12 @@ class NFBIE(InfoExtractor): 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', 'info_dict': { 'id': 'qallunaat_why_white_people_are_funny', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Qallunaat! Why White People Are Funny ', - 'description': 'md5:836d8aff55e087d04d9f6df554d4e038', + 'description': 'md5:6b8e32dde3abf91e58857b174916620c', 'duration': 3128, + 'creator': 'Mark Sandiford', 'uploader': 'Mark Sandiford', - 'uploader_id': 'mark-sandiford', }, 'params': { # rtmp download @@ -31,64 +35,78 @@ class NFBIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - page = self._download_webpage( - 'https://www.nfb.ca/film/%s' % video_id, video_id, - 'Downloading film page') - uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"', - page, 'director id', fatal=False) - uploader = self._og_search_property('video:director', page, 'director name') - - request = sanitized_Request( + config = self._download_xml( 'https://www.nfb.ca/film/%s/player_config' % video_id, - urlencode_postdata({'getConfig': 'true'})) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') + video_id, 'Downloading player config XML', + data=urlencode_postdata({'getConfig': 
'true'}), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf' + }) - config = self._download_xml(request, video_id, 'Downloading player config XML') - - title = None - description = None - thumbnail = None - duration = None - formats = [] - - def extract_thumbnail(media): - thumbnails = {} - for asset in media.findall('assets/asset'): - thumbnails[asset.get('quality')] = asset.find('default/url').text - if not thumbnails: - return None - if 'high' in thumbnails: - return thumbnails['high'] - return list(thumbnails.values())[0] + title, description, thumbnail, duration, uploader, author = [None] * 6 + thumbnails, formats = [[]] * 2 + subtitles = {} for media in config.findall('./player/stream/media'): if media.get('type') == 'posterImage': - thumbnail = extract_thumbnail(media) - elif media.get('type') == 'video': - duration = int(media.get('duration')) - title = media.find('title').text - description = media.find('description').text - # It seems assets always go from lower to better quality, so no need to sort + quality_key = qualities(('low', 'high')) + thumbnails = [] for asset in media.findall('assets/asset'): - for x in asset: + asset_url = xpath_text(asset, 'default/url', default=None) + if not asset_url: + continue + quality = asset.get('quality') + thumbnails.append({ + 'url': asset_url, + 'id': quality, + 'preference': quality_key(quality), + }) + elif media.get('type') == 'video': + title = xpath_text(media, 'title', fatal=True) + for asset in media.findall('assets/asset'): + quality = asset.get('quality') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', quality or '', 'height', default=None)) + for node in asset: + streamer = xpath_text(node, 'streamerURI', default=None) + if not streamer: + continue + play_path = xpath_text(node, 'url', default=None) + if not play_path: + continue formats.append({ - 'url': x.find('streamerURI').text, - 'app': 
x.find('streamerURI').text.split('/', 3)[3], - 'play_path': x.find('url').text, + 'url': streamer, + 'app': streamer.split('/', 3)[3], + 'play_path': play_path, 'rtmp_live': False, - 'ext': 'mp4', - 'format_id': '%s-%s' % (x.tag, asset.get('quality')), + 'ext': 'flv', + 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag, + 'height': height, }) + self._sort_formats(formats) + description = clean_html(xpath_text(media, 'description')) + uploader = xpath_text(media, 'author') + duration = int_or_none(media.get('duration')) + for subtitle in media.findall('./subtitles/subtitle'): + subtitle_url = xpath_text(subtitle, 'url', default=None) + if not subtitle_url: + continue + lang = xpath_text(subtitle, 'lang', default='en') + subtitles.setdefault(lang, []).append({ + 'url': subtitle_url, + 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(), + }) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'duration': duration, + 'creator': uploader, 'uploader': uploader, - 'uploader_id': uploader_id, 'formats': formats, + 'subtitles': subtitles, } From b78531a36abd765aa9c9df1dba1cf82dc23f8fec Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 18 May 2016 22:24:46 +0100 Subject: [PATCH 0540/3599] [formula1] Add new extractor(closes #3617) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/formula1.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 youtube_dl/extractor/formula1.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 861701f4c..efbe970fe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -242,6 +242,7 @@ from .fktv import FKTVIE from .flickr import FlickrIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE +from .formula1 import Formula1IE from .fourtube import FourTubeIE from .fox 
import FOXIE from .foxgay import FoxgayIE diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py new file mode 100644 index 000000000..726393fcc --- /dev/null +++ b/youtube_dl/extractor/formula1.py @@ -0,0 +1,25 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class Formula1IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html' + _TEST = { + 'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html', + 'md5': '8c79e54be72078b26b89e0e111c0502b', + 'info_dict': { + 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', + 'ext': 'flv', + 'title': 'Race highlights - Spain 2016', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + ooyala_embed_code = self._search_regex( + r'data-videoid="([^"]+)"', webpage, 'ooyala embed code') + return self.url_result( + 'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code) From 46bc9b7d7cea2e161670e65abe42ef01d39e8957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 May 2016 04:31:30 +0600 Subject: [PATCH 0541/3599] [utils] Allow None in remove_{start,end} --- test/test_utils.py | 12 ++++++++++++ youtube_dl/utils.py | 8 ++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 520d32ff5..a697232a8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -50,6 +50,8 @@ from youtube_dl.utils import ( sanitize_path, prepend_extension, replace_extension, + remove_start, + remove_end, remove_quotes, shell_quote, smuggle_url, @@ -215,6 +217,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + def test_remove_start(self): + 
self.assertEqual(remove_start(None, 'A - '), None) + self.assertEqual(remove_start('A - B', 'A - '), 'B') + self.assertEqual(remove_start('B - A', 'A - '), 'B - A') + + def test_remove_end(self): + self.assertEqual(remove_end(None, ' - B'), None) + self.assertEqual(remove_end('A - B', ' - B'), 'A') + self.assertEqual(remove_end('B - A', ' - B'), 'B - A') + def test_remove_quotes(self): self.assertEqual(remove_quotes(None), None) self.assertEqual(remove_quotes('"'), '"') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ac60ba18c..5301d0740 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1549,15 +1549,11 @@ def setproctitle(title): def remove_start(s, start): - if s.startswith(start): - return s[len(start):] - return s + return s[len(start):] if s is not None and s.startswith(start) else s def remove_end(s, end): - if s.endswith(end): - return s[:-len(end)] - return s + return s[:-len(end)] if s is not None and s.endswith(end) else s def remove_quotes(s): From dd81769c62661d168fb87b896ffb8a80dacbe45b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 May 2016 04:34:19 +0600 Subject: [PATCH 0542/3599] [ndtv] Fix extraction --- youtube_dl/extractor/ndtv.py | 40 ++++++++++-------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py index 2a1ca80df..96528f649 100644 --- a/youtube_dl/extractor/ndtv.py +++ b/youtube_dl/extractor/ndtv.py @@ -1,19 +1,18 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - month_by_name, int_or_none, + remove_end, + unified_strdate, ) class NDTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)' _TEST = { - 'url': 
'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710', + 'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710', 'md5': '39f992dbe5fb531c395d8bbedb1e5e88', 'info_dict': { 'id': '300710', @@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor): 'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02', 'upload_date': '20131208', 'duration': 1327, - 'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg', + 'thumbnail': 're:https?://.*\.jpg', }, } @@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + title = remove_end(self._og_search_title(webpage), ' - NDTV') + filename = self._search_regex( r"__filename='([^']+)'", webpage, 'video filename') - video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % - filename) + video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename duration = int_or_none(self._search_regex( r"__duration='([^']+)'", webpage, 'duration', fatal=False)) - date_m = re.search(r'''(?x) - <p\s+class="vod_dateline">\s* - Published\s+On:\s* - (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+) - ''', webpage) - upload_date = None + upload_date = unified_strdate(self._html_search_meta( + 'publish-date', webpage, 'upload date', fatal=False)) - if date_m is not None: - month = month_by_name(date_m.group('monthname')) - if month is not None: - upload_date = '%s%02d%02d' % ( - date_m.group('year'), month, int(date_m.group('day'))) - - description = self._og_search_description(webpage) - READ_MORE = ' (Read more)' - if description.endswith(READ_MORE): - description = description[:-len(READ_MORE)] - - title = self._og_search_title(webpage) - TITLE_SUFFIX = ' - NDTV' - if title.endswith(TITLE_SUFFIX): - title = title[:-len(TITLE_SUFFIX)] + description = 
remove_end(self._og_search_description(webpage), ' (Read more)') return { 'id': video_id, From 8585dc4cdc735eb8a077dffb68affa81e1a98693 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Thu, 19 May 2016 01:18:01 +0200 Subject: [PATCH 0543/3599] [Makefile] delete thumbnails --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5d7cd5a7e..d760e4576 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . 
-name "*.class" -delete From a00129670390c241d097afd873b4ee226ca7d550 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 19 May 2016 18:18:03 +0100 Subject: [PATCH 0544/3599] [learnr] Add new extractor(closes #4284) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/learnr.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/learnr.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index efbe970fe..74aba2d5c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -370,6 +370,7 @@ from .kuwo import ( ) from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .learnr import LearnrIE from .lecture2go import Lecture2GoIE from .lemonde import LemondeIE from .leeco import ( diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py new file mode 100644 index 000000000..1435e090e --- /dev/null +++ b/youtube_dl/extractor/learnr.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class LearnrIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript', + 'md5': '3719fdf0a68397f49899e82c308a89de', + 'info_dict': { + 'id': '51624', + 'ext': 'mp4', + 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript', + 'description': 'md5:b36dbfa92350176cdf12b4d388485503', + 'uploader': 'LearnCode.academy', + 'uploader_id': 'learncodeacademy', + 'upload_date': '20131021', + }, + 'add_ie': ['Youtube'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + return { + '_type': 'url_transparent', + 'url': self._search_regex( + 
r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'), + 'id': video_id, + } From f6e588afc0b12ebec2bc65551e882e6d99467499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 08:53:04 +0600 Subject: [PATCH 0545/3599] [24video] Fix description extraction --- youtube_dl/extractor/twentyfourvideo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index e03e2dbaa..4025edf02 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor): title = self._og_search_title(webpage) description = self._html_search_regex( - r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False) + r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>', + webpage, 'description', fatal=False, group='description') thumbnail = self._og_search_thumbnail(webpage) duration = int_or_none(self._og_search_property( 'duration', webpage, 'duration', fatal=False)) From 52f7c75cff3d7f7923deda469f9d2a551742c193 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 20 May 2016 06:53:14 +0100 Subject: [PATCH 0546/3599] [cbc] extract http formats and update tests --- youtube_dl/extractor/cbc.py | 63 +++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 68a0633b6..581928f7d 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + js_to_json, + smuggle_url, +) class CBCIE(InfoExtractor): @@ -12,57 +15,54 @@ class CBCIE(InfoExtractor): _TESTS = [{ # with mediaId 'url': 
'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', + 'md5': '97e24d09672fc4cf56256d6faa6c25bc', 'info_dict': { 'id': '2682904050', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Don Cherry – All-Stars', 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', - 'timestamp': 1454475540, + 'timestamp': 1454463000, 'upload_date': '20160203', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'uploader': 'CBCC-NEW', }, }, { # with clipId 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', 'info_dict': { 'id': '2487345465', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Robin Williams freestyles on 90 Minutes Live', 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', - 'upload_date': '19700101', + 'upload_date': '19780210', 'uploader': 'CBCC-NEW', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'timestamp': 255977160, }, }, { # multiple iframes 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', 'playlist': [{ + 'md5': '377572d0b49c4ce0c9ad77470e0b96b4', 'info_dict': { 'id': '2680832926', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'An Eagle\'s-Eye View Off Burrard Bridge', 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', - 'upload_date': '19700101', + 'upload_date': '20160201', + 'timestamp': 1454342820, + 'uploader': 'CBCC-NEW', }, }, { + 'md5': '415a0e3f586113894174dfb31aa5bb1a', 'info_dict': { 'id': '2658915080', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Fly like an eagle!', 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', - 'upload_date': '19700101', + 'upload_date': '20150315', + 'timestamp': 1426443984, + 'uploader': 'CBCC-NEW', 
}, }], - 'params': { - # rtmp download - 'skip_download': True, - }, }] @classmethod @@ -95,20 +95,23 @@ class CBCPlayerIE(InfoExtractor): 'url': 'http://www.cbc.ca/player/play/2683190193', 'info_dict': { 'id': '2683190193', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Gerry Runs a Sweat Shop', 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0', - 'timestamp': 1455067800, + 'timestamp': 1455071400, 'upload_date': '20160210', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'uploader': 'CBCC-NEW', }, } def _real_extract(self, url): video_id = self._match_id(url) - return self.url_result( - 'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id, - 'ThePlatformFeed', video_id) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, { + 'force_smil_url': True + }), + 'id': video_id, + } From 043dc9d36fea85a964bad3ec13f77d32c462115b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 20 May 2016 18:39:54 +0800 Subject: [PATCH 0547/3599] [cbc] Fix for old-styled URLs The URL http://www.cbc.ca/player/News/ID/2672225049/ (#6342) redirects to http://www.cbc.ca/player/play/2672224672, while youtube-dl wasn't able to handle it correctly. 
--- youtube_dl/extractor/cbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 581928f7d..daf237ca8 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -11,7 +11,7 @@ from ..utils import ( class CBCIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ # with mediaId 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', From ad96b4c8f56ba9873c62a2ce9916253f9b8a49ee Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 20 May 2016 19:02:53 +0800 Subject: [PATCH 0548/3599] [common] Extract audio formats in SMIL Found in http://www.cbc.ca/player/play/2657631896 Closes #5156 --- youtube_dl/extractor/common.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8a8c07226..9f22ee930 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1278,21 +1278,21 @@ class InfoExtractor(object): m3u8_count = 0 srcs = [] - videos = smil.findall(self._xpath_ns('.//video', namespace)) - for video in videos: - src = video.get('src') + media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace)) + for medium in media: + src = medium.get('src') if not src or src in srcs: continue srcs.append(src) - bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - filesize = int_or_none(video.get('size') or video.get('fileSize')) - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - proto = video.get('proto') - ext = video.get('ext') + bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000) + filesize = 
int_or_none(medium.get('size') or medium.get('fileSize')) + width = int_or_none(medium.get('width')) + height = int_or_none(medium.get('height')) + proto = medium.get('proto') + ext = medium.get('ext') src_ext = determine_ext(src) - streamer = video.get('streamer') or base + streamer = medium.get('streamer') or base if proto == 'rtmp' or streamer.startswith('rtmp'): rtmp_count += 1 From 31a70191e730a2a963c8b2e4d19921cad573ad8a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 20 May 2016 19:04:50 +0800 Subject: [PATCH 0549/3599] [cbc] Add the test case from #5156 --- youtube_dl/extractor/cbc.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index daf237ca8..22d5e72d5 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -91,7 +91,7 @@ class CBCIE(InfoExtractor): class CBCPlayerIE(InfoExtractor): _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', 'info_dict': { 'id': '2683190193', @@ -102,7 +102,20 @@ class CBCPlayerIE(InfoExtractor): 'upload_date': '20160210', 'uploader': 'CBCC-NEW', }, - } + }, { + # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ + 'url': 'http://www.cbc.ca/player/play/2657631896', + 'md5': 'e5e708c34ae6fca156aafe17c43e8b75', + 'info_dict': { + 'id': '2657631896', + 'ext': 'mp3', + 'title': 'CBC Montreal is organizing its first ever community hackathon!', + 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. 
Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', + 'timestamp': 1425704400, + 'upload_date': '20150307', + 'uploader': 'CBCC-NEW', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From f0c96af9cb0edc69f9ba73d39e6e191994e31256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 20:55:10 +0600 Subject: [PATCH 0550/3599] [wistia] Add alias and modernize --- youtube_dl/extractor/wistia.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 8b14840a2..478c42833 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -3,16 +3,16 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, - sanitized_Request, int_or_none, ) class WistiaIE(InfoExtractor): - _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' - _API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json' + _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)' + _API_URL = 'http://fast.wistia.com/embed/medias/%s.json' + _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s' - _TEST = { + _TESTS = [{ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 'md5': 'cafeb56ec0c53c18c97405eecb3133df', 'info_dict': { @@ -24,17 +24,25 @@ class WistiaIE(InfoExtractor): 'timestamp': 1386185018, 'duration': 117, }, - } + }, { + 'url': 'wistia:sh7fpupwlt', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - request = sanitized_Request(self._API_URL.format(video_id)) - request.add_header('Referer', url) # Some videos require this. - data_json = self._download_json(request, video_id) + data_json = self._download_json( + self._API_URL % video_id, video_id, + # Some videos require this. 
+ headers={ + 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id, + }) + if data_json.get('error'): - raise ExtractorError('Error while getting the playlist', - expected=True) + raise ExtractorError( + 'Error while getting the playlist', expected=True) + data = data_json['media'] title = data['name'] From 36ca2c55db7939aff2dc700523843a9a0f82ae2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:04:01 +0600 Subject: [PATCH 0551/3599] [wistia] Skip storyboard and improve extraction --- youtube_dl/extractor/wistia.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 478c42833..6eb94fcab 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -49,19 +49,23 @@ class WistiaIE(InfoExtractor): formats = [] thumbnails = [] for a in data['assets']: + aurl = a.get('url') + if not aurl: + continue astatus = a.get('status') atype = a.get('type') - if (astatus is not None and astatus != 2) or atype == 'preview': + if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'): continue elif atype in ('still', 'still_image'): thumbnails.append({ - 'url': a['url'], - 'resolution': '%dx%d' % (a['width'], a['height']), + 'url': aurl, + 'width': int_or_none(a.get('width')), + 'height': int_or_none(a.get('height')), }) else: formats.append({ 'format_id': atype, - 'url': a['url'], + 'url': aurl, 'tbr': int_or_none(a.get('bitrate')), 'vbr': int_or_none(a.get('opt_vbitrate')), 'width': int_or_none(a.get('width')), From 45f160a43c5f103af7a843f1159a1f6e8f498f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:16:08 +0600 Subject: [PATCH 0552/3599] [wistia] Improve hls support --- youtube_dl/extractor/wistia.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 6eb94fcab..97139a35a 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -27,6 +27,10 @@ class WistiaIE(InfoExtractor): }, { 'url': 'wistia:sh7fpupwlt', 'only_matching': True, + }, { + # with hls video + 'url': 'wistia:807fafadvk', + 'only_matching': True, }] def _real_extract(self, url): @@ -63,6 +67,8 @@ class WistiaIE(InfoExtractor): 'height': int_or_none(a.get('height')), }) else: + aext = a.get('ext') + is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8' formats.append({ 'format_id': atype, 'url': aurl, @@ -73,7 +79,8 @@ class WistiaIE(InfoExtractor): 'filesize': int_or_none(a.get('size')), 'vcodec': a.get('codec'), 'container': a.get('container'), - 'ext': a.get('ext'), + 'ext': 'mp4' if is_m3u8 else aext, + 'protocol': 'm3u8' if is_m3u8 else None, 'preference': 1 if atype == 'original' else None, }) From 64413f7563eb7a89e06ede91fc135de73bc57db4 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 20 May 2016 16:20:05 +0100 Subject: [PATCH 0553/3599] [cbc] fix extraction for flv only videos(fixes #5309) --- youtube_dl/extractor/cbc.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index 22d5e72d5..ff663d079 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -28,6 +28,7 @@ class CBCIE(InfoExtractor): }, { # with clipId 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', + 'md5': '0274a90b51a9b4971fe005c63f592f12', 'info_dict': { 'id': '2487345465', 'ext': 'mp4', @@ -93,6 +94,7 @@ class CBCPlayerIE(InfoExtractor): _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', + 'md5': '64d25f841ddf4ddb28a235338af32e2c', 'info_dict': { 'id': '2683190193', 
'ext': 'mp4', @@ -115,6 +117,19 @@ class CBCPlayerIE(InfoExtractor): 'upload_date': '20150307', 'uploader': 'CBCC-NEW', }, + }, { + # available only when we add `formats=MPEG4,FLV,MP3` to theplatform url + 'url': 'http://www.cbc.ca/player/play/2164402062', + 'md5': '17a61eb813539abea40618d6323a7f82', + 'info_dict': { + 'id': '2164402062', + 'ext': 'flv', + 'title': 'Cancer survivor four times over', + 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', + 'timestamp': 1320410746, + 'upload_date': '20111104', + 'uploader': 'CBCC-NEW', + }, }] def _real_extract(self, url): @@ -123,7 +138,7 @@ class CBCPlayerIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url( - 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, { + 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { 'force_smil_url': True }), 'id': video_id, From aa5957ac49aad5165ce9ab5b9403539d61a09dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:33:31 +0600 Subject: [PATCH 0554/3599] [extractor/generic] Add support for async wistia embeds (Closes #9549) --- youtube_dl/extractor/generic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a6b1e23e3..632d7b5f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1563,6 +1563,15 @@ class GenericIE(InfoExtractor): 'id': match.group('id') } + match = re.search( + r'''(?sx) + <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? 
+ <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return self.url_result(self._proto_relative_url( + 'wistia:%s' % match.group('id')), 'Wistia') + # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: From 7ded6545edb18bb008e8277b42a21d60fb6cd653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:43:36 +0600 Subject: [PATCH 0555/3599] [extractor/generic] Add test for wistia standard embed --- youtube_dl/extractor/generic.py | 16 ++++++++++++++++ youtube_dl/extractor/wistia.py | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 632d7b5f0..9883cde61 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -742,6 +742,22 @@ class GenericIE(InfoExtractor): 'timestamp': 1401832161, }, }, + # Wistia standard embed (async) + { + 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', + 'info_dict': { + 'id': '807fafadvk', + 'ext': 'mp4', + 'title': 'Drip Brennan Dunn Workshop', + 'description': 'a JV Webinars video from getdrip-1', + 'duration': 4986.95, + 'upload_date': '20160518', + 'timestamp': 1463607249, + }, + 'params': { + 'skip_download': True, + } + }, # Soundcloud embed { 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index 97139a35a..c634b8dec 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + float_or_none, ) @@ -92,6 +93,6 @@ class WistiaIE(InfoExtractor): 'description': data.get('seoDescription'), 'formats': formats, 'thumbnails': thumbnails, - 'duration': int_or_none(data.get('duration')), + 'duration': 
float_or_none(data.get('duration')), 'timestamp': int_or_none(data.get('createdAt')), } From 6c114b12104e8c9d0713d1cb2cd6c4ddc7872b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 21:55:35 +0600 Subject: [PATCH 0556/3599] [extractor/generic] Remove generic id and title from wistia extractionand update tests --- youtube_dl/extractor/generic.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9883cde61..c368f08e1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -717,15 +717,18 @@ class GenericIE(InfoExtractor): }, # Wistia embed { - 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', - 'md5': '8788b683c777a5cf25621eaf286d0c23', + 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', + 'md5': '1953f3a698ab51cfc948ed3992a0b7ff', 'info_dict': { - 'id': '1cfaf6b7ea', + 'id': '6e2wtrbdaf', 'ext': 'mov', - 'title': 'md5:51364a8d3d009997ba99656004b5e20d', - 'duration': 643.0, - 'filesize': 182808282, - 'uploader': 'education-portal.com', + 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', + 'description': 'a Paywall Videos video from Remilon', + 'duration': 644.072, + 'uploader': 'study.com', + 'timestamp': 1459678540, + 'upload_date': '20160403', + 'filesize': 24687186, }, }, { @@ -734,12 +737,12 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'uxjb0lwrcz', 'ext': 'mp4', - 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', + 'title': 'Conversation about Hexagonal Rails Part 1', 'description': 'a Martin Fowler video from ThoughtWorks', 'duration': 1715.0, 'uploader': 'thoughtworks.wistia.com', - 'upload_date': '20140603', 'timestamp': 1401832161, + 
'upload_date': '20140603', }, }, # Wistia standard embed (async) @@ -751,8 +754,8 @@ class GenericIE(InfoExtractor): 'title': 'Drip Brennan Dunn Workshop', 'description': 'a JV Webinars video from getdrip-1', 'duration': 4986.95, - 'upload_date': '20160518', 'timestamp': 1463607249, + 'upload_date': '20160518', }, 'params': { 'skip_download': True, @@ -1564,19 +1567,15 @@ class GenericIE(InfoExtractor): 'url': embed_url, 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': video_id, } match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) if match: return { '_type': 'url_transparent', - 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), + 'url': 'wistia:%s' % match.group('id'), 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': match.group('id') } match = re.search( From 6756602be6b59c7bff57ccaeb33844cdc5636910 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Thu, 19 May 2016 03:42:09 +0200 Subject: [PATCH 0557/3599] [LocalNews8] add extractor (Closes #9200) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/localnews8.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/localnews8.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74aba2d5c..5b96a086d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -396,6 +396,7 @@ from .livestream import ( LivestreamShortenerIE, ) from .lnkgo import LnkGoIE +from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE from .lrt import LRTIE from .lynda import ( diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py new file mode 100644 index 000000000..b38d1d58a --- /dev/null +++ b/youtube_dl/extractor/localnews8.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from 
__future__ import unicode_literals + +from .common import InfoExtractor + + +class LocalNews8IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?localnews8\.com/.+?/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', + 'md5': '477bdb188f177788c65db27ecb56649b', + 'info_dict': { + 'id': '35183304', + 'ext': 'mp4', + 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', + 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', + 'duration': '153', + 'timestamp': '1441844822', + 'uploader_id': 'api', + }} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + partner_id = self._search_regex(r'partnerId\s*:\s*"(\d+)"', webpage, video_id) + kaltura_id = self._search_regex(r'var\s+videoIdString\s*=\s*"kaltura:(.+)";', webpage, video_id) + + return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') From 1846e9ade0fb9508459282a992539c700aa26f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 May 2016 22:31:08 +0600 Subject: [PATCH 0558/3599] [localnews8] Fix extractor (Closes #9539) --- youtube_dl/extractor/localnews8.py | 38 ++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dl/extractor/localnews8.py index b38d1d58a..aad396135 100644 --- a/youtube_dl/extractor/localnews8.py +++ b/youtube_dl/extractor/localnews8.py @@ -1,29 +1,47 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class LocalNews8IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?localnews8\.com/.+?/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)' _TEST = { 'url': 
'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304', - 'md5': '477bdb188f177788c65db27ecb56649b', + 'md5': 'be4d48aea61aa2bde7be2ee47691ad20', 'info_dict': { 'id': '35183304', + 'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings', 'ext': 'mp4', 'title': 'Rexburg business turns carbon fiber scraps into wedding ring', 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.', - 'duration': '153', - 'timestamp': '1441844822', + 'duration': 153, + 'timestamp': 1441844822, + 'upload_date': '20150910', 'uploader_id': 'api', - }} + } + } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - partner_id = self._search_regex(r'partnerId\s*:\s*"(\d+)"', webpage, video_id) - kaltura_id = self._search_regex(r'var\s+videoIdString\s*=\s*"kaltura:(.+)";', webpage, video_id) + webpage = self._download_webpage(url, display_id) - return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') + partner_id = self._search_regex( + r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1', + webpage, 'partner id', group='id') + kaltura_id = self._search_regex( + r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1', + webpage, 'videl id', group='id') + + return { + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id), + 'ie_key': 'Kaltura', + 'id': video_id, + 'display_id': display_id, + } From b219f5e51be520b2e23acd1ec08735fc733f9619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 21 May 2016 00:59:06 +0600 Subject: [PATCH 0559/3599] [brightcove:new] Improve error reporting --- youtube_dl/extractor/brightcove.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index fc7fc5b16..ef560b592 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -444,6 +444,10 @@ class BrightcoveNewIE(InfoExtractor): # non numeric ref: prefixed video id 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356', 'only_matching': True, + }, { + # unavailable video without message but with error_code + 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001', + 'only_matching': True, }] @staticmethod @@ -514,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor): }) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id) - raise ExtractorError(json_data[0]['message'], expected=True) + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + raise ExtractorError( + json_data.get('message') or json_data['error_code'], expected=True) raise title = json_data['name'].strip() From c8602b2f9bcdda00398b2c54db4c1be85b75ce39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 21 May 2016 05:09:16 +0600 Subject: [PATCH 0560/3599] [nrk] Unquote subtitles' URLs --- youtube_dl/extractor/nrk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 7532f40c1..486e086bb 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -55,7 +55,9 @@ class NRKBaseIE(InfoExtractor): for subtitle in ('webVtt', 'timedText'): subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) if subtitle_url: - subtitles.setdefault('no', []).append({'url': subtitle_url}) + subtitles.setdefault('no', []).append({ + 'url': compat_urllib_parse_unquote(subtitle_url) + }) entries.append({ 'id': asset.get('carrierId') or 
entry_id, 'title': entry_title, From 16da9bbc29b76b6e6e1a6134a17e9f25d91296c8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:15:28 +0800 Subject: [PATCH 0561/3599] [common] Add _m3u8_meta_format() template For extractors who handle m3u8 manifests by themselves. (eg., AnvatoIE) Part of #9522 --- youtube_dl/extractor/common.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9f22ee930..17e866f91 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1058,12 +1058,8 @@ class InfoExtractor(object): }) return formats - def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, - entry_protocol='m3u8', preference=None, - m3u8_id=None, note=None, errnote=None, - fatal=True, live=False): - - formats = [{ + def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None): + return { 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), 'url': m3u8_url, 'ext': ext, @@ -1071,7 +1067,14 @@ class InfoExtractor(object): 'preference': preference - 1 if preference else -1, 'resolution': 'multiple', 'format_note': 'Quality selection URL', - }] + } + + def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, + entry_protocol='m3u8', preference=None, + m3u8_id=None, note=None, errnote=None, + fatal=True, live=False): + + formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] format_url = lambda u: ( u From 7b2fcbfd4ea34e6d29484f5987a36665117aefaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:16:28 +0800 Subject: [PATCH 0562/3599] [common] Skip TYPE=CLOSED-CAPTIONS lines in m3u8 manifests According to [1], valid values for TYPE are AUDIO, VIDEO, SUBTITLES and CLOSED-CAPTIONS. Such a value is found in Anvato master playlists, though I don't use _extract_m3u8_formats() in the end. Part of #9522. 
[1] https://tools.ietf.org/html/draft-pantos-http-live-streaming-19#section-4.3.4.1 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 17e866f91..4bfa610c1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1141,7 +1141,7 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None + last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None # Despite specification does not mention NAME attribute for # EXT-X-STREAM-INF it still sometimes may be present stream_name = last_info.get('NAME') or last_media_name From 9f54e692d2de2d52f147f2d714d0312dbe21a5ed Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:18:29 +0800 Subject: [PATCH 0563/3599] [anvato] Add new extractor Used in CBSLocal (#9522) --- youtube_dl/extractor/anvato.py | 224 +++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 youtube_dl/extractor/anvato.py diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py new file mode 100644 index 000000000..cb29cf111 --- /dev/null +++ b/youtube_dl/extractor/anvato.py @@ -0,0 +1,224 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import hashlib +import json +import random +import time + +from .common import InfoExtractor +from ..aes import aes_encrypt +from ..compat import compat_str +from ..utils import ( + bytes_to_intlist, + determine_ext, + intlist_to_bytes, + int_or_none, + strip_jsonp, +) + + +def md5_text(s): + if not isinstance(s, compat_str): + s = compat_str(s) + return hashlib.md5(s.encode('utf-8')).hexdigest() + + +class AnvatoIE(InfoExtractor): + # Copied from anvplayer.min.js 
+ _ANVACK_TABLE = { + 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', + 'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA', + 'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP', + 'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv', + 'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7', + 'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR', + 'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg', + 'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto', + 'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY', + 'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh', + 'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK', + 'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D', + 'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad', + 'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp', + 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih', + 'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR', + 'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 
'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW', + 'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su', + 'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q', + 'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5', + 'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3', + 'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI', + 'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s', + 'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz', + 'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg', + 'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x', + 'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH', + 'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX', + 'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc', + 'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK', + 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7', + 'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C', + 'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e', + 'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 
'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1', + 'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re', + 'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51', + 'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho', + 'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9', + 'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH', + 'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F', + 'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo', + 'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR', + 'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa', + 'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk', + 'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ', + 'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ', + 'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m', + 'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b', + 'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3', + 'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK', + 
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F', + 'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx', + 'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ', + 'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH', + 'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm', + 'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt', + 'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl', + 'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b', + 'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV', + 'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg', + 'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk', + 'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT', + 'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa', + 'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv', + 'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k', + 
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI', + 'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr', + 'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw', + 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K', + 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH', + 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK', + 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu', + 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK', + 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n', + 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD', + 'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk', + 'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn', + 'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W', + 'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ', + 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ' + } + + _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' + + def __init__(self, *args, 
**kwargs): + super(AnvatoIE, self).__init__(*args, **kwargs) + self.__server_time = None + + def _server_time(self, access_key, video_id): + if self.__server_time is not None: + return self.__server_time + + self.__server_time = int(self._download_json( + self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id, + note='Fetching server time')['server_time']) + + return self.__server_time + + def _api_prefix(self, access_key): + return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage') + + def _get_video_json(self, access_key, video_id): + # See et() in anvplayer.min.js, which is an alias of getVideoJSON() + video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key) + server_time = self._server_time(access_key, video_id) + input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time)) + + auth_secret = intlist_to_bytes(aes_encrypt( + bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) + + video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii') + anvrid = md5_text(time.time() * 1000 * random.random())[:30] + payload = { + 'api': { + 'anvrid': anvrid, + 'anvstk': md5_text('%s|%s|%d|%s' % ( + access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])), + 'anvts': server_time, + }, + } + + return self._download_json( + video_data_url, video_id, transform_source=strip_jsonp, + data=json.dumps(payload).encode('utf-8')) + + def _extract_anvato_videos(self, webpage, video_id): + anvplayer_data = self._parse_json(self._html_search_regex( + r'<script[^>]+data-anvp=\'([^\']+)\'', webpage, + 'Anvato player data'), video_id) + + video_id = anvplayer_data['video'] + access_key = anvplayer_data['accessKey'] + + video_data = self._get_video_json(access_key, video_id) + + formats = [] + for published_url in video_data['published_urls']: + video_url = published_url['embed_url'] + ext = determine_ext(video_url) 
+ + if ext == 'smil': + formats.extend(self._extract_smil_formats(video_url, video_id)) + continue + + tbr = int_or_none(published_url.get('kbps')) + a_format = { + 'url': video_url, + 'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(), + 'tbr': tbr if tbr != 0 else None, + } + + if ext == 'm3u8': + # Not using _extract_m3u8_formats here as individual media + # playlists are also included in published_urls. + if tbr is None: + formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls')) + continue + else: + a_format.update({ + 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), + 'ext': 'mp4', + }) + elif ext == 'mp3': + a_format['vcodec'] = 'none' + else: + a_format.update({ + 'width': int_or_none(published_url.get('width')), + 'height': int_or_none(published_url.get('height')), + }) + formats.append(a_format) + + self._sort_formats(formats) + + subtitles = {} + for caption in video_data.get('captions', []): + a_caption = { + 'url': caption['url'], + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + } + subtitles.setdefault(caption['language'], []).append(a_caption) + + return { + 'id': video_id, + 'formats': formats, + 'title': video_data.get('def_title'), + 'description': video_data.get('def_description'), + 'categories': video_data.get('categories'), + 'thumbnail': video_data.get('thumbnail'), + 'subtitles': subtitles, + } From 612b5f403e33d5c164b5c0bbad9f01ef6d38d050 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:38:01 +0800 Subject: [PATCH 0564/3599] [jwplatform] Improved m3u8 and rtmp support Changes made for SendtoNewsIE. 
Part of #9522 --- youtube_dl/extractor/jwplatform.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 8a5e562db..0aa6fc750 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -5,33 +5,47 @@ import re from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, int_or_none, ) class JWPlatformBaseIE(InfoExtractor): - def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True): + def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): video_data = jwplayer_data['playlist'][0] formats = [] for source in video_data['sources']: source_url = self._proto_relative_url(source['file']) source_type = source.get('type') or '' - if source_type in ('application/vnd.apple.mpegurl', 'hls'): + if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', fatal=False)) + source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) elif source_type.startswith('audio'): formats.append({ 'url': source_url, 'vcodec': 'none', }) else: - formats.append({ + a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), 'height': int_or_none(source.get('height')), - }) + } + if source_url.startswith('rtmp'): + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as + # of jwplayer.flash.swf + rtmp_url, prefix, play_path = re.split( + r'((?:mp4|mp3|flv):)', source_url, 1) + a_format.update({ + 'url': rtmp_url, + 'ext': 'flv', + 'play_path': prefix + play_path, + }) + if rtmp_params: + a_format.update(rtmp_params) + formats.append(a_format) self._sort_formats(formats) subtitles = {} From 5ce3d5bd1b0933a26a4224643cf8d3ad14330e17 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Sat, 21 May 2016 13:39:42 +0800 Subject: [PATCH 0565/3599] [sendtonews] Add new extractor Used in CBSLocal. Part of #9522 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/sendtonews.py | 86 ++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/sendtonews.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5b96a086d..8352b3c3a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -670,6 +670,7 @@ from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .senateisvp import SenateISVPIE +from .sendtonews import SendtoNewsIE from .servingsys import ServingSysIE from .sexu import SexuIE from .shahid import ShahidIE diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py new file mode 100644 index 000000000..1c636f672 --- /dev/null +++ b/youtube_dl/extractor/sendtonews.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .jwplatform import JWPlatformBaseIE +from ..compat import compat_parse_qs +from ..utils import ( + ExtractorError, + parse_duration, +) + + +class SendtoNewsIE(JWPlatformBaseIE): + _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)' + + _TEST = { + # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ + 'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588', + 'ext': 'mp4', + 'title': 'Recap: CLE 15, CIN 6', + 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', + 'duration': 49, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + _URL_TEMPLATE = 
'//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s' + + @classmethod + def _extract_url(cls, webpage): + mobj = re.search(r'''(?x)<script[^>]+src=([\'"]) + (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\? + .*\bSC=(?P<SC>[0-9a-zA-Z-]+).* + \1>''', webpage) + if mobj: + sk, mk, pk = mobj.group('SC').split('-') + return cls._URL_TEMPLATE % (sk, mk, pk) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + params = compat_parse_qs(mobj.group('query')) + + if 'SK' not in params or 'MK' not in params or 'PK' not in params: + raise ExtractorError('Invalid URL', expected=True) + + video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]]) + + webpage = self._download_webpage(url, video_id) + + jwplayer_data_str = self._search_regex( + r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data') + js_vars = { + 'w': 1024, + 'h': 768, + 'modeVar': 'html5', + } + for name, val in js_vars.items(): + js_val = '%d' % val if isinstance(val, int) else '"%s"' % val + jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val) + + info_dict = self._parse_jwplayer_data( + self._parse_json(jwplayer_data_str, video_id), + video_id, require_title=False, rtmp_params={'no_resume': True}) + + title = self._html_search_regex( + r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title') + description = self._html_search_regex( + r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage, + 'description', fatal=False) + duration = parse_duration(self._html_search_regex( + r'<div[^>]+class="embedDetails">([0-9:]+)', webpage, + 'duration', fatal=False)) + + info_dict.update({ + 'title': title, + 'description': description, + 'duration': duration, + }) + + return info_dict From 661d46b28f6de2772fc642c36b505a3c7b9a3b10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:40:45 +0800 Subject: [PATCH 0566/3599] [cbslocal] Add new extractor (closes #9522) --- 
youtube_dl/extractor/cbslocal.py | 84 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/cbslocal.py diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py new file mode 100644 index 000000000..74adb38a6 --- /dev/null +++ b/youtube_dl/extractor/cbslocal.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import calendar +import datetime + +from .anvato import AnvatoIE +from .sendtonews import SendtoNewsIE +from ..compat import compat_urlparse + + +class CBSLocalIE(AnvatoIE): + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' + + _TESTS = [{ + # Anvato backend + 'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis', + 'md5': 'f0ee3081e3843f575fccef901199b212', + 'info_dict': { + 'id': '3401037', + 'ext': 'mp4', + 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'', + 'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. 
Randy Paige reports.', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1463440500, + 'upload_date': '20160516', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\KCBSTV', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\AOL', + 'Syndication\\Yahoo', + 'Syndication\\Tribune', + 'Syndication\\Curb.tv', + 'Content\\News' + ], + }, + }, { + # SendtoNews embed + 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588', + 'ext': 'mp4', + 'title': 'Recap: CLE 15, CIN 6', + 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', + 'upload_date': '20160516', + 'timestamp': 1463433840, + 'duration': 49, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + sendtonews_url = SendtoNewsIE._extract_url(webpage) + if sendtonews_url: + info_dict = { + '_type': 'url_transparent', + 'url': compat_urlparse.urljoin(url, sendtonews_url), + } + else: + info_dict = self._extract_anvato_videos(webpage, display_id) + + time_str = self._html_search_regex( + r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) + timestamp = None + if time_str: + timestamp = calendar.timegm(datetime.datetime.strptime( + time_str, '%b %d, %Y %I:%M %p').timetuple()) + + info_dict.update({ + 'display_id': display_id, + 'timestamp': timestamp, + }) + + return info_dict diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8352b3c3a..c93cd2765 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -111,6 +111,7 @@ from .cbc import ( CBCPlayerIE, ) from .cbs import CBSIE +from .cbslocal import CBSLocalIE from .cbsinteractive import CBSInteractiveIE from .cbsnews import ( CBSNewsIE, From 
115c65793af4c56c8f1986d2640105fc7e760c13 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 May 2016 13:50:38 +0800 Subject: [PATCH 0567/3599] [jwplatform] Don't fail with RTMP URLs without mp4:, mp3: or flv: --- youtube_dl/extractor/jwplatform.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 0aa6fc750..fa6f335e1 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -34,15 +34,18 @@ class JWPlatformBaseIE(InfoExtractor): 'height': int_or_none(source.get('height')), } if source_url.startswith('rtmp'): + a_format['ext'] = 'flv', + # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf - rtmp_url, prefix, play_path = re.split( + rtmp_url_parts = re.split( r'((?:mp4|mp3|flv):)', source_url, 1) - a_format.update({ - 'url': rtmp_url, - 'ext': 'flv', - 'play_path': prefix + play_path, - }) + if len(rtmp_url_parts) == 3: + rtmp_url, prefix, play_path = rtmp_url_parts + a_format.update({ + 'url': rtmp_url, + 'play_path': prefix + play_path, + }) if rtmp_params: a_format.update(rtmp_params) formats.append(a_format) From 4c718d3c50b8d80bf07e44c73a5bdcd98544388f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 21 May 2016 17:37:35 +0200 Subject: [PATCH 0568/3599] [rtve] Recognize 'filmoteca' URLs --- youtube_dl/extractor/rtve.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index edd0d108e..f11e3588b 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -64,7 +64,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL 
= r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -87,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor): }, { 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'only_matching': True, + }, { + 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', + 'only_matching': True, }] def _real_initialize(self): From c8cc3745fbb34d39f4dfb0c3facb6fa9278af93c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:18:59 +0200 Subject: [PATCH 0569/3599] release 2016.05.21 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++++++ youtube_dl/version.py | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7024fc729..00cc634e3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.16 +[debug] youtube-dl version 2016.05.21 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 29db13883..cd6bfa51c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -16,6 +16,8 @@ - **9gag** - **abc.net.au** - **Abc7News** + - **abcnews** + - **abcnews:video** - **AcademicEarth:Course** - **acast** - **acast:channel** @@ -104,6 +106,7 @@ - **CBCPlayer** - **CBS** - **CBSInteractive** + - **CBSLocal** - **CBSNews**: CBS News - **CBSNewsLiveVideo**: CBS News Live Videos - **CBSSports** @@ -213,6 +216,7 @@ - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** + - **Formula1** - **FOX** - **Foxgay** - **FoxNews**: Fox News and Fox Business Video @@ -316,6 +320,7 @@ - **la7.tv** - **Laola1Tv** - **Le**: 乐视网 + - **Learnr** - **Lecture2Go** - **Lemonde** - **LePlaylist** @@ -331,6 +336,7 @@ - **livestream** - **livestream:original** - **LnkGo** + - **LocalNews8** - **LoveHomePorn** - **lrt.lt** - **lynda**: lynda.com videos @@ -556,6 +562,7 @@ - **ScreenJunkies** - **ScreenwaveMedia** - **SenateISVP** + - **SendtoNews** - **ServingSys** - **Sexu** - **Shahid** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 
5a0fdd6ce..4bdb5f352 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.16' +__version__ = '2016.05.21' From 7e642e4fd68f9418ecdcb852aa34a4e49c41e58b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:24:53 +0200 Subject: [PATCH 0570/3599] release: check for pandoc Abort releaseing if pandoc is missing. (pandoc was not included in my essential app database, and thus missing on my new machine.) --- devscripts/release.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index 8dea55dbb..8b37152a7 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -33,6 +33,7 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th useless_files=$(find youtube_dl -type f -not -name '*.py') if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi +if ! type pandoc 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 598869afb139707c7064a6c8397bbcf09b2b43f5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:27:00 +0200 Subject: [PATCH 0571/3599] release 2016.05.21.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 00cc634e3..7f8650553 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21 +[debug] youtube-dl version 2016.05.21.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4bdb5f352..0a2e43d05 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21' +__version__ = '2016.05.21.1' From d2fee3c99e9d1c8eba5bd55aa3a9dd5702b23b34 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:46:42 +0200 Subject: [PATCH 0572/3599] release.sh: also check for python3 rsa module --- devscripts/release.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 8b37152a7..7dd391b38 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ 
-33,7 +33,8 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th useless_files=$(find youtube_dl -type f -not -name '*.py') if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi -if ! type pandoc 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi +if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi +if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From e03b35b8f998692aa853c6dbd498655fc831f9e7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 21 May 2016 21:47:39 +0200 Subject: [PATCH 0573/3599] release 2016.05.21.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7f8650553..2d80d45b6 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21.1 +[debug] youtube-dl version 2016.05.21.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0a2e43d05..522a56669 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21.1' +__version__ = '2016.05.21.2' From 0db9a05f88cbbe6709da3875b798634dc536536b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 02:15:56 +0600 Subject: [PATCH 0574/3599] [periscope:user] Adapt to layout changes (Closes #9563) --- youtube_dl/extractor/periscope.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 0a4bc761d..b2008decc 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + parse_iso8601, + unescapeHTML, +) class PeriscopeIE(InfoExtractor): @@ -92,6 +95,7 @@ class PeriscopeUserIE(InfoExtractor): 'info_dict': { 'id': 'LularoeHusbandMike', 'title': 
'LULAROE HUSBAND MIKE', + 'description': 'md5:6cf4ec8047768098da58e446e82c82f0', }, # Periscope only shows videos in the last 24 hours, so it's possible to # get 0 videos @@ -103,16 +107,19 @@ class PeriscopeUserIE(InfoExtractor): webpage = self._download_webpage(url, user_id) - broadcast_data = self._parse_json(self._html_search_meta( - 'broadcast-data', webpage, default='{}'), user_id) - username = broadcast_data.get('user', {}).get('display_name') - user_broadcasts = self._parse_json( - self._html_search_meta('user-broadcasts', webpage, default='{}'), + data_store = self._parse_json( + unescapeHTML(self._search_regex( + r'data-store=(["\'])(?P<data>.+?)\1', + webpage, 'data store', default='{}', group='data')), user_id) + user = data_store.get('User', {}).get('user', {}) + title = user.get('display_name') or user.get('username') + description = user.get('description') + entries = [ self.url_result( 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) - for broadcast in user_broadcasts.get('broadcasts', [])] + for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])] - return self.playlist_result(entries, user_id, username) + return self.playlist_result(entries, user_id, title, description) From 92d221ad4858a62143ce5645c56261b26023308e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 02:39:15 +0600 Subject: [PATCH 0575/3599] [periscope] Update uploader_id (Closes #9565) --- youtube_dl/extractor/periscope.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index b2008decc..c23b314e7 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -45,8 +45,11 @@ class PeriscopeIE(InfoExtractor): broadcast = broadcast_data['broadcast'] status = broadcast['status'] - uploader = broadcast.get('user_display_name') or broadcast_data.get('user', 
{}).get('display_name') - uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id') + user = broadcast_data.get('user', {}) + + uploader = broadcast.get('user_display_name') or user.get('display_name') + uploader_id = (broadcast.get('username') or user.get('username') or + broadcast.get('user_id') or user.get('id')) title = '%s - %s' % (uploader, status) if uploader else status state = broadcast.get('state').lower() From c8831015f41879e0d8788c228acf52579e6cf12b Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Sat, 21 May 2016 18:51:34 +0200 Subject: [PATCH 0576/3599] [ComedyCentral] Add support for tosh.cc.com and cc.com/video-clips --- youtube_dl/extractor/comedycentral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 0c59102e0..830073834 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow) |https?://(:www\.)? 
- (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ + (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| (?P<clip> - (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-clips|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) )| From 0150a00f333371b366ff10871458e0b071f20ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 02:58:41 +0600 Subject: [PATCH 0577/3599] [cc] Add test for tosh.cc (Closes #9566) --- youtube_dl/extractor/comedycentral.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 830073834..2b6aaa3aa 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -47,7 +47,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/ ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| (?P<clip> - (?:(?:guests/[^/]+|videos|video-clips|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) + (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) )| @@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): }, { 'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', 'only_matching': True, + }, { + 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans', + 'only_matching': True, }] 
_available_formats = ['3500', '2200', '1700', '1200', '750', '400'] From 898f4b49ccc828f86a075d656aa9a1e1428e538c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 06:47:22 +0600 Subject: [PATCH 0578/3599] [theplatform] Add _extract_urls --- youtube_dl/extractor/theplatform.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index a25417f94..02dbef913 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -151,6 +151,22 @@ class ThePlatformIE(ThePlatformBaseIE): 'only_matching': True, }] + @classmethod + def _extract_urls(cls, webpage): + m = re.search( + r'''(?x) + <meta\s+ + property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+ + content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2 + ''', webpage) + if m: + return [m.group('url')] + + matches = re.findall( + r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) + if matches: + return list(zip(*matches))[1] + @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): flags = '10' if include_qs else '00' From 4d8819d2492e10f10bd09490f8f203d2f5e2cac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 06:52:39 +0600 Subject: [PATCH 0579/3599] [extractor/generic] Add support for theplatform embeds (Closes #8636, closes #9476) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c368f08e1..ad6a40730 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -62,6 +62,7 @@ from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE +from .theplatform import ThePlatformIE class 
GenericIE(InfoExtractor): @@ -1499,6 +1500,11 @@ class GenericIE(InfoExtractor): if bc_urls: return _playlist_from_matches(bc_urls, ie='BrightcoveNew') + # Look for ThePlatform embeds + tp_urls = ThePlatformIE._extract_urls(webpage) + if tp_urls: + return _playlist_from_matches(tp_urls, ie='ThePlatform') + # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', From c6b9cf05e1dbd5e2534607fd3319ac73791d1c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 08:28:41 +0600 Subject: [PATCH 0580/3599] [utils] Do not fail on unknown date formats in unified_strdate --- youtube_dl/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5301d0740..d65f5e833 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1055,7 +1055,10 @@ def unified_strdate(date_str, day_first=True): if upload_date is None: timetuple = email.utils.parsedate_tz(date_str) if timetuple: - upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') + try: + upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') + except ValueError: + pass if upload_date is not None: return compat_str(upload_date) From 21a19aa94d7a650d90ab258bd277a8648378c135 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 08:59:28 +0600 Subject: [PATCH 0581/3599] [README.md] Clarify location for youtube-dl.exe --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2febab2c..96cefb548 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download a .exe 
file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). +Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory (`%USERPROFILE%`, for example `C:\Users\<user name>\` or `C:\Documents and Settings\<user name>\`) or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\system32` (e.g. **do not** put in `C:\Windows\System32`). OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). From 4f3a25c2b413977bf0ea6f7bd16d3d20259470bb Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 09:00:08 +0600 Subject: [PATCH 0582/3599] [README.md] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 96cefb548..759d2bb01 100644 --- a/README.md +++ b/README.md @@ -433,7 +433,7 @@ You can use `--ignore-config` if you want to disable the configuration file for ### Authentication with `.netrc` file -You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. 
For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only: +You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only: ``` touch $HOME/.netrc chmod a-rwx,u+rw $HOME/.netrc From 9b06b0fb9297efe47a8de71142e926dda5031b65 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 09:26:06 +0600 Subject: [PATCH 0583/3599] [README.md] Clarify updating on Windows --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 759d2bb01..649e78186 100644 --- a/README.md +++ b/README.md @@ -675,6 +675,8 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). +Note that on Windows before running the update command in command prompt you should either `cd` to the directory where `youtube-dl.exe` is located or use the full path to `youtube-dl.exe` (e.g. `C:\Program Files (x86)\youtube-dl\youtube-dl.exe -U`). + If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. 
Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. From e5871c672b32d30fe4a943ad1375a7000829f03c Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 09:36:07 +0600 Subject: [PATCH 0584/3599] [README.md] Clarify location for youtube-dl.exe even more %USERPROFILE% not in %PATH% by default. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 649e78186..185df5e76 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl -Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory (`%USERPROFILE%`, for example `C:\Users\<user name>\` or `C:\Documents and Settings\<user name>\`) or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\system32` (e.g. **do not** put in `C:\Windows\System32`). +Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). 
From e9297256d405651428d5d52f0bb6b32ca66ea15a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 May 2016 10:06:45 +0600 Subject: [PATCH 0585/3599] [update] Fix youtube-dl.exe updating from arbitrary directory (Closes #2718) --- youtube_dl/update.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index 676ebe1c4..ebce9666a 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener): print_notes(to_screen, versions_info['versions']) - filename = sys.argv[0] - # Py2EXE: Filename could be different - if hasattr(sys, 'frozen') and not os.path.isfile(filename): - if os.path.isfile(filename + '.exe'): - filename += '.exe' + # sys.executable is set to the full pathname of the exe-file for py2exe + filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0] if not os.access(filename, os.W_OK): to_screen('ERROR: no write permissions on %s' % filename) @@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener): # Py2EXE if hasattr(sys, 'frozen'): - exe = os.path.abspath(filename) + exe = filename directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): to_screen('ERROR: no write permissions on %s' % directory) From c776b99691e5fdec75cc7d5c268c260f23bd2ac7 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 10:14:02 +0600 Subject: [PATCH 0586/3599] [README.md] Remove Windows updating trickery Windows updating fixed in e9297256d405651428d5d52f0bb6b32ca66ea15a. 
--- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 185df5e76..00f42e056 100644 --- a/README.md +++ b/README.md @@ -675,8 +675,6 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231 If you've followed [our manual installation instructions](http://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). -Note that on Windows before running the update command in command prompt you should either `cd` to the directory where `youtube-dl.exe` is located or use the full path to `youtube-dl.exe` (e.g. `C:\Program Files (x86)\youtube-dl\youtube-dl.exe -U`). - If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to http://yt-dl.org/ to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum. 
From 70346165fe9348b54e8d71fb40654d135af945f8 Mon Sep 17 00:00:00 2001 From: Thor77 <xXThor77Xx@gmail.com> Date: Sun, 22 May 2016 08:15:39 +0200 Subject: [PATCH 0587/3599] [bandcamp] raise ExtractorError when track not streamable (#9465) * [bandcamp] raise ExtractorError when track not streamable * [bandcamp] update md5 for second test * don't rely on json-data, but just check for 'file' * don't rely on presence of 'file' --- youtube_dl/extractor/bandcamp.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index c1ef8051d..991ab0676 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor): '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', - 'md5': '2b68e5851514c20efdff2afc5603b8b4', + 'md5': '73d0b3171568232574e45652f8720b5c', 'info_dict': { 'id': '2650410135', 'ext': 'mp3', @@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor): if m_trackinfo: json_code = m_trackinfo.group(1) data = json.loads(json_code)[0] + track_id = compat_str(data['id']) + + if not data.get('file'): + raise ExtractorError('Not streamable', video_id=track_id, expected=True) formats = [] for format_id, format_url in data['file'].items(): @@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor): self._sort_formats(formats) return { - 'id': compat_str(data['id']), + 'id': track_id, 'title': data['title'], 'formats': formats, 'duration': float_or_none(data.get('duration')), From a4a7c44bd337cdda534ad879c516d5b33e25a893 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 22 May 2016 15:04:51 +0600 Subject: [PATCH 0588/3599] [README.md] Document solution for extremely slow start on Windows --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 00f42e056..ef0e265c8 100644 --- a/README.md +++ 
b/README.md @@ -693,6 +693,10 @@ hash -r Again, from then on you'll be able to update with `sudo youtube-dl -U`. +### youtube-dl is extremely slow to start on Windows + +Add a file exclusion for `youtube-dl.exe` in Windows Defender settings. + ### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. From 4a12077855026a0ca9cf31868c13d2d029f7a723 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 22 May 2016 22:22:27 +0800 Subject: [PATCH 0589/3599] [genric] Eliminate duplicated video URLs (closes #6562) --- youtube_dl/extractor/generic.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ad6a40730..bb96e7231 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1194,6 +1194,16 @@ class GenericIE(InfoExtractor): 'uploader': 'Lake8737', } }, + # Duplicated embedded video URLs + { + 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443', + 'info_dict': { + 'id': '149298443_480_16c25b74_2', + 'ext': 'mp4', + 'title': 'vs. 
Blue Orange Spring Game', + 'uploader': 'www.hudl.com', + }, + }, ] def report_following_redirect(self, new_url): @@ -2111,7 +2121,7 @@ class GenericIE(InfoExtractor): raise UnsupportedError(url) entries = [] - for video_url in found: + for video_url in orderedSet(found): video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url) From c5f51551006c9d9ad7263cb3d3d90d1c91c8c648 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 22 Apr 2016 09:36:14 +0100 Subject: [PATCH 0590/3599] [wat] extract all formats --- youtube_dl/extractor/wat.py | 129 +++++++++++++++++------------------- 1 file changed, 59 insertions(+), 70 deletions(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 5227bb5ad..de7d6b559 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -2,25 +2,26 @@ from __future__ import unicode_literals import re -import hashlib from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, unified_strdate, + HEADRequest, + float_or_none, ) class WatIE(InfoExtractor): - _VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)' + _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)' IE_NAME = 'wat.tv' _TESTS = [ { 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', - 'md5': 'ce70e9223945ed26a8056d413ca55dc9', + 'md5': '83d882d9de5c9d97f0bb2c6273cde56a', 'info_dict': { 'id': '11713067', - 'display_id': 'soupe-figues-l-orange-aux-epices', 'ext': 'mp4', 'title': 'Soupe de figues à l\'orange et aux épices', 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.', @@ -33,7 +34,6 @@ class WatIE(InfoExtractor): 'md5': 'fbc84e4378165278e743956d9c1bf16b', 'info_dict': { 'id': '11713075', - 'display_id': 'gregory-lemarchal-voix-ange', 
'ext': 'mp4', 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)', 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3', @@ -44,96 +44,85 @@ class WatIE(InfoExtractor): }, ] - def download_video_info(self, real_id): + def _real_extract(self, url): + video_id = self._match_id(url) + video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36)) + # 'contentv4' is used in the website, but it also returns the related # videos, we don't need them - info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id) - return info['media'] - - def _real_extract(self, url): - def real_id_for_chapter(chapter): - return chapter['tc_start'].split('-')[0] - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - real_id = mobj.group('real_id') - if not real_id: - short_id = mobj.group('short_id') - webpage = self._download_webpage(url, display_id or short_id) - real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id') - - video_info = self.download_video_info(real_id) + video_info = self._download_json( + 'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media'] error_desc = video_info.get('error_desc') if error_desc: raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) - geo_list = video_info.get('geoList') - country = geo_list[0] if geo_list else '' - chapters = video_info['chapters'] first_chapter = chapters[0] - files = video_info['files'] - first_file = files[0] - if real_id_for_chapter(first_chapter) != real_id: + def video_id_for_chapter(chapter): + return chapter['tc_start'].split('-')[0] + + if video_id_for_chapter(first_chapter) != video_id: self.to_screen('Multipart video detected') - chapter_urls = [] - for chapter in chapters: - chapter_id = real_id_for_chapter(chapter) - # Yes, when we this chapter is processed by WatIE, - # it will download the info again - chapter_info = 
self.download_video_info(chapter_id) - chapter_urls.append(chapter_info['url']) - entries = [self.url_result(chapter_url) for chapter_url in chapter_urls] - return self.playlist_result(entries, real_id, video_info['title']) - - upload_date = None - if 'date_diffusion' in first_chapter: - upload_date = unified_strdate(first_chapter['date_diffusion']) + entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters] + return self.playlist_result(entries, video_id, video_info['title']) # Otherwise we can continue and extract just one part, we have to use - # the short id for getting the video url + # the video id for getting the video url - formats = [{ - 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, - 'format_id': 'Mobile', - }] + date_diffusion = first_chapter.get('date_diffusion') + upload_date = unified_strdate(date_diffusion) if date_diffusion else None - fmts = [('SD', 'web')] - if first_file.get('hasHD'): - fmts.append(('HD', 'webhd')) + def extract_url(path_template, url_type): + req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id) + head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type) + red_url = head.geturl() + if req_url == red_url: + raise ExtractorError( + '%s said: Sorry, this video is not available from your country.' 
% self.IE_NAME, + expected=True) + return red_url - def compute_token(param): - timestamp = '%08x' % int(self._download_webpage( - 'http://www.wat.tv/servertime', real_id, - 'Downloading server time').split('|')[0]) - magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564' - return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp) + m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8') + http_url = extract_url('android5/%s.mp4', 'http') - for fmt in fmts: - webid = '/%s/%s' % (fmt[1], real_id) - video_url = self._download_webpage( - 'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country), - real_id, - 'Downloading %s video URL' % fmt[0], - 'Failed to download %s video URL' % fmt[0], - False) - if not video_url: + formats = [] + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + formats.extend(m3u8_formats) + formats.extend(self._extract_f4m_formats( + m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + for m3u8_format in m3u8_formats: + mobj = re.search( + r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url']) + if not mobj: continue - formats.append({ - 'url': video_url, - 'ext': 'mp4', - 'format_id': fmt[0], + abr, vbr = mobj.groups() + abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000) + m3u8_format.update({ + 'vbr': vbr, + 'abr': abr, }) + if not vbr or not abr: + continue + f = m3u8_format.copy() + f.update({ + 'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url), + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) + self._sort_formats(formats) return { - 'id': real_id, - 'display_id': display_id, + 'id': video_id, 'title': first_chapter['title'], 'thumbnail': first_chapter['preview'], 'description': first_chapter['description'], 'view_count': 
video_info['views'], 'upload_date': upload_date, - 'duration': first_file['duration'], + 'duration': video_info['files'][0]['duration'], 'formats': formats, } From db3b8b2103099a8859402f2167d7ad1a8fa66829 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 22 May 2016 16:54:41 +0100 Subject: [PATCH 0591/3599] [tf1] add support for more related web sites --- youtube_dl/extractor/tf1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 3f54b2744..aff5121b9 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html' + _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)' _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1', + r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1', webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From eb7941e3e6e92bac26f5d21525fc8ac89c934abe Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 May 2016 01:34:08 +0800 Subject: [PATCH 0592/3599] [compat] Fix for XML with <!DOCTYPE> in Python 2.7 and 3.2 Such XML documents cause DeprecationWarning if python is run with `-W error` --- test/test_compat.py | 6 ++++++ youtube_dl/compat.py | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/test/test_compat.py 
b/test/test_compat.py index 539b30540..f5317ac3e 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_compat_etree_fromstring_doctype(self): + xml = '''<?xml version="1.0"?> +<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd"> +<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>''' + compat_etree_fromstring(xml) + def test_struct_unpack(self): self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1392361a1..06e5f3ff6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -245,13 +245,20 @@ try: except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error + +etree = xml.etree.ElementTree + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + if sys.version_info[0] >= 3: - compat_etree_fromstring = xml.etree.ElementTree.fromstring + def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) else: # python 2.x tries to encode unicode strings with ascii (see the # XMLParser._fixtext method) - etree = xml.etree.ElementTree - try: _etree_iter = etree.Element.iter except AttributeError: # Python <=2.6 @@ -265,7 +272,7 @@ else: # 2.7 source def _XML(text, parser=None): if not parser: - parser = etree.XMLParser(target=etree.TreeBuilder()) + parser = etree.XMLParser(target=_TreeBuilder()) parser.feed(text) return parser.close() @@ -277,7 +284,7 @@ else: return el def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) for el in _etree_iter(doc): 
if el.text is not None and isinstance(el.text, bytes): el.text = el.text.decode('utf-8') From 7a46542f97c99e47ad86707bf21628630c8d871e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 May 2016 01:38:00 +0800 Subject: [PATCH 0593/3599] [livestream] Video IDs should always be strings (#2234) --- youtube_dl/extractor/livestream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index eada7c299..0edc06c43 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor): } def _extract_stream_info(self, stream_info): - broadcast_id = stream_info['broadcast_id'] + broadcast_id = compat_str(stream_info['broadcast_id']) is_live = stream_info.get('is_live') formats = [] From 78d3b3e2137f6be75b64e9bbfdec88cb420a91d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 23 May 2016 01:39:09 +0800 Subject: [PATCH 0594/3599] [generic] Improve Livestream detection (closes #2234) --- youtube_dl/extractor/generic.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bb96e7231..303e112d2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -784,6 +784,19 @@ class GenericIE(InfoExtractor): 'title': 'Rosetta #CometLanding webcast HL 10', } }, + # Another Livestream embed, without 'new.' 
in URL + { + 'url': 'https://www.freespeech.org/', + 'info_dict': { + 'id': '123537347', + 'ext': 'mp4', + 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # Live stream + 'skip_download': True, + }, + }, # LazyYT { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', @@ -1878,7 +1891,7 @@ class GenericIE(InfoExtractor): return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') mobj = re.search( - r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"', + r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'Livestream') From 102810ef0402834bd5d43e70a5e397f2a581a5dc Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 22 May 2016 20:36:23 +0100 Subject: [PATCH 0595/3599] [voxmedia] fix volume embed extraction --- youtube_dl/extractor/voxmedia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 0c6b1f030..9d73600aa 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -117,7 +117,7 @@ class VoxMediaIE(InfoExtractor): volume_webpage = self._download_webpage( 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) video_data = self._parse_json(self._search_regex( - r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) + r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) for provider_video_type in ('ooyala', 'youtube'): provider_video_id = video_data.get('%s_id' % provider_video_type) if provider_video_id: From e54373204ab6c5be36823695a571680d9a641ba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 May 2016 03:44:04 +0600 Subject: [PATCH 
0596/3599] [lifenews] Fix metadata extraction --- youtube_dl/extractor/lifenews.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index ba2f80a75..4b1fb9772 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -7,10 +7,10 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( determine_ext, - int_or_none, - remove_end, - unified_strdate, ExtractorError, + int_or_none, + parse_iso8601, + remove_end, ) @@ -28,7 +28,9 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Мужчина нашел дома архив оборонного завода', 'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26', + 'timestamp': 1344154740, 'upload_date': '20120805', + 'view_count': int, } }, { # single video embedded via iframe @@ -39,7 +41,9 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ', 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. 
', + 'timestamp': 1427961840, 'upload_date': '20150402', + 'view_count': int, } }, { # two videos embedded via iframe @@ -48,7 +52,8 @@ class LifeNewsIE(InfoExtractor): 'id': '153461', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', - 'upload_date': '20150505', + 'timestamp': 1430825520, + 'view_count': int, }, 'playlist': [{ 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795', @@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', + 'timestamp': 1430825520, 'upload_date': '20150505', }, }, { @@ -66,6 +72,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', + 'timestamp': 1430825520, 'upload_date': '20150505', }, }], @@ -100,21 +107,17 @@ class LifeNewsIE(InfoExtractor): description = self._og_search_description(webpage) view_count = self._html_search_regex( - r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False) - comment_count = self._html_search_regex( - r'=\'commentCount\'[^>]*>\s*(\d+)\s*<', - webpage, 'comment count', fatal=False) + r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>', + webpage, 'view count', fatal=False, group='value') - upload_date = self._html_search_regex( - r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False) - if upload_date is not None: - upload_date = 
unified_strdate(upload_date) + timestamp = parse_iso8601(self._search_regex( + r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1', + webpage, 'upload date', fatal=False, group='value')) common_info = { 'description': description, 'view_count': int_or_none(view_count), - 'comment_count': int_or_none(comment_count), - 'upload_date': upload_date, + 'timestamp': timestamp, } def make_entry(video_id, video_url, index=None): From 5181759c0d488f9fc30175f6aff4b8d4a236352d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 May 2016 04:00:08 +0600 Subject: [PATCH 0597/3599] [life] Update _VALID_URL --- youtube_dl/extractor/lifenews.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 4b1fb9772..d5d528a36 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -15,13 +15,13 @@ from ..utils import ( class LifeNewsIE(InfoExtractor): - IE_NAME = 'lifenews' - IE_DESC = 'LIFE | NEWS' - _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)' + IE_NAME = 'life' + IE_DESC = 'Life.ru' + _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)' _TESTS = [{ # single video embedded via video/source - 'url': 'http://lifenews.ru/news/98736', + 'url': 'https://life.ru/t/новости/98736', 'md5': '77c95eaefaca216e32a76a343ad89d23', 'info_dict': { 'id': '98736', @@ -34,7 +34,7 @@ class LifeNewsIE(InfoExtractor): } }, { # single video embedded via iframe - 'url': 'http://lifenews.ru/news/152125', + 'url': 'https://life.ru/t/новости/152125', 'md5': '77d19a6f0886cd76bdbf44b4d971a273', 'info_dict': { 'id': '152125', @@ -47,7 +47,7 @@ class LifeNewsIE(InfoExtractor): } }, { # two videos embedded via iframe - 'url': 'http://lifenews.ru/news/153461', + 'url': 'https://life.ru/t/новости/153461', 'info_dict': { 'id': '153461', 'title': 'В Москве спасли потерявшегося медвежонка, который 
спрятался на дереве', @@ -77,18 +77,20 @@ class LifeNewsIE(InfoExtractor): }, }], }, { - 'url': 'http://lifenews.ru/video/13035', + 'url': 'https://life.ru/t/новости/213035', + 'only_matching': True, + }, { + 'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461', + 'only_matching': True, + }, { + 'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - section = mobj.group('section') + video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://lifenews.ru/%s/%s' % (section, video_id), - video_id, 'Downloading page') + webpage = self._download_webpage(url, video_id) video_urls = re.findall( r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage) @@ -102,7 +104,7 @@ class LifeNewsIE(InfoExtractor): title = remove_end( self._og_search_title(webpage), - ' - Первый по срочным новостям — LIFE | NEWS') + ' - Life.ru') description = self._og_search_description(webpage) From 5db9df622fb45ba6fbb57ef4a2ad5f2da0236a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 May 2016 04:22:09 +0600 Subject: [PATCH 0598/3599] [life:embed] Use native hls --- youtube_dl/extractor/lifenews.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index d5d528a36..c2b4490c4 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -188,7 +188,8 @@ class LifeEmbedIE(InfoExtractor): ext = determine_ext(video_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id='m3u8')) + video_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) else: formats.append({ 'url': video_url, From 4b464a6a78749dfdc7c71fa932146403f18f6cb5 Mon Sep 17 00:00:00 2001 From: remitamine 
<remitamine@gmail.com> Date: Mon, 23 May 2016 00:47:22 +0100 Subject: [PATCH 0599/3599] [washingtonpost] improve format extraction and add support for video pages extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/washingtonpost.py | 148 +++++++++++++++++-------- 2 files changed, 103 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index c93cd2765..d0346714c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -941,7 +941,10 @@ from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE from .walla import WallaIE -from .washingtonpost import WashingtonPostIE +from .washingtonpost import ( + WashingtonPostIE, + WashingtonPostArticleIE, +) from .wat import WatIE from .watchindianporn import WatchIndianPornIE from .wdr import ( diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index ec8b99998..71349d487 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -11,7 +11,100 @@ from ..utils import ( class WashingtonPostIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])' + IE_NAME = 'washingtonpost' + _VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TEST = { + 'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d', + 'md5': '6f537e1334b714eb15f9563bd4b9cdfa', + 'info_dict': { + 'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d', + 'ext': 'mp4', + 'title': 'Egypt finds belongings, debris from plane crash', + 'description': 'md5:a17ceee432f215a5371388c1f680bd86', + 'upload_date': '20160520', + 'uploader': 'Reuters', + 'timestamp': 1463778452, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = 
self._download_json( + 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id, + video_id, transform_source=strip_jsonp)[0]['contentConfig'] + title = video_data['title'] + + urls = [] + formats = [] + for s in video_data.get('streams', []): + s_url = s.get('url') + if not s_url or s_url in urls: + continue + urls.append(s_url) + video_type = s.get('type') + if video_type == 'smil': + continue + elif video_type in ('ts', 'hls'): + m3u8_formats = self._extract_m3u8_formats( + s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + for m3u8_format in m3u8_formats: + width = m3u8_format.get('width') + if not width: + continue + vbr = self._search_regex( + r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None) + if vbr: + m3u8_format.update({ + 'vbr': int_or_none(vbr), + }) + formats.extend(m3u8_formats) + else: + width = int_or_none(s.get('width')) + vbr = int_or_none(s.get('bitrate')) + has_width = width != 0 + formats.append({ + 'format_id': ( + '%s-%d-%d' % (video_type, width, vbr) + if width + else video_type), + 'vbr': vbr if has_width else None, + 'width': width, + 'height': int_or_none(s.get('height')), + 'acodec': s.get('audioCodec'), + 'vcodec': s.get('videoCodec') if has_width else 'none', + 'filesize': int_or_none(s.get('fileSize')), + 'url': s_url, + 'ext': 'mp4', + 'protocol': { + 'mp4': 'http', + 'ts': 'm3u8_native', + 'hls': 'm3u8_native', + }.get(s.get('type')), + }) + source_media_url = video_data.get('sourceMediaURL') + if source_media_url: + formats.append({ + 'format_id': 'source_media', + 'url': source_media_url, + }) + self._sort_formats( + formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id')) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('blurb'), + 'uploader': video_data.get('credits', {}).get('source'), + 'formats': formats, + 'duration': int_or_none(video_data.get('videoDuration'), 100), + 'timestamp': int_or_none( + 
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000), + } + + +class WashingtonPostArticleIE(InfoExtractor): + IE_NAME = 'washingtonpost:article' + _VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/', 'info_dict': { @@ -63,6 +156,10 @@ class WashingtonPostIE(InfoExtractor): }] }] + @classmethod + def suitable(cls, url): + return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url) + def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) @@ -74,54 +171,7 @@ class WashingtonPostIE(InfoExtractor): <div\s+class="posttv-video-embed[^>]*?data-uuid=| data-video-uuid= )"([^"]+)"''', webpage) - entries = [] - for i, uuid in enumerate(uuids, start=1): - vinfo_all = self._download_json( - 'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid, - page_id, - transform_source=strip_jsonp, - note='Downloading information of video %d/%d' % (i, len(uuids)) - ) - vinfo = vinfo_all[0]['contentConfig'] - uploader = vinfo.get('credits', {}).get('source') - timestamp = int_or_none( - vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000) - - formats = [{ - 'format_id': ( - '%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate')) - if s.get('width') - else s.get('type')), - 'vbr': s.get('bitrate') if s.get('width') != 0 else None, - 'width': s.get('width'), - 'height': s.get('height'), - 'acodec': s.get('audioCodec'), - 'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none', - 'filesize': s.get('fileSize'), - 'url': s.get('url'), - 'ext': 'mp4', - 'preference': -100 if s.get('type') == 'smil' else None, - 'protocol': { - 'MP4': 'http', - 'F4F': 'f4m', - }.get(s.get('type')), - } for s in vinfo.get('streams', [])] - source_media_url = vinfo.get('sourceMediaURL') - if source_media_url: - formats.append({ - 
'format_id': 'source_media', - 'url': source_media_url, - }) - self._sort_formats(formats) - entries.append({ - 'id': uuid, - 'title': vinfo['title'], - 'description': vinfo.get('blurb'), - 'uploader': uploader, - 'formats': formats, - 'duration': int_or_none(vinfo.get('videoDuration'), 100), - 'timestamp': timestamp, - }) + entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids] return { '_type': 'playlist', From 0c50eeb9870ec7d940c35c9cec52bfd35d009420 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 02:27:31 +0100 Subject: [PATCH 0600/3599] [reuters] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/reuters.py | 69 ++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 youtube_dl/extractor/reuters.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d0346714c..d8b3170ba 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -630,6 +630,7 @@ from .rds import RDSIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .restudy import RestudyIE +from .reuters import ReutersIE from .reverbnation import ReverbNationIE from .revision3 import Revision3IE from .rice import RICEIE diff --git a/youtube_dl/extractor/reuters.py b/youtube_dl/extractor/reuters.py new file mode 100644 index 000000000..961d504eb --- /dev/null +++ b/youtube_dl/extractor/reuters.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + int_or_none, + unescapeHTML, +) + + +class ReutersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', + 'md5': '8015113643a0b12838f160b0b81cc2ee', + 'info_dict': { 
+ 'id': '368575562', + 'ext': 'mp4', + 'title': 'San Francisco police chief resigns', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) + video_data = js_to_json(self._search_regex( + r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', + webpage, 'video data')) + + def get_json_value(key, fatal=False): + return self._search_regex('"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) + + title = unescapeHTML(get_json_value('title', fatal=True)) + mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() + + mas_data = self._download_json( + 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), + video_id, transform_source=js_to_json) + formats = [] + for f in mas_data: + f_url = f.get('url') + if not f_url: + continue + method = f.get('method') + if method == 'hls': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + container = f.get('container') + ext = '3gp' if method == 'mobile' else container + formats.append({ + 'format_id': ext, + 'url': f_url, + 'ext': ext, + 'container': container if method != 'mobile' else None, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': get_json_value('thumb'), + 'duration': int_or_none(get_json_value('seconds')), + 'formats': formats, + } From b1e9ebd08087c7e591b55451551d51120b7eec9d Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 02:30:12 +0100 Subject: [PATCH 0601/3599] [washingtonpost] remove unnecessary code --- youtube_dl/extractor/washingtonpost.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index 71349d487..c2c5bae05 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ 
b/youtube_dl/extractor/washingtonpost.py @@ -74,12 +74,6 @@ class WashingtonPostIE(InfoExtractor): 'vcodec': s.get('videoCodec') if has_width else 'none', 'filesize': int_or_none(s.get('fileSize')), 'url': s_url, - 'ext': 'mp4', - 'protocol': { - 'mp4': 'http', - 'ts': 'm3u8_native', - 'hls': 'm3u8_native', - }.get(s.get('type')), }) source_media_url = video_data.get('sourceMediaURL') if source_media_url: From 42a7439717610530b0f7c630ef0eecf1b0638475 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 09:30:26 +0100 Subject: [PATCH 0602/3599] [cbs] allow to pass content id to the extractor(closes #9589) --- youtube_dl/extractor/cbs.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 051d783a2..ac2c7dced 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .theplatform import ThePlatformIE from ..utils import ( xpath_text, @@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE): class CBSIE(CBSBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)' + _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', @@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE): TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - content_id = self._search_regex( - [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], - webpage, 'content id') + content_id, display_id 
= re.match(self._VALID_URL, url).groups() + if not content_id: + webpage = self._download_webpage(url, display_id) + content_id = self._search_regex( + [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], + webpage, 'content id') items_data = self._download_xml( 'http://can.cbs.com/thunder/player/videoPlayerService.php', content_id, query={'partner': 'cbs', 'contentId': content_id}) From 05b651e3a58081492eb35d896c80dd1bdb87081c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 13:04:50 +0100 Subject: [PATCH 0603/3599] [washingtonpost] reduce requests for m3u8 manifests --- youtube_dl/extractor/washingtonpost.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dl/extractor/washingtonpost.py index c2c5bae05..839cad986 100644 --- a/youtube_dl/extractor/washingtonpost.py +++ b/youtube_dl/extractor/washingtonpost.py @@ -44,7 +44,7 @@ class WashingtonPostIE(InfoExtractor): video_type = s.get('type') if video_type == 'smil': continue - elif video_type in ('ts', 'hls'): + elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url): m3u8_formats = self._extract_m3u8_formats( s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) for m3u8_format in m3u8_formats: @@ -74,6 +74,8 @@ class WashingtonPostIE(InfoExtractor): 'vcodec': s.get('videoCodec') if has_width else 'none', 'filesize': int_or_none(s.get('fileSize')), 'url': s_url, + 'ext': 'mp4', + 'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None, }) source_media_url = video_data.get('sourceMediaURL') if source_media_url: From e8593f346a4b1236d2a023eb3070610bf180459c Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 23 May 2016 23:58:16 +0100 Subject: [PATCH 0604/3599] [ooyala] extract subtitles --- youtube_dl/extractor/ooyala.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 
9 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 95e982897..4c119071d 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -22,13 +22,7 @@ class OoyalaBaseIE(InfoExtractor): metadata = content_tree[list(content_tree)[0]] embed_code = metadata['embed_code'] pcode = metadata.get('asset_pcode') or embed_code - video_info = { - 'id': embed_code, - 'title': metadata['title'], - 'description': metadata.get('description'), - 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), - 'duration': float_or_none(metadata.get('duration'), 1000), - } + title = metadata['title'] urls = [] formats = [] @@ -78,8 +72,24 @@ class OoyalaBaseIE(InfoExtractor): self.IE_NAME, cur_auth_data['message']), expected=True) self._sort_formats(formats) - video_info['formats'] = formats - return video_info + subtitles = {} + for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items(): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles[lang] = [{ + 'url': sub_url, + }] + + return { + 'id': embed_code, + 'title': title, + 'description': metadata.get('description'), + 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), + 'duration': float_or_none(metadata.get('duration'), 1000), + 'subtitles': subtitles, + 'formats': formats, + } class OoyalaIE(OoyalaBaseIE): From a4760d204fe4cd7592bdfc91cbf550eb985374ac Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 00:22:29 +0100 Subject: [PATCH 0605/3599] [ooyala] use api v2 to reduce requests for format extraction --- youtube_dl/extractor/ooyala.py | 88 +++++++++++++++++----------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 4c119071d..09bc291f0 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -15,7 +15,7 @@ from ..compat import 
compat_urllib_parse_urlencode class OoyalaBaseIE(InfoExtractor): _PLAYER_BASE = 'http://player.ooyala.com/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' - _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?' + _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' def _extract(self, content_tree_url, video_id, domain='example.org'): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] @@ -24,52 +24,50 @@ class OoyalaBaseIE(InfoExtractor): pcode = metadata.get('asset_pcode') or embed_code title = metadata['title'] + auth_data = self._download_json( + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + + compat_urllib_parse_urlencode({ + 'domain': domain, + 'supportedFormats': 'mp4,rtmp,m3u8,hds', + }), video_id) + + cur_auth_data = auth_data['authorization_data'][embed_code] + urls = [] formats = [] - for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): - auth_data = self._download_json( - self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + - compat_urllib_parse_urlencode({ - 'domain': domain, - 'supportedFormats': supported_format - }), - video_id, 'Downloading %s JSON' % supported_format) - - cur_auth_data = auth_data['authorization_data'][embed_code] - - if cur_auth_data['authorized']: - for stream in cur_auth_data['streams']: - url = base64.b64decode( - stream['url']['data'].encode('ascii')).decode('utf-8') - if url in urls: - continue - urls.append(url) - delivery_type = stream['delivery_type'] - if delivery_type == 'hls' or '.m3u8' in url: - formats.extend(self._extract_m3u8_formats( - url, embed_code, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or '.f4m' in url: - formats.extend(self._extract_f4m_formats( - url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif '.smil' in url: - formats.extend(self._extract_smil_formats( - url, embed_code, fatal=False)) - else: - 
formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), - 'vcodec': stream.get('video_codec'), - 'format_id': delivery_type, - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - 'fps': float_or_none(stream.get('framerate')), - }) - else: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, cur_auth_data['message']), expected=True) + if cur_auth_data['authorized']: + for stream in cur_auth_data['streams']: + url = base64.b64decode( + stream['url']['data'].encode('ascii')).decode('utf-8') + if url in urls: + continue + urls.append(url) + delivery_type = stream['delivery_type'] + if delivery_type == 'hls' or '.m3u8' in url: + formats.extend(self._extract_m3u8_formats( + url, embed_code, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif delivery_type == 'hds' or '.f4m' in url: + formats.extend(self._extract_f4m_formats( + url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif '.smil' in url: + formats.extend(self._extract_smil_formats( + url, embed_code, fatal=False)) + else: + formats.append({ + 'url': url, + 'ext': stream.get('delivery_type'), + 'vcodec': stream.get('video_codec'), + 'format_id': delivery_type, + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + 'fps': float_or_none(stream.get('framerate')), + }) + else: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, cur_auth_data['message']), expected=True) self._sort_formats(formats) subtitles = {} From 25bcd3550ee67bb521173d7a43dbc91178a11cfc Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi <kagami@genshiken.org> Date: Tue, 24 May 2016 12:13:05 +0300 Subject: [PATCH 0606/3599] [vlive] Address site update Changes: * Fix video params extraction * Don't make status request since status 
info now available on the page * Remove unneeded code * Fix test --- youtube_dl/extractor/vlive.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index a672ea9c5..147f52d45 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -1,8 +1,7 @@ # coding: utf-8 -from __future__ import division, unicode_literals +from __future__ import unicode_literals import re -import time from .common import InfoExtractor from ..utils import ( @@ -23,7 +22,7 @@ class VLiveIE(InfoExtractor): 'info_dict': { 'id': '1326', 'ext': 'mp4', - 'title': "[V] Girl's Day's Broadcast", + 'title': "[V LIVE] Girl's Day's Broadcast", 'creator': "Girl's Day", 'view_count': int, }, @@ -35,24 +34,11 @@ class VLiveIE(InfoExtractor): webpage = self._download_webpage( 'http://www.vlive.tv/video/%s' % video_id, video_id) - # UTC+x - UTC+9 (KST) - tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone - tz_offset = -tz // 60 - 9 * 60 - self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset) - - status_params = self._download_json( - 'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, - video_id, 'Downloading JSON status', - headers={'Referer': url.encode('utf-8')}) - status = status_params.get('status') - air_start = status_params.get('onAirStartAt', '') - is_live = status_params.get('isLive') - video_params = self._search_regex( - r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)', + r'\bvlive\.video\.init\(([^)]+)\)', webpage, 'video params') - live_params, long_video_id, key = re.split( - r'"\s*,\s*"', video_params)[1:4] + status, _, _, live_params, long_video_id, key = re.split( + r'"\s*,\s*"', video_params)[2:8] if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': live_params = self._parse_json('"%s"' % live_params, video_id) @@ -61,8 +47,6 @@ class VLiveIE(InfoExtractor): elif status == 'VOD_ON_AIR' or status == 
'BIG_EVENT_INTRO': if long_video_id and key: return self._replay(video_id, webpage, long_video_id, key) - elif is_live: - status = 'LIVE_END' else: status = 'COMING_SOON' @@ -70,7 +54,7 @@ class VLiveIE(InfoExtractor): raise ExtractorError('Uploading for replay. Please wait...', expected=True) elif status == 'COMING_SOON': - raise ExtractorError('Coming soon! %s' % air_start, expected=True) + raise ExtractorError('Coming soon!', expected=True) elif status == 'CANCELED': raise ExtractorError('We are sorry, ' 'but the live broadcast has been canceled.', From 277c7465f58e0ac50de0dd9ebc2083f6142e9a94 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 11:24:29 +0100 Subject: [PATCH 0607/3599] [ooyala] check manifest ext with determine_ext and update tests for related extractors --- youtube_dl/extractor/byutv.py | 5 ++--- youtube_dl/extractor/espn.py | 12 ++++-------- youtube_dl/extractor/formula1.py | 3 ++- youtube_dl/extractor/groupon.py | 7 +++---- youtube_dl/extractor/howcast.py | 7 ++----- youtube_dl/extractor/ooyala.py | 24 +++++++++++++----------- youtube_dl/extractor/teachingchannel.py | 6 ++---- youtube_dl/extractor/veoh.py | 1 + youtube_dl/extractor/vice.py | 3 +++ youtube_dl/extractor/voxmedia.py | 12 ++++++++---- 10 files changed, 40 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index dda98059e..54eb57b46 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' _TEST = { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', + 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { 'id': 'studio-c-season-5-episode-5', 'ext': 'mp4', @@ -19,9 +20,7 @@ class BYUtvIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, - 'params': { - 
'skip_download': True, - } + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index db4b263bc..e3575aed1 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -8,28 +8,24 @@ class ESPNIE(InfoExtractor): _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', + 'md5': '60e5d097a523e767d06479335d1bdc58', 'info_dict': { 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', 'title': '30 for 30 Shorts: Judging Jewell', 'description': None, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['OoyalaExternal'], }, { # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season 'url': 'http://espn.go.com/video/clip?id=2743663', + 'md5': 'f4ac89b59afc7e2d7dbb049523df6768', 'info_dict': { 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['OoyalaExternal'], }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'only_matching': True, diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 726393fcc..322c41e5a 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor): 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', 'ext': 'flv', 'title': 'Race highlights - Spain 2016', - } + }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 1dd0a81cc..7bbb669c7 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor): 'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip 
Flotex flooring; certified instructors', }, 'playlist': [{ + 'md5': '42428ce8a00585f9bc36e49226eae7a1', 'info_dict': { 'id': 'fk6OhWpXgIQ', 'ext': 'mp4', @@ -24,10 +25,8 @@ class GrouponIE(InfoExtractor): 'uploader_id': 'groupon', 'uploader': 'Groupon', }, - }], - 'params': { - 'skip_download': True, - } + 'add_ie': ['Youtube'], + }] } _PROVIDERS = { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index e8f51e545..92caeb8f9 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)' _TEST = { 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', - 'md5': '8b743df908c42f60cf6496586c7f12c3', + 'md5': '7d45932269a288149483144f01b99789', 'info_dict': { 'id': '390161', 'ext': 'mp4', @@ -18,10 +18,7 @@ class HowcastIE(InfoExtractor): 'upload_date': '20100609', 'duration': 56.823, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 09bc291f0..2038a6ba5 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -8,6 +8,7 @@ from ..utils import ( float_or_none, ExtractorError, unsmuggle_url, + determine_ext, ) from ..compat import compat_urllib_parse_urlencode @@ -37,26 +38,27 @@ class OoyalaBaseIE(InfoExtractor): formats = [] if cur_auth_data['authorized']: for stream in cur_auth_data['streams']: - url = base64.b64decode( + s_url = base64.b64decode( stream['url']['data'].encode('ascii')).decode('utf-8') - if url in urls: + if s_url in urls: continue - urls.append(url) + urls.append(s_url) + ext = determine_ext(s_url, None) delivery_type = stream['delivery_type'] - if delivery_type == 'hls' or '.m3u8' in url: + if delivery_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - url, 
embed_code, 'mp4', 'm3u8_native', + s_url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif delivery_type == 'hds' or '.f4m' in url: + elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( - url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) - elif '.smil' in url: + s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) + elif ext == 'smil': formats.extend(self._extract_smil_formats( - url, embed_code, fatal=False)) + s_url, embed_code, fatal=False)) else: formats.append({ - 'url': url, - 'ext': stream.get('delivery_type'), + 'url': s_url, + 'ext': ext or stream.get('delivery_type'), 'vcodec': stream.get('video_codec'), 'format_id': delivery_type, 'width': int_or_none(stream.get('width')), diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e0477382c..e279280e9 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor): _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', + 'md5': '3d6361864d7cac20b57c8784da17166f', 'info_dict': { 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 'ext': 'mp4', @@ -18,10 +19,7 @@ class TeachingChannelIE(InfoExtractor): 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'duration': 422.255, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'add_ie': ['Ooyala'], } def _real_extract(self, url): diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index 23ce0a0d1..0f5d68738 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -37,6 +37,7 @@ class VeohIE(InfoExtractor): 'uploader': 'afp-news', 'duration': 123, }, + 'skip': 'This video has been deleted.', }, { 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 95daf4dfd..e2b2ce098 100644 --- 
a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -11,12 +11,14 @@ class ViceIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', + 'md5': 'e9d77741f9e42ba583e683cd170660f7', 'info_dict': { 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', 'ext': 'flv', 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', 'duration': 725.983, }, + 'add_ie': ['Ooyala'], }, { 'url': 'http://www.vice.com/video/how-to-hack-a-car', 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', @@ -29,6 +31,7 @@ class ViceIE(InfoExtractor): 'uploader': 'Motherboard', 'upload_date': '20140529', }, + 'add_ie': ['Youtube'], }, { 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'only_matching': True, diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index 9d73600aa..b1b32ad44 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Google\'s new material design direction', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', - } + }, + 'add_ie': ['Ooyala'], }, { # data-ooyala-id 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', @@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Nexus 6: hands-on with Google\'s phablet', 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', - } + }, + 'add_ie': ['Ooyala'], }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', @@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor): 'ext': 'mp4', 'title': 'The new frontier of LGBTQ civil rights, explained', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', - } + }, + 'add_ie': ['Ooyala'], }, { # youtube embed 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', @@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor): 'upload_date': '20160324', 'uploader_id': 
'voxdotcom', 'uploader': 'Vox', - } + }, + 'add_ie': ['Youtube'], }, { # SBN.VideoLinkset.entryGroup multiple ooyala embeds 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', From 444417edb55a5bf471697a3b2353fdbfb6f7e26d Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 15:58:27 +0100 Subject: [PATCH 0608/3599] [radiocanada] Add new extractor(#4020) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/radiocanada.py | 130 ++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 youtube_dl/extractor/radiocanada.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d8b3170ba..f9fed18f6 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -617,6 +617,10 @@ from .qqmusic import ( QQMusicPlaylistIE, ) from .r7 import R7IE +from .radiocanada import ( + RadioCanadaIE, + RadioCanadaAudioVideoIE, +) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py new file mode 100644 index 000000000..4f05bbddc --- /dev/null +++ b/youtube_dl/extractor/radiocanada.py @@ -0,0 +1,130 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + find_xpath_attr, + determine_ext, + int_or_none, + unified_strdate, + xpath_element, + ExtractorError, +) + + +class RadioCanadaIE(InfoExtractor): + IE_NAME = 'radiocanada' + _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'flv', + 'title': 'Le parcours du tireur capté sur 
vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + app_code, video_id = re.match(self._VALID_URL, url).groups() + + formats = [] + # TODO: extract m3u8 and f4m formats + # m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements + # f4m formats can be extracted using flashhd device_type but they produce unplayable file + for device_type in ('flash',): + v_data = self._download_xml( + 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', + video_id, note='Downloading %s XML' % device_type, query={ + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction + 'paysJ391wsHjbOJwvCs26toz': 'CA', + 'bypasslock': 'NZt5K62gRqfc', + }) + v_url = xpath_text(v_data, 'url') + if not v_url: + continue + if v_url == 'null': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, xpath_text(v_data, 'message')), expected=True) + ext = determine_ext(v_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + v_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False)) + else: + ext = determine_ext(v_url) + bitrates = xpath_element(v_data, 'bitrates') + for url_e in bitrates.findall('url'): + tbr = int_or_none(url_e.get('bitrate')) + if not tbr: + continue + formats.append({ + 'format_id': 'rtmp-%d' % tbr, + 'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url), + 'ext': 'flv', + 'protocol': 'rtmp', + 'width': int_or_none(url_e.get('width')), + 'height': int_or_none(url_e.get('height')), + 'tbr': tbr, + }) + self._sort_formats(formats) + + 
metadata = self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + return { + 'id': video_id, + 'title': get_meta('Title'), + 'description': get_meta('Description') or get_meta('ShortDescription'), + 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), + 'duration': int_or_none(get_meta('length')), + 'series': get_meta('Emission'), + 'season_number': int_or_none('SrcSaison'), + 'episode_number': int_or_none('SrcEpisode'), + 'upload_date': unified_strdate(get_meta('Date')), + 'formats': formats, + } + + +class RadioCanadaAudioVideoIE(InfoExtractor): + 'radiocanada:audiovideo' + _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', + 'info_dict': { + 'id': '7527184', + 'ext': 'flv', + 'title': 'Barack Obama au Vietnam', + 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', + 'upload_date': '20160523', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) From a4690b3244a42a833146c406e622c96045b23df5 Mon Sep 17 00:00:00 2001 From: mexican porn commits <xyz71412@laoeq.com> Date: Mon, 23 May 2016 16:32:39 -0500 Subject: [PATCH 0609/3599] [xhamster] url regex fix for videos with empty title. 
--- youtube_dl/extractor/xhamster.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index b3547174d..314e5020d 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -12,7 +12,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' + _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?' _TESTS = [ { 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', @@ -38,6 +38,18 @@ class XHamsterIE(InfoExtractor): 'age_limit': 18, } }, + { + 'url': 'http://xhamster.com/movies/5667973/.html', + 'info_dict': { + 'id': '5667973', + 'ext': 'mp4', + 'title': '....', + 'upload_date': '20160208', + 'uploader': 'parejafree', + 'duration': 72.0, + 'age_limit': 18, + } + }, { 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', 'only_matching': True, @@ -170,7 +182,7 @@ class XHamsterEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id, + r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id, webpage, 'xhamster url', default=None) if not video_url: From 6b43132ce9ec7477d69d8ad9d5b868060679de95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 May 2016 21:38:27 +0600 Subject: [PATCH 0610/3599] [xhamster] Update tests --- youtube_dl/extractor/xhamster.py | 79 +++++++++++++++++--------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 314e5020d..bd8e1af2e 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ 
-13,48 +13,51 @@ from ..utils import ( class XHamsterIE(InfoExtractor): _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?' - _TESTS = [ - { - 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', - 'info_dict': { - 'id': '1509445', - 'ext': 'mp4', - 'title': 'FemaleAgent Shy beauty takes the bait', - 'upload_date': '20121014', - 'uploader': 'Ruseful2011', - 'duration': 893.52, - 'age_limit': 18, - } + _TESTS = [{ + 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', + 'md5': '8281348b8d3c53d39fffb377d24eac4e', + 'info_dict': { + 'id': '1509445', + 'ext': 'mp4', + 'title': 'FemaleAgent Shy beauty takes the bait', + 'upload_date': '20121014', + 'uploader': 'Ruseful2011', + 'duration': 893.52, + 'age_limit': 18, }, - { - 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', - 'info_dict': { - 'id': '2221348', - 'ext': 'mp4', - 'title': 'Britney Spears Sexy Booty', - 'upload_date': '20130914', - 'uploader': 'jojo747400', - 'duration': 200.48, - 'age_limit': 18, - } + }, { + 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', + 'info_dict': { + 'id': '2221348', + 'ext': 'mp4', + 'title': 'Britney Spears Sexy Booty', + 'upload_date': '20130914', + 'uploader': 'jojo747400', + 'duration': 200.48, + 'age_limit': 18, }, - { - 'url': 'http://xhamster.com/movies/5667973/.html', - 'info_dict': { - 'id': '5667973', - 'ext': 'mp4', - 'title': '....', - 'upload_date': '20160208', - 'uploader': 'parejafree', - 'duration': 72.0, - 'age_limit': 18, - } + 'params': { + 'skip_download': True, }, - { - 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', - 'only_matching': True, + }, { + # empty seo + 'url': 'http://xhamster.com/movies/5667973/.html', + 'info_dict': { + 'id': '5667973', + 'ext': 'mp4', + 'title': '....', + 'upload_date': '20160208', + 'uploader': 'parejafree', + 
'duration': 72.0, + 'age_limit': 18, }, - ] + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', + 'only_matching': True, + }] def _real_extract(self, url): def extract_video_url(webpage, name): From 0d6ee9750801045e45157f38d98ef2be0c6da4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 May 2016 21:42:47 +0600 Subject: [PATCH 0611/3599] Credit @TRox1972 for tosh.cc (#9566) and localnews8 (#9539) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 5ca71ace7..3272fc6ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -172,3 +172,4 @@ blahgeek Kevin Deldycke inondle Tomáš Čech +Déstin Reed From 688c634b7d95a20c6081b202427a9e5fd7f36422 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 16:42:22 +0100 Subject: [PATCH 0612/3599] skip some tests to reduce test time --- youtube_dl/extractor/byutv.py | 3 +++ youtube_dl/extractor/espn.py | 6 ++++++ youtube_dl/extractor/groupon.py | 5 ++++- youtube_dl/extractor/howcast.py | 3 +++ youtube_dl/extractor/teachingchannel.py | 3 +++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 54eb57b46..3aec601f8 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -20,6 +20,9 @@ class BYUtvIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index e3575aed1..66c08bec4 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -15,6 +15,9 @@ class ESPNIE(InfoExtractor): 'title': '30 for 30 Shorts: Judging Jewell', 'description': None, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['OoyalaExternal'], }, { # intl video, from 
http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season @@ -25,6 +28,9 @@ class ESPNIE(InfoExtractor): 'ext': 'mp4', 'title': 'Must-See Moments: Best of the MLS season', }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['OoyalaExternal'], }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 7bbb669c7..a6da90931 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -26,7 +26,10 @@ class GrouponIE(InfoExtractor): 'uploader': 'Groupon', }, 'add_ie': ['Youtube'], - }] + }], + 'params': { + 'skip_download': True, + }, } _PROVIDERS = { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 92caeb8f9..7e36b85ad 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -18,6 +18,9 @@ class HowcastIE(InfoExtractor): 'upload_date': '20100609', 'duration': 56.823, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index e279280e9..d14d93e3a 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -19,6 +19,9 @@ class TeachingChannelIE(InfoExtractor): 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', 'duration': 422.255, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], } From 1de32771e1d3f89ef2738883b304ce52a5ecf303 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 24 May 2016 20:10:12 +0100 Subject: [PATCH 0613/3599] [eyedotv] Add new extractor(closes #9582) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/eyedotv.py | 64 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/eyedotv.py diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index f9fed18f6..05561149a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -231,6 +231,7 @@ from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE +from .eyedotv import EyedoTVIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dl/extractor/eyedotv.py new file mode 100644 index 000000000..2f3035147 --- /dev/null +++ b/youtube_dl/extractor/eyedotv.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + parse_duration, + ExtractorError, +) + + +class EyedoTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301', + 'md5': 'ba14f17995cdfc20c36ba40e21bf73f7', + 'info_dict': { + 'id': '16301', + 'ext': 'mp4', + 'title': 'Journée du conseil scientifique de l\'Afnic 2015', + 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98', + 'uploader': 'Afnic Live', + 'uploader_id': '8023', + } + } + _ROOT_URL = 'http://live.eyedo.net:1935/' + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id) + + def _add_ns(path): + return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api') + + title = xpath_text(video_data, _add_ns('Titre'), 'title', True) + state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True) + if state_live_code == 'avenir': + raise ExtractorError( + '%s said: We\'re sorry, but this video is not yet available.' 
% self.IE_NAME, + expected=True) + + is_live = state_live_code == 'live' + m3u8_url = None + # http://eyedo.tv/Content/Html5/Scripts/html5view.js + if is_live: + if xpath_text(video_data, 'Cdn') == 'true': + m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id + else: + m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id) + + return { + 'id': video_id, + 'title': title, + 'formats': self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'), + 'description': xpath_text(video_data, _add_ns('Description')), + 'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))), + 'uploader': xpath_text(video_data, _add_ns('Createur')), + 'uploader_id': xpath_text(video_data, _add_ns('CreateurId')), + 'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')), + 'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')), + } From 4ee0b8afdb384ad3e2d65b6b0159a801ee73d26d Mon Sep 17 00:00:00 2001 From: wankerer <git@wanker.33mail.com> Date: Tue, 24 May 2016 10:18:36 -0700 Subject: [PATCH 0614/3599] [eporner] fix for the new URL layout Recently eporner slightly changed the URL layout, the ID that used to be digits only are now digits and letters, so youtube-dl falls back to the generic extractor that doesn't work. Fix the matching regex to allow letters in ID. 
[v2: added a test case] --- youtube_dl/extractor/eporner.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index e006921ec..581276694 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -11,8 +11,8 @@ from ..utils import ( class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)' + _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'md5': '39d486f046212d8e1b911c52ab4691f8', 'info_dict': { @@ -23,8 +23,22 @@ class EpornerIE(InfoExtractor): 'duration': 1838, 'view_count': int, 'age_limit': 18, - } - } + }, + }, + # New (May 2016) URL layout + { + 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', + 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', + 'info_dict': { + 'id': '3YRUtzMcWn0', + 'display_id': 'Star-Wars-XXX-Parody', + 'ext': 'mp4', + 'title': 'Star Wars XXX Parody', + 'duration': 361.0, + 'view_count': int, + 'age_limit': 18, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 6f748df43ff3476e4dbd29c7464837ea63d78b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 May 2016 20:51:17 +0600 Subject: [PATCH 0615/3599] [eporner] Make test only_matching --- youtube_dl/extractor/eporner.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 581276694..ac5d0fe24 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -24,20 +24,10 @@ class EpornerIE(InfoExtractor): 'view_count': int, 'age_limit': 18, }, - }, - # New (May 2016) URL layout - { + }, { + # New (May 2016) URL layout 
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', - 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', - 'info_dict': { - 'id': '3YRUtzMcWn0', - 'display_id': 'Star-Wars-XXX-Parody', - 'ext': 'mp4', - 'title': 'Star Wars XXX Parody', - 'duration': 361.0, - 'view_count': int, - 'age_limit': 18, - }, + 'only_matching': True, }] def _real_extract(self, url): From 0a5685b26fae0940f14cb063a6e4fc6986f9c124 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:41:47 +0800 Subject: [PATCH 0616/3599] [common] Support non-bootstraped streams in f4m manifests Related: #9531 --- youtube_dl/extractor/common.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4bfa610c1..7eb7464ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True): + fatal=True, assume_f4mv2=False): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,11 +1001,11 @@ class InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal) + transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2) def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True): + fatal=True, assume_f4mv2=False): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1029,8 +1029,13 @@ 
class InfoExtractor(object): 'base URL', default=None) if base_url: base_url = base_url.strip() + + bootstrap_info = xpath_text( + manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], + 'bootstrap info', default=None) + for i, media_el in enumerate(media_nodes): - if manifest_version == '2.0': + if manifest_version == '2.0' or assume_f4mv2: media_url = media_el.attrib.get('href') or media_el.attrib.get('url') if not media_url: continue @@ -1050,7 +1055,7 @@ class InfoExtractor(object): formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'url': manifest_url, - 'ext': 'flv', + 'ext': 'flv' if bootstrap_info else None, 'tbr': tbr, 'width': int_or_none(media_el.attrib.get('width')), 'height': int_or_none(media_el.attrib.get('height')), From 85b0fe7d6442d4ddb056fb5a5d15e51e8a625ae7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:43:35 +0800 Subject: [PATCH 0617/3599] [playwire] Use _extract_f4m_formats Related: #9531 --- youtube_dl/extractor/playwire.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 6d138ef25..7580e4a85 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -4,9 +4,8 @@ import re from .common import InfoExtractor from ..utils import ( - xpath_text, + dict_get, float_or_none, - int_or_none, ) @@ -23,6 +22,7 @@ class PlaywireIE(InfoExtractor): 'duration': 145.94, }, }, { + # Multiple resolutions while bitrates missing 'url': 'http://cdn.playwire.com/11625/embed/85228.html', 'only_matching': True, }, { @@ -48,25 +48,10 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - f4m = self._download_xml(src, video_id) - base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True) - formats 
= [] - for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'): - media_url = media.get('url') - if not media_url: - continue - tbr = int_or_none(media.get('bitrate')) - width = int_or_none(media.get('width')) - height = int_or_none(media.get('height')) - f = { - 'url': '%s/%s' % (base_url, media.attrib['url']), - 'tbr': tbr, - 'width': width, - 'height': height, - } - if not (tbr or width or height): - f['quality'] = 1 if '-hd.' in media_url else 0 - formats.append(f) + formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True) + for a_format in formats: + if not dict_get(a_format, ['tbr', 'width', 'height']): + a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 self._sort_formats(formats) return { From 240b60453e1237473dfd8deff40c9dc54661668c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:55:43 +0800 Subject: [PATCH 0618/3599] [common] Support m3u8 in f4m manifests Related: #9531 --- youtube_dl/extractor/common.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7eb7464ec..b5bea5904 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False): + fatal=True, assume_f4mv2=False, m3u8_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,11 +1001,12 @@ class InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2) + transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2, + m3u8_id=m3u8_id) def 
_parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False): + fatal=True, assume_f4mv2=False, m3u8_id=None): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1046,11 +1047,17 @@ class InfoExtractor(object): # since bitrates in parent manifest (this one) and media_url manifest # may differ leading to inability to resolve the format by requested # bitrate in f4m downloader - if determine_ext(manifest_url) == 'f4m': + ext = determine_ext(manifest_url) + if ext == 'f4m': formats.extend(self._extract_f4m_formats( manifest_url, video_id, preference=preference, f4m_id=f4m_id, transform_source=transform_source, fatal=fatal)) continue + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', preference=preference, + m3u8_id=m3u8_id, fatal=False)) + continue tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), From 761052db922a525d6ccaf250f9914841c9d3d66f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 21:57:06 +0800 Subject: [PATCH 0619/3599] [playwire] Add the test (closed #9531) --- youtube_dl/extractor/playwire.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 7580e4a85..2ee5c5aa3 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -21,6 +21,18 @@ class PlaywireIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.png$', 'duration': 145.94, }, + }, { + # m3u8 in f4m + 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', + 'info_dict': { + 'id': 
'4840492', + 'ext': 'mp4', + 'title': 'ITV EL SHOW FULL', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # Multiple resolutions while bitrates missing 'url': 'http://cdn.playwire.com/11625/embed/85228.html', @@ -48,7 +60,7 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True) + formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True, m3u8_id='hls') for a_format in formats: if not dict_get(a_format, ['tbr', 'width', 'height']): a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 From 5950cb1d6d8d27f7a7272895100da9652212fad6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 22:44:00 +0800 Subject: [PATCH 0620/3599] [utils] Support a new form of date Found in dw.com (#9475) --- youtube_dl/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d65f5e833..316a307e0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1035,6 +1035,7 @@ def unified_strdate(date_str, day_first=True): format_expressions.extend([ '%d-%m-%Y', '%d.%m.%Y', + '%d.%m.%y', '%d/%m/%Y', '%d/%m/%y', '%d/%m/%Y %H:%M:%S', @@ -1049,6 +1050,8 @@ def unified_strdate(date_str, day_first=True): ]) for expression in format_expressions: try: + print(expression) + print(date_str) upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') except ValueError: pass @@ -1910,7 +1913,7 @@ def parse_age_limit(s): def strip_jsonp(code): return re.sub( - r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) + r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code) def js_to_json(code): From ac88d2316ebef5b00cf5c94d94f01c9f7e17ce51 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 22:48:47 +0800 Subject: [PATCH 0621/3599] [dw] Support 
documentaries (closes #9475) --- youtube_dl/extractor/dw.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index ae7c571bd..0f0f0b8d3 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -2,13 +2,16 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + unified_strdate, +) from ..compat import compat_urlparse class DWIE(InfoExtractor): IE_NAME = 'dw' - _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)' _TESTS = [{ # video 'url': 'http://www.dw.com/en/intelligent-light/av-19112290', @@ -31,6 +34,16 @@ class DWIE(InfoExtractor): 'description': 'md5:bc9ca6e4e063361e21c920c53af12405', 'upload_date': '20160311', } + }, { + 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', + 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1', + 'info_dict': { + 'id': '19274438', + 'ext': 'mp4', + 'title': 'Welcome to the 90s – Hip Hop', + 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop', + 'upload_date': '20160521', + }, }] def _real_extract(self, url): @@ -38,6 +51,7 @@ class DWIE(InfoExtractor): webpage = self._download_webpage(url, media_id) hidden_inputs = self._hidden_inputs(webpage) title = hidden_inputs['media_title'] + media_id = hidden_inputs.get('media_id') or media_id if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': formats = self._extract_smil_formats( @@ -49,13 +63,20 @@ class DWIE(InfoExtractor): else: formats = [{'url': hidden_inputs['file_name']}] + upload_date = hidden_inputs.get('display_date') + if not upload_date: + upload_date = self._html_search_regex( + r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage, + 'upload date', default=None) + upload_date = 
unified_strdate(upload_date) + return { 'id': media_id, 'title': title, 'description': self._og_search_description(webpage), 'thumbnail': hidden_inputs.get('preview_image'), 'duration': int_or_none(hidden_inputs.get('file_duration')), - 'upload_date': hidden_inputs.get('display_date'), + 'upload_date': upload_date, 'formats': formats, } From 293c2556886c34d11919eb0af6760c52bd6a2632 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 22:54:16 +0800 Subject: [PATCH 0622/3599] [utils] Remove debugging codes --- youtube_dl/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 316a307e0..cfb2d1bf5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1050,8 +1050,6 @@ def unified_strdate(date_str, day_first=True): ]) for expression in format_expressions: try: - print(expression) - print(date_str) upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') except ValueError: pass From 448bb5f333c6c4c8084e479e1035ff674e4f8fd4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 27 May 2016 00:03:03 +0800 Subject: [PATCH 0623/3599] [common] Fix non-bootstrapped support in f4m --- youtube_dl/extractor/common.py | 19 +++++++++++++------ youtube_dl/extractor/playwire.py | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b5bea5904..e53b7ad64 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -987,7 +987,7 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False, m3u8_id=None): + fatal=True, m3u8_id=None): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1001,12 +1001,11 @@ class 
InfoExtractor(object): return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal, assume_f4mv2=assume_f4mv2, - m3u8_id=m3u8_id) + transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id) def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), - fatal=True, assume_f4mv2=False, m3u8_id=None): + fatal=True, m3u8_id=None): # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -1036,8 +1035,16 @@ class InfoExtractor(object): 'bootstrap info', default=None) for i, media_el in enumerate(media_nodes): - if manifest_version == '2.0' or assume_f4mv2: - media_url = media_el.attrib.get('href') or media_el.attrib.get('url') + # If <bootstrapInfo> is present, the specified f4m is a + # stream-level manifest, and only set-level manifests may refer to + # external resources. 
See section 11.4 and section 4 of F4M spec + if bootstrap_info is None: + media_url = None + # @href is introduced in 2.0, see section 11.6 of F4M spec + if manifest_version == '2.0': + media_url = media_el.attrib.get('href') + if media_url is None: + media_url = media_el.attrib.get('url') if not media_url: continue manifest_url = ( diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py index 2ee5c5aa3..0bc743118 100644 --- a/youtube_dl/extractor/playwire.py +++ b/youtube_dl/extractor/playwire.py @@ -60,7 +60,7 @@ class PlaywireIE(InfoExtractor): thumbnail = content.get('poster') src = content['media']['f4m'] - formats = self._extract_f4m_formats(src, video_id, assume_f4mv2=True, m3u8_id='hls') + formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') for a_format in formats: if not dict_get(a_format, ['tbr', 'width', 'height']): a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 From 6f8cb2421948fd128b3004fde7eebaa2463f5f06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 May 2016 22:21:55 +0600 Subject: [PATCH 0624/3599] [tvp] Expand _VALID_URL and improve naming (Closes #9602) --- youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/tvp.py | 47 ++++++++++++++++-------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 05561149a..ddf62139e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -833,7 +833,10 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE -from .tvp import TvpIE, TvpSeriesIE +from .tvp import ( + TVPIE, + TVPSeriesIE, +) from .tvplay import TVPlayIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index f57d609d4..a4997cb89 100644 --- a/youtube_dl/extractor/tvp.py +++ 
b/youtube_dl/extractor/tvp.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,20 +6,13 @@ import re from .common import InfoExtractor -class TvpIE(InfoExtractor): - IE_NAME = 'tvp.pl' - _VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$' +class TVPIE(InfoExtractor): + IE_NAME = 'tvp' + IE_DESC = 'Telewizja Polska' + _VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)' _TESTS = [{ - 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035', - 'md5': 'cdd98303338b8a7f7abab5cd14092bf2', - 'info_dict': { - 'id': '4278035', - 'ext': 'wmv', - 'title': 'Ogniem i mieczem, odc. 2', - }, - }, { - 'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536', + 'url': 'http://vod.tvp.pl/194536/i-seria-odc-13', 'md5': '8aa518c15e5cc32dfe8db400dc921fbb', 'info_dict': { 'id': '194536', @@ -36,12 +29,22 @@ class TvpIE(InfoExtractor): }, }, { 'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272', - 'md5': 'c3b15ed1af288131115ff17a17c19dda', - 'info_dict': { - 'id': '17834272', - 'ext': 'mp4', - 'title': 'Na sygnale, odc. 
39', - }, + 'only_matching': True, + }, { + 'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200', + 'only_matching': True, + }, { + 'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa', + 'only_matching': True, + }, { + 'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach', + 'only_matching': True, + }, { + 'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum', + 'only_matching': True, + }, { + 'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji', + 'only_matching': True, }] def _real_extract(self, url): @@ -92,8 +95,8 @@ class TvpIE(InfoExtractor): } -class TvpSeriesIE(InfoExtractor): - IE_NAME = 'tvp.pl:Series' +class TVPSeriesIE(InfoExtractor): + IE_NAME = 'tvp:series' _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$' _TESTS = [{ @@ -127,7 +130,7 @@ class TvpSeriesIE(InfoExtractor): videos_paths = re.findall( '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist) entries = [ - self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key()) + self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key()) for v_path in videos_paths] return { From fac2af3c51c92b7f9abc4f229bc9351e8a301b29 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 27 May 2016 01:41:27 +0800 Subject: [PATCH 0625/3599] [common] Fix m3u8 extraction in f4m manifests --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e53b7ad64..0029c3694 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1063,7 +1063,7 @@ class InfoExtractor(object): elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', preference=preference, - m3u8_id=m3u8_id, fatal=False)) + m3u8_id=m3u8_id, fatal=fatal)) continue tbr = 
int_or_none(media_el.attrib.get('bitrate')) formats.append({ From 3874e6ea66c738910c6a1065b2d781e04a8143ae Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 16:45:14 +0200 Subject: [PATCH 0626/3599] [WDR] use single quotes for strings --- youtube_dl/extractor/wdr.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index ec81f1a28..05bfe7deb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -17,7 +17,7 @@ from ..utils import ( class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' - _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + "|" + _CURRENT_MAUS_URL + _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' @@ -116,23 +116,23 @@ class WDRIE(InfoExtractor): json_data = self._search_regex(r'\(({.*})\)', js_data, 'json') metadata = self._parse_json(json_data, display_id) - metadata_tracker_data = metadata["trackerData"] - metadata_media_resource = metadata["mediaResource"] + metadata_tracker_data = metadata['trackerData'] + metadata_media_resource = metadata['mediaResource'] formats = [] # check if the metadata contains a direct URL to a file - metadata_media_alt = metadata_media_resource.get("alt") + metadata_media_alt = metadata_media_resource.get('alt') if metadata_media_alt: - for tag_name in ["videoURL", 'audioURL']: + for tag_name in ['videoURL', 'audioURL']: if tag_name in metadata_media_alt: formats.append({ 'url': metadata_media_alt[tag_name] }) # check if there are flash-streams for this video - if "dflt" in metadata_media_resource and "videoURL" in metadata_media_resource["dflt"]: - video_url = 
metadata_media_resource["dflt"]["videoURL"] + if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: + video_url = metadata_media_resource['dflt']['videoURL'] if video_url.endswith('.f4m'): full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False)) @@ -140,13 +140,13 @@ class WDRIE(InfoExtractor): formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False)) subtitles = {} - caption_url = metadata_media_resource.get("captionURL") + caption_url = metadata_media_resource.get('captionURL') if caption_url: subtitles['de'] = [{ 'url': caption_url }] - title = metadata_tracker_data.get("trackerClipTitle") + title = metadata_tracker_data.get('trackerClipTitle') is_live = url_type == 'live' if is_live: @@ -163,13 +163,13 @@ class WDRIE(InfoExtractor): self._sort_formats(formats) return { - 'id': metadata_tracker_data.get("trackerClipId", display_id), + 'id': metadata_tracker_data.get('trackerClipId', display_id), 'display_id': display_id, 'title': title, - 'alt_title': metadata_tracker_data.get("trackerClipSubcategory"), + 'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), 'formats': formats, 'upload_date': upload_date, - 'description': self._html_search_meta("Description", webpage), + 'description': self._html_search_meta('Description', webpage), 'is_live': is_live, 'subtitles': subtitles, } From 37f972954da0d0f1f0c5e97da8357c4baf687ee6 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 16:59:45 +0200 Subject: [PATCH 0627/3599] [WDR] use _download_json with a strip_jsonp --- youtube_dl/extractor/wdr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 05bfe7deb..73a343c69 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -9,6 +9,7 @@ from ..compat 
import ( compat_urlparse, ) from ..utils import ( + strip_jsonp, unified_strdate, ExtractorError, ) @@ -112,9 +113,8 @@ class WDRIE(InfoExtractor): raise ExtractorError('No downloadable streams found', expected=True) - js_data = self._download_webpage(js_url, 'metadata') - json_data = self._search_regex(r'\(({.*})\)', js_data, 'json') - metadata = self._parse_json(json_data, display_id) + metadata = self._download_json( + js_url, 'metadata', transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] From bec2c14f2cf4f06f1b99e04d59779d8d103d726a Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 17:30:38 +0200 Subject: [PATCH 0628/3599] [WDR] add special handling if alt-url is a m3u8 --- youtube_dl/extractor/wdr.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 73a343c69..fddcbf190 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -9,6 +9,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, strip_jsonp, unified_strdate, ExtractorError, @@ -61,7 +62,7 @@ class WDRIE(InfoExtractor): 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', 'info_dict': { 'id': 'mdb-103364', - 'ext': 'flv', + 'ext': 'mp4', 'display_id': 'index', 'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'alt_title': 'WDR Fernsehen Live', @@ -69,7 +70,10 @@ class WDRIE(InfoExtractor): 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9', 'is_live': True, 'subtitles': {} - } + }, + 'params': { + 'skip_download': True, # m3u8 download + }, }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', @@ -126,9 +130,16 @@ class WDRIE(InfoExtractor): if metadata_media_alt: for tag_name in ['videoURL', 'audioURL']: if tag_name in 
metadata_media_alt: - formats.append({ - 'url': metadata_media_alt[tag_name] - }) + alt_url = metadata_media_alt[tag_name] + if determine_ext(alt_url) == 'm3u8': + m3u_fmt = self._extract_m3u8_formats( + alt_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + formats.extend(m3u_fmt) + else: + formats.append({ + 'url': alt_url + }) # check if there are flash-streams for this video if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: From 33a1ff7113d9dd656b3c56cb404de85646caa559 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 19:08:12 +0200 Subject: [PATCH 0629/3599] [WDR] extract jsonp-url by parsing data-extension of mediaLink --- youtube_dl/extractor/wdr.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index fddcbf190..dd107ef8a 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,6 +10,7 @@ from ..compat import ( ) from ..utils import ( determine_ext, + js_to_json, strip_jsonp, unified_strdate, ExtractorError, @@ -21,8 +22,6 @@ class WDRIE(InfoExtractor): _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL - _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' - _TESTS = [ { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', @@ -102,9 +101,13 @@ class WDRIE(InfoExtractor): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None) + # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdrmaus its in a link to the page in a multiline "videoLink"-tag + json_metadata = self._html_search_regex( + 
r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + webpage, 'media link', default=None, flags=re.MULTILINE) - if not js_url: + if not json_metadata: entries = [ self.url_result(page_url + href[0], 'WDR') for href in re.findall( @@ -117,8 +120,12 @@ class WDRIE(InfoExtractor): raise ExtractorError('No downloadable streams found', expected=True) + media_link_obj = self._parse_json(json_metadata, display_id, + transform_source=js_to_json) + jsonp_url = media_link_obj['mediaObj']['url'] + metadata = self._download_json( - js_url, 'metadata', transform_source=strip_jsonp) + jsonp_url, 'metadata', transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] From 949fc42e009aed5414caad280d0dc551ffcd9c14 Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 19:58:55 +0200 Subject: [PATCH 0630/3599] [WDR] the other wdrmaus.de pages also changed to the new player --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/wdr.py | 89 +++++------------------------- 2 files changed, 15 insertions(+), 75 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6de3438fc..023598130 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -923,7 +923,6 @@ from .wat import WatIE from .wdr import ( WDRIE, WDRMobileIE, - WDRMausIE, ) from .webofstories import ( WebOfStoriesIE, diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index dd107ef8a..1af1e996d 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -4,10 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) from ..utils import ( determine_ext, js_to_json, @@ -18,7 +14,7 @@ from ..utils import ( class WDRIE(InfoExtractor): - _CURRENT_MAUS_URL 
= r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5' + _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL @@ -92,6 +88,20 @@ class WDRIE(InfoExtractor): }, 'skip': 'The id changes from week to week because of the new episode' }, + { + 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', + 'md5': 'ca365705551e4bd5217490f3b0591290', + 'info_dict': { + 'id': 'mdb-186083', + 'ext': 'flv', + 'upload_date': '20130919', + 'title': 'Sachgeschichte - Achterbahn ', + 'description': '- Die Sendung mit der Maus -', + }, + 'params': { + 'skip_download': True, # the file has different versions :( + }, + }, ] def _real_extract(self, url): @@ -222,72 +232,3 @@ class WDRMobileIE(InfoExtractor): 'User-Agent': 'mobile', }, } - - -class WDRMausIE(InfoExtractor): - _VALID_URL = 'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)((?<!index)\.php5|/(?:$|[?#]))' - IE_DESC = 'Sendung mit der Maus' - _TESTS = [{ - 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', - 'md5': '178b432d002162a14ccb3e0876741095', - 'info_dict': { - 'id': 'achterbahn', - 'ext': 'mp4', - 'thumbnail': 're:^http://.+\.jpg', - 'upload_date': '20131001', - 'title': '19.09.2013 - Achterbahn', - } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - param_code = self._html_search_regex( - r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters') - - title_date = self._search_regex( - r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>', - webpage, 'air date') - title_str = self._html_search_regex( - r'<h1>(.*?)</h1>', webpage, 'title') - title = '%s - %s' % (title_date, title_str) - upload_date = unified_strdate( - 
self._html_search_meta('dc.date', webpage)) - - fields = compat_parse_qs(param_code) - video_url = fields['firstVideo'][0] - thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0]) - - formats = [{ - 'format_id': 'rtmp', - 'url': video_url, - }] - - jscode = self._download_webpage( - 'http://www.wdrmaus.de/codebase/js/extended-medien.min.js', - video_id, fatal=False, - note='Downloading URL translation table', - errnote='Could not download URL translation table') - if jscode: - for m in re.finditer( - r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}", - jscode): - if video_url.startswith(m.group('stream')): - http_url = video_url.replace( - m.group('stream'), m.group('dl')) - formats.append({ - 'format_id': 'http', - 'url': http_url, - }) - break - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - } From 3a686853e1739dfc26548cdc09fe89e693e76a9f Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister <boris-code@gmx.com> Date: Thu, 26 May 2016 20:16:33 +0200 Subject: [PATCH 0631/3599] [WDR] fixed parsing of playlists --- youtube_dl/extractor/wdr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 1af1e996d..1e729cb7c 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -72,7 +72,7 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html', - 'playlist_mincount': 10, + 'playlist_mincount': 8, 'info_dict': { 'id': 'aktuelle-stunde/aktuelle-stunde-120', }, @@ -121,7 +121,7 @@ class WDRIE(InfoExtractor): entries = [ self.url_result(page_url + href[0], 'WDR') for href in re.findall( - r'<a href="(%s)"' % self._PAGE_REGEX, + r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX, webpage) ] From 2615fa758422deaaf11049e71f0c183e655c0b76 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 01:46:12 +0600 Subject: [PATCH 0632/3599] [downloader/f4m] Simply select format when it's the only one --- youtube_dl/downloader/f4m.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 314def4cb..8f88b0241 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -319,7 +319,7 @@ class F4mFD(FragmentFD): doc = compat_etree_fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in self._get_unencrypted_media(doc)] - if requested_bitrate is None: + if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] From 77b8b4e696dd5ffb1330a2de328eb9c3ecd09a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 01:47:44 +0600 Subject: [PATCH 0633/3599] [extractor/common] Borrow quality metadata from parent set-level manifest for f4m --- youtube_dl/extractor/common.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0029c3694..57793537b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1035,6 +1035,10 @@ class InfoExtractor(object): 'bootstrap info', default=None) for i, media_el in enumerate(media_nodes): + tbr = int_or_none(media_el.attrib.get('bitrate')) + width = int_or_none(media_el.attrib.get('width')) + height = int_or_none(media_el.attrib.get('height')) + format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])) # If <bootstrapInfo> is present, the specified f4m is a # stream-level manifest, and only set-level manifests may refer to # external resources. 
See section 11.4 and section 4 of F4M spec @@ -1056,23 +1060,35 @@ class InfoExtractor(object): # bitrate in f4m downloader ext = determine_ext(manifest_url) if ext == 'f4m': - formats.extend(self._extract_f4m_formats( + f4m_formats = self._extract_f4m_formats( manifest_url, video_id, preference=preference, f4m_id=f4m_id, - transform_source=transform_source, fatal=fatal)) + transform_source=transform_source, fatal=fatal) + # Sometimes stream-level manifest contains single media entry that + # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player). + # At the same time parent's media entry in set-level manifest may + # contain it. We will copy it from parent in such cases. + if len(f4m_formats) == 1: + f = f4m_formats[0] + f.update({ + 'tbr': f.get('tbr') or tbr, + 'width': f.get('width') or width, + 'height': f.get('height') or height, + 'format_id': f.get('format_id') if not tbr else format_id, + }) + formats.extend(f4m_formats) continue elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( manifest_url, video_id, 'mp4', preference=preference, m3u8_id=m3u8_id, fatal=fatal)) continue - tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), + 'format_id': format_id, 'url': manifest_url, 'ext': 'flv' if bootstrap_info else None, 'tbr': tbr, - 'width': int_or_none(media_el.attrib.get('width')), - 'height': int_or_none(media_el.attrib.get('height')), + 'width': width, + 'height': height, 'preference': preference, }) return formats From f36532404dedb08f103083fba931864927de369d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 22:19:10 +0600 Subject: [PATCH 0634/3599] [vk] Remove superfluous code --- youtube_dl/extractor/vk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 041d93629..79c819bc3 100644 --- 
a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -217,7 +217,6 @@ class VKIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('videoid') - info_url = url if video_id: info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id # Some videos (removed?) can only be downloaded with list id specified From 11c70deba792de58c64c82d96ffcfdf295483b84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 23:34:58 +0600 Subject: [PATCH 0635/3599] [coub] Add extractor (Closes #9609) --- youtube_dl/extractor/coub.py | 139 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 140 insertions(+) create mode 100644 youtube_dl/extractor/coub.py diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py new file mode 100644 index 000000000..c3b09b177 --- /dev/null +++ b/youtube_dl/extractor/coub.py @@ -0,0 +1,139 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + parse_iso8601, + qualities, +) + + +class CoubIE(InfoExtractor): + _VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)' + + _TESTS = [{ + 'url': 'http://coub.com/view/5u5n1', + 'info_dict': { + 'id': '5u5n1', + 'ext': 'mp4', + 'title': 'The Matrix Moonwalk', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 4.6, + 'timestamp': 1428527772, + 'upload_date': '20150408', + 'uploader': 'Артём Лоскутников', + 'uploader_id': 'artyom.loskutnikov', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + 'age_limit': 0, + }, + }, { + 'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4', + 'only_matching': True, + }, { + 'url': 'coub:5u5n1', + 'only_matching': True, + }] + + def _real_extract(self, url): + 
video_id = self._match_id(url) + + coub = self._download_json( + 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id) + + if coub.get('error'): + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, coub['error']), expected=True) + + title = coub['title'] + + file_versions = coub['file_versions'] + + QUALITIES = ('low', 'med', 'high') + + MOBILE = 'mobile' + IPHONE = 'iphone' + HTML5 = 'html5' + + SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5) + + quality_key = qualities(QUALITIES) + preference_key = qualities(SOURCE_PREFERENCE) + + formats = [] + + for kind, items in file_versions.get(HTML5, {}).items(): + if kind not in ('video', 'audio'): + continue + if not isinstance(items, dict): + continue + for quality, item in items.items(): + if not isinstance(item, dict): + continue + item_url = item.get('url') + if not item_url: + continue + formats.append({ + 'url': item_url, + 'format_id': '%s-%s-%s' % (HTML5, kind, quality), + 'filesize': int_or_none(item.get('size')), + 'vcodec': 'none' if kind == 'audio' else None, + 'quality': quality_key(quality), + 'preference': preference_key(HTML5), + }) + + iphone_url = file_versions.get(IPHONE, {}).get('url') + if iphone_url: + formats.append({ + 'url': iphone_url, + 'format_id': IPHONE, + 'preference': preference_key(IPHONE), + }) + + mobile_url = file_versions.get(MOBILE, {}).get('audio_url') + if mobile_url: + formats.append({ + 'url': mobile_url, + 'format_id': '%s-audio' % MOBILE, + 'preference': preference_key(MOBILE), + }) + + self._sort_formats(formats) + + thumbnail = coub.get('picture') + duration = float_or_none(coub.get('duration')) + timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at')) + uploader = coub.get('channel', {}).get('title') + uploader_id = coub.get('channel', {}).get('permalink') + + view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count')) + like_count = int_or_none(coub.get('likes_count')) + repost_count = 
int_or_none(coub.get('recoubs_count')) + comment_count = int_or_none(coub.get('comments_count')) + + age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin')) + if age_restricted is not None: + age_limit = 18 if age_restricted is True else 0 + else: + age_limit = None + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'like_count': like_count, + 'repost_count': repost_count, + 'comment_count': comment_count, + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ddf62139e..dd4b2b838 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -143,6 +143,7 @@ from .cnn import ( CNNBlogsIE, CNNArticleIE, ) +from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE From de7d76af52c6cb462dfab967d57f5fa7cd17df50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 May 2016 23:38:17 +0600 Subject: [PATCH 0636/3599] [coub] Add another test --- youtube_dl/extractor/coub.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/coub.py b/youtube_dl/extractor/coub.py index c3b09b177..a901b8d22 100644 --- a/youtube_dl/extractor/coub.py +++ b/youtube_dl/extractor/coub.py @@ -38,6 +38,10 @@ class CoubIE(InfoExtractor): }, { 'url': 'coub:5u5n1', 'only_matching': True, + }, { + # longer video id + 'url': 'http://coub.com/view/237d5l5h', + 'only_matching': True, }] def _real_extract(self, url): From 807cf7b07f6ac1299f5578ea1264b43fc30d8301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 28 May 2016 21:18:24 +0600 Subject: [PATCH 0637/3599] [udemy] Fix authentication for 
localized layout (Closes #9594) --- youtube_dl/extractor/udemy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 13e0cd237..89b869559 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -142,7 +142,9 @@ class UdemyIE(InfoExtractor): self._LOGIN_URL, None, 'Downloading login popup') def is_logged(webpage): - return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<']) + return any(re.search(p, webpage) for p in ( + r'href=["\'](?:https://www\.udemy\.com)?/user/logout/', + r'>Logout<')) # already logged in if is_logged(login_popup): From 6461f2b7ec5fa0114d4bb38ca27c0f72edff8e23 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 01:26:00 +0800 Subject: [PATCH 0638/3599] [bilibili] Fix extraction, improve and cleanup --- youtube_dl/extractor/bilibili.py | 129 ++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8baff2041..71a54b4f4 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,34 +1,42 @@ # coding: utf-8 from __future__ import unicode_literals +import calendar +import datetime import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_etree_fromstring, + compat_str, + compat_parse_qs, + compat_xml_parse_error, +) from ..utils import ( - int_or_none, - unescapeHTML, ExtractorError, + int_or_none, + float_or_none, xpath_text, ) class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' 
+ _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', - 'md5': '2c301e4dab317596e837c3e7633e7d86', + 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 'info_dict': { 'id': '1554319', 'ext': 'flv', 'title': '【金坷垃】金泡沫', - 'duration': 308313, + 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', + 'duration': 308.067, + 'timestamp': 1398012660, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', - 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', - 'timestamp': 1397983878, 'uploader': '菊子桑', + 'uploader_id': '156160', }, }, { 'url': 'http://www.bilibili.com/video/av1041170/', @@ -36,75 +44,110 @@ class BiliBiliIE(InfoExtractor): 'id': '1041170', 'title': '【BD1080P】刀语【诸神&异域】', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', - 'uploader': '枫叶逝去', - 'timestamp': 1396501299, }, 'playlist_count': 9, }] + # BiliBili blocks keys from time to time. The current key is extracted from + # the Android client + # TODO: find the sign algorithm used in the flash player + _APP_KEY = '86385cdc024c0f6c' + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - page_num = mobj.group('page_num') or '1' - view_data = self._download_json( - 'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num), - video_id) - if 'error' in view_data: - raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True) + webpage = self._download_webpage(url, video_id) - cid = view_data['cid'] - title = unescapeHTML(view_data['title']) + params = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters')) + cid = params['cid'][0] - doc = self._download_xml( - 'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid, - cid, - 'Downloading page %s/%s' % (page_num, 
view_data['pages']) - ) + info_xml_str = self._download_webpage( + 'http://interface.bilibili.com/v_cdn_play', + cid, query={'appkey': self._APP_KEY, 'cid': cid}, + note='Downloading video info page') - if xpath_text(doc, './result') == 'error': - raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True) + err_msg = None + durls = None + info_xml = None + try: + info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8')) + except compat_xml_parse_error: + info_json = self._parse_json(info_xml_str, video_id, fatal=False) + err_msg = (info_json or {}).get('error_text') + else: + err_msg = xpath_text(info_xml, './message') + + if info_xml is not None: + durls = info_xml.findall('./durl') + if not durls: + if err_msg: + raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True) + else: + raise ExtractorError('No videos found!') entries = [] - for durl in doc.findall('./durl'): + for durl in durls: size = xpath_text(durl, ['./filesize', './size']) formats = [{ 'url': durl.find('./url').text, 'filesize': int_or_none(size), - 'ext': 'flv', }] - backup_urls = durl.find('./backup_url') - if backup_urls is not None: - for backup_url in backup_urls.findall('./url'): - formats.append({'url': backup_url.text}) - formats.reverse() + for backup_url in durl.findall('./backup_url/url'): + formats.append({ + 'url': backup_url.text, + # backup URLs have lower priorities + 'preference': -2 if 'hd.mp4' in backup_url.text else -3, + }) + + self._sort_formats(formats) entries.append({ 'id': '%s_part%s' % (cid, xpath_text(durl, './order')), - 'title': title, 'duration': int_or_none(xpath_text(durl, './length'), 1000), 'formats': formats, }) + title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') + description = self._html_search_meta('description', webpage) + datetime_str = self._html_search_regex( + r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False) + if datetime_str: + timestamp = 
calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) + + # TODO 'view_count' requires deobfuscating Javascript info = { 'id': compat_str(cid), 'title': title, - 'description': view_data.get('description'), - 'thumbnail': view_data.get('pic'), - 'uploader': view_data.get('author'), - 'timestamp': int_or_none(view_data.get('created')), - 'view_count': int_or_none(view_data.get('play')), - 'duration': int_or_none(xpath_text(doc, './timelength')), + 'description': description, + 'timestamp': timestamp, + 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), + 'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000), } + uploader_mobj = re.search( + r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"', + webpage) + if uploader_mobj: + info.update({ + 'uploader': uploader_mobj.group('name'), + 'uploader_id': uploader_mobj.group('id'), + }) + + for entry in entries: + entry.update(info) + if len(entries) == 1: - entries[0].update(info) return entries[0] else: - info.update({ + return { '_type': 'multi_video', 'id': video_id, + 'title': title, + 'description': description, 'entries': entries, - }) - return info + } From 92cf872a4870482e797bcd54316a4b8dc024fcc5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 01:58:27 +0800 Subject: [PATCH 0639/3599] [.gitignore] Ignore mp3 files [ci skip] --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d5f216b5f..d13551274 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ updates_key.pem *.mp4 *.m4a *.m4v +*.mp3 *.part *.swp test/testdata From 2bee7b25f39471c1ac5641b714e003bcf8335d15 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 01:59:09 +0800 Subject: [PATCH 0640/3599] [Makefile] Cleanup m4a files [ci skip] --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile 
index d760e4576..3861b23d5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From 2a329110b90102720bf5de00355473586674040f Mon Sep 17 00:00:00 2001 From: venth <artur.krysiak.warszawa@gmail.com> Date: Tue, 19 Apr 2016 11:17:46 +0200 Subject: [PATCH 0641/3599] ignored intellij related files --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d13551274..a802c75a1 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,9 @@ test/testdata test/local_parameters.json .tox youtube-dl.zsh + +# IntelliJ related files .idea -.idea/* +*.iml + tmp/ From f574103d7ca08a63e0dc58fdd7efde0871b9b395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 09:03:17 +0600 Subject: [PATCH 0642/3599] [buildserver] Fix buildserver and make python2 compatible --- devscripts/buildserver.py | 62 +++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 7c2f49f8b..dada6bfc7 100644 
--- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -1,17 +1,42 @@ #!/usr/bin/python3 -from http.server import HTTPServer, BaseHTTPRequestHandler -from socketserver import ThreadingMixIn import argparse import ctypes import functools +import shutil +import subprocess import sys +import tempfile import threading import traceback import os.path +sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) +from youtube_dl.compat import ( + compat_http_server, + compat_str, + compat_urlparse, +) -class BuildHTTPServer(ThreadingMixIn, HTTPServer): +# These are not used outside of buildserver.py thus not in compat.py + +try: + import winreg as compat_winreg +except ImportError: # Python 2 + import _winreg as compat_winreg + +try: + import socketserver as compat_socketserver +except ImportError: # Python 2 + import SocketServer as compat_socketserver + +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + + +class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): allow_reuse_address = True @@ -216,7 +241,7 @@ def main(args=None): srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) thr = threading.Thread(target=srv.serve_forever) thr.start() - input('Press ENTER to shut down') + compat_input('Press ENTER to shut down') srv.shutdown() thr.join() @@ -231,8 +256,6 @@ def rmtree(path): os.remove(fname) os.rmdir(path) -#============================================================================== - class BuildError(Exception): def __init__(self, output, code=500): @@ -249,15 +272,16 @@ class HTTPError(BuildError): class PythonBuilder(object): def __init__(self, **kwargs): - pythonVersion = kwargs.pop('python', '2.7') + python_version = kwargs.pop('python', '3.4') try: - key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion) + key = compat_winreg.OpenKey( + compat_winreg.HKEY_LOCAL_MACHINE, 
r'SOFTWARE\Python\PythonCore\%s\InstallPath' % python_version) try: - self.pythonPath, _ = _winreg.QueryValueEx(key, '') + self.pythonPath, _ = compat_winreg.QueryValueEx(key, '') finally: - _winreg.CloseKey(key) + compat_winreg.CloseKey(key) except Exception: - raise BuildError('No such Python version: %s' % pythonVersion) + raise BuildError('No such Python version: %s' % python_version) super(PythonBuilder, self).__init__(**kwargs) @@ -305,8 +329,10 @@ class YoutubeDLBuilder(object): def build(self): try: - subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], - cwd=self.buildPath) + proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath) + proc.wait() + #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], + # cwd=self.buildPath) except subprocess.CalledProcessError as e: raise BuildError(e.output) @@ -369,12 +395,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea pass -class BuildHTTPRequestHandler(BaseHTTPRequestHandler): +class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler): actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching. 
def do_GET(self): - path = urlparse.urlparse(self.path) - paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()]) + path = compat_urlparse.urlparse(self.path) + paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()]) action, _, path = path.path.strip('/').partition('/') if path: path = path.split('/') @@ -388,7 +414,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler): builder.close() except BuildError as e: self.send_response(e.code) - msg = unicode(e).encode('UTF-8') + msg = compat_str(e).encode('UTF-8') self.send_header('Content-Type', 'text/plain; charset=UTF-8') self.send_header('Content-Length', len(msg)) self.end_headers() @@ -400,7 +426,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler): else: self.send_response(500, 'Malformed URL') -#============================================================================== - if __name__ == '__main__': main() From 44c88923696d383bb1a74d9890e7e3126b846625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 09:06:10 +0600 Subject: [PATCH 0643/3599] [devscripts/prepare_manpage] Fix manpage generation on Windows --- devscripts/prepare_manpage.py | 61 +++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index 776e6556e..e3f6339b5 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -1,13 +1,46 @@ from __future__ import unicode_literals import io +import optparse import os.path -import sys import re ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) README_FILE = os.path.join(ROOT_DIR, 'README.md') +PREFIX = '''%YOUTUBE-DL(1) + +# NAME + +youtube\-dl \- download videos from youtube.com or other video platforms + +# SYNOPSIS + +**youtube-dl** \[OPTIONS\] URL [URL...] 
+ +''' + + +def main(): + parser = optparse.OptionParser(usage='%prog OUTFILE.md') + options, args = parser.parse_args() + if len(args) != 1: + parser.error('Expected an output filename') + + outfile, = args + + with io.open(README_FILE, encoding='utf-8') as f: + readme = f.read() + + readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) + readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) + readme = PREFIX + readme + + readme = filter_options(readme) + + with io.open(outfile, 'w', encoding='utf-8') as outf: + outf.write(readme) + def filter_options(readme): ret = '' @@ -37,27 +70,5 @@ def filter_options(readme): return ret -with io.open(README_FILE, encoding='utf-8') as f: - readme = f.read() - -PREFIX = '''%YOUTUBE-DL(1) - -# NAME - -youtube\-dl \- download videos from youtube.com or other video platforms - -# SYNOPSIS - -**youtube-dl** \[OPTIONS\] URL [URL...] - -''' -readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) -readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) -readme = PREFIX + readme - -readme = filter_options(readme) - -if sys.version_info < (3, 0): - print(readme.encode('utf-8')) -else: - print(readme) +if __name__ == '__main__': + main() From 27f17c0eabde55cbaab613280f60c01f5ee01025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 09:11:16 +0600 Subject: [PATCH 0644/3599] [Makefile] Fix youtube-dl.1 target Now it accepts output filename as argument --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3861b23d5..6ee4ba4eb 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ README.txt: README.md pandoc -f markdown -t plain README.md -o README.txt youtube-dl.1: README.md - $(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md + $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1 rm -f 
youtube-dl.1.temp.md From 165e3561e9ec8f8a1a1037e4fdebe880cdbd92fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 10:02:00 +0600 Subject: [PATCH 0645/3599] [devscripts/buildserver] Check Wow6432Node first when searching for python This allows building releases from 64bit OS --- devscripts/buildserver.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index dada6bfc7..2bd12da50 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -273,16 +273,25 @@ class HTTPError(BuildError): class PythonBuilder(object): def __init__(self, **kwargs): python_version = kwargs.pop('python', '3.4') - try: - key = compat_winreg.OpenKey( - compat_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % python_version) + python_path = None + for node in ('Wow6432Node\\', ''): try: - self.pythonPath, _ = compat_winreg.QueryValueEx(key, '') - finally: - compat_winreg.CloseKey(key) - except Exception: + key = compat_winreg.OpenKey( + compat_winreg.HKEY_LOCAL_MACHINE, + r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version)) + try: + python_path, _ = compat_winreg.QueryValueEx(key, '') + finally: + compat_winreg.CloseKey(key) + break + except Exception: + pass + + if not python_path: raise BuildError('No such Python version: %s' % python_version) + self.pythonPath = python_path + super(PythonBuilder, self).__init__(**kwargs) From f3fb420b827ce04dff101b64d81f8658fa2e5c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 11:49:14 +0600 Subject: [PATCH 0646/3599] [devscripts/release.sh] Check for wheel --- devscripts/release.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 7dd391b38..c34567f4c 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -6,7 
+6,7 @@ # * the git config user.signingkey is properly set # You will need -# pip install coverage nose rsa +# pip install coverage nose rsa wheel # TODO # release notes @@ -35,6 +35,7 @@ if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $us if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi +if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 9ed6d8c6c5b0c7a411d6b97d269a3e786875d66a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 13:54:05 +0800 Subject: [PATCH 0647/3599] [youku] Extract resolution --- youtube_dl/extractor/youku.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 349ce0941..dbccbe228 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -275,6 +275,8 @@ class YoukuIE(InfoExtractor): 'format_id': self.get_format_name(fm), 'ext': self.parse_ext_l(fm), 'filesize': int(seg['size']), + 'width': stream.get('width'), + 'height': stream.get('height'), }) return { From 681b923b5ca04338dfacd4154f627255d6e27d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 May 2016 23:36:42 +0700 Subject: [PATCH 0648/3599] [devscripts/release.sh] Allow passing buildserver address as cli option --- devscripts/release.sh | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index c34567f4c..cde4d0a39 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -15,10 +15,28 @@ set -e skip_tests=true -if [ "$1" = '--run-tests' ]; then - skip_tests=false - 
shift -fi +buildserver='localhost:8142' + +while true +do +case "$1" in + --run-tests) + skip_tests=false + shift + ;; + --buildserver) + buildserver="$2" + shift 2 + ;; + --*) + echo "ERROR: unknown option $1" + exit 1 + ;; + *) + break + ;; +esac +done if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi version="$1" @@ -67,7 +85,7 @@ git push origin "$version" REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz read -p "VM running? (y/n) " -n 1 -wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe +wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe mkdir -p "build/$version" mv youtube-dl youtube-dl.exe "build/$version" mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz" From 56bd028a0f4b3809403d887012bb93bbc06296a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 00:21:18 +0700 Subject: [PATCH 0649/3599] [devscripts/buildserver] Listen on all interfaces --- devscripts/buildserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index 2bd12da50..f7979c43e 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -216,7 +216,7 @@ def main(args=None): action='store_const', dest='action', const='service', help='Run as a Windows service') parser.add_argument('-b', '--bind', metavar='<host:port>', - action='store', default='localhost:8142', + action='store', default='0.0.0.0:8142', help='Bind to host:port (default %default)') options = parser.parse_args(args=args) From 917a3196f862ff785d15e595e03363c94e9d2e5b Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Mon, 30 May 2016 01:03:40 +0700 Subject: [PATCH 0650/3599] [README.md] Update c runtime dependency FAQ entry --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md 
index ef0e265c8..2b8a0fb85 100644 --- a/README.md +++ b/README.md @@ -784,9 +784,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. -### The exe throws a *Runtime error from Visual C++* +### The exe throws an error due to missing `MSVCR100.dll` -To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29). +To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555). ### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? From 82674236520b55893f6767362a32f74a831362ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 01:18:23 +0700 Subject: [PATCH 0651/3599] release 2016.05.30 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 15 ++++++++------- docs/supportedsites.md | 16 +++++++++++----- youtube_dl/version.py | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2d80d45b6..03dac8244 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.21.2 +[debug] youtube-dl version 2016.05.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 2b8a0fb85..7e18112de 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,8 @@ which means you can modify it, redistribute it or use it however you like. repairs broken URLs, but emits an error if this is not possible instead of searching. --ignore-config Do not read configuration files. When given - in the global configuration file /etc - /youtube-dl.conf: Do not read the user + in the global configuration file + /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube- dl/config (%APPDATA%/youtube-dl/config.txt on Windows) @@ -256,11 +256,12 @@ which means you can modify it, redistribute it or use it however you like. 
jar in --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information - permanently. By default $XDG_CACHE_HOME - /youtube-dl or ~/.cache/youtube-dl . At the - moment, only YouTube player files (for - videos with obfuscated signatures) are - cached, but that may change. + permanently. By default + $XDG_CACHE_HOME/youtube-dl or + ~/.cache/youtube-dl . At the moment, only + YouTube player files (for videos with + obfuscated signatures) are cached, but that + may change. --no-cache-dir Disable filesystem caching --rm-cache-dir Delete all filesystem cache files diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cd6bfa51c..bbc647030 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -43,8 +43,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -136,6 +136,7 @@ - **ComedyCentral** - **ComedyCentralShows**: The Daily Show / The Colbert Report - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED + - **Coub** - **Cracked** - **Crackle** - **Criterion** @@ -205,6 +206,7 @@ - **exfm**: ex.fm - **ExpoTV** - **ExtremeTube** + - **EyedoTV** - **facebook** - **faz.net** - **fc2** @@ -326,8 +328,8 @@ - **LePlaylist** - **LetvCloud**: 乐视云 - **Libsyn** + - **life**: Life.ru - **life:embed** - - **lifenews**: LIFE | NEWS - **limelight** - **limelight:channel** - **limelight:channel_list** @@ -512,6 +514,8 @@ - **R7** - **radio.de** - **radiobremen** + - **radiocanada** + - **RadioCanadaAudioVideo** - **radiofrance** - **RadioJavan** - **Rai** @@ -521,6 +525,7 @@ - **RedTube** - **RegioTV** - **Restudy** + - **Reuters** - 
**ReverbNation** - **Revision3** - **RICE** @@ -682,8 +687,8 @@ - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** - - **tvp.pl** - - **tvp.pl:Series** + - **tvp**: Telewizja Polska + - **tvp:series** - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** @@ -766,7 +771,8 @@ - **VuClip** - **vulture.com** - **Walla** - - **WashingtonPost** + - **washingtonpost** + - **washingtonpost:article** - **wat.tv** - **WatchIndianPorn**: Watch Indian Porn - **WDR** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 522a56669..0c38ec4cf 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.21.2' +__version__ = '2016.05.30' From 86a52881c6211dd58503480dc62b4f2404cc0f6b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 29 May 2016 21:29:38 +0200 Subject: [PATCH 0652/3599] [travis] unsubscribe @phihag --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 998995845..136c339f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,6 @@ script: nosetests test --verbose notifications: email: - filippo.valsorda@gmail.com - - phihag@phihag.de - yasoob.khld@gmail.com # irc: # channels: From f657b1a5f29e9f5eac7ca41b6e98c38cb3128183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 03:03:06 +0700 Subject: [PATCH 0653/3599] release 2016.05.30.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 03dac8244..1099b0c92 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.30 +[debug] youtube-dl version 2016.05.30.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0c38ec4cf..5f8542f8e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.30' +__version__ = '2016.05.30.1' From abbb2938fa08733e3a08f6d1917aa7687633b971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 03:12:12 +0700 Subject: [PATCH 0654/3599] release 2016.05.30.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) 
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1099b0c92..e3de48eb5 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.30.1 +[debug] youtube-dl version 2016.05.30.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f8542f8e..ad6fb26c6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.30.1' +__version__ = '2016.05.30.2' From 
197a5da1d01179f6a2d60e3c2017b0070e5abc8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 03:26:26 +0700 Subject: [PATCH 0655/3599] [yandexmusic] Improve captcha detection --- youtube_dl/extractor/yandexmusic.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 0f78466e6..b37d0eab6 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -20,18 +20,24 @@ class YandexMusicBaseIE(InfoExtractor): error = response.get('error') if error: raise ExtractorError(error, expected=True) + if response.get('type') == 'captcha' or 'captcha' in response: + YandexMusicBaseIE._raise_captcha() + + @staticmethod + def _raise_captcha(): + raise ExtractorError( + 'YandexMusic has considered youtube-dl requests automated and ' + 'asks you to solve a CAPTCHA. You can either wait for some ' + 'time until unblocked and optionally use --sleep-interval ' + 'in future or alternatively you can go to https://music.yandex.ru/ ' + 'solve CAPTCHA, then export cookies and pass cookie file to ' + 'youtube-dl with --cookies', + expected=True) def _download_webpage(self, *args, **kwargs): webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs) if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: - raise ExtractorError( - 'YandexMusic has considered youtube-dl requests automated and ' - 'asks you to solve a CAPTCHA. 
You can either wait for some ' - 'time until unblocked and optionally use --sleep-interval ' - 'in future or alternatively you can go to https://music.yandex.ru/ ' - 'solve CAPTCHA, then export cookies and pass cookie file to ' - 'youtube-dl with --cookies', - expected=True) + self._raise_captcha() return webpage def _download_json(self, *args, **kwargs): From 8ec2b2c41c7f3952ad9097085993d1f24f6b6776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 May 2016 21:48:35 +0700 Subject: [PATCH 0656/3599] [options] Add --limit-rate alias for rate limiting option Closes #9644 In order to follow regular --verb-noun pattern and better conformity with wget and curl --- youtube_dl/options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 38efd292d..14051b714 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -395,8 +395,8 @@ def parseOpts(overrideArguments=None): downloader = optparse.OptionGroup(parser, 'Download Options') downloader.add_option( - '-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', + '-r', '--limit-rate', '--rate-limit', + dest='ratelimit', metavar='RATE', help='Maximum download rate in bytes per second (e.g. 
50K or 4.2M)') downloader.add_option( '-R', '--retries', From e7d85c4ef7d2c74058d41ded1e2a6d6aa527dc9a Mon Sep 17 00:00:00 2001 From: Peter Rowlands <peter@pmrowla.com> Date: Tue, 31 May 2016 17:28:49 +0900 Subject: [PATCH 0657/3599] use /track/video/file to determine if video exists --- youtube_dl/extractor/afreecatv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 0fcbea0d1..518c61f67 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + xpath_element, xpath_text, ) @@ -84,9 +85,10 @@ class AfreecaTVIE(InfoExtractor): path='/api/video/get_video_info.php')) video_xml = self._download_xml(info_url, video_id) - if xpath_text(video_xml, './track/flag', default='FAIL') != 'SUCCEED': + if xpath_element(video_xml, './track/video/file') is None: raise ExtractorError('Specified AfreecaTV video does not exist', expected=True) + title = xpath_text(video_xml, './track/title', 'title') uploader = xpath_text(video_xml, './track/nickname', 'uploader') uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') From 877032314fdf2d9b391325f96e3bc53a60ea067c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 18:37:34 +0800 Subject: [PATCH 0658/3599] [generic] Improve Kaltura detection Closes #4004 --- youtube_dl/extractor/generic.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 303e112d2..5cb188b20 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -918,6 +918,19 @@ class GenericIE(InfoExtractor): 'uploader_id': 'echojecka', }, }, + # Kaltura embed with single quotes + { + 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', + 'info_dict': { + 'id': 
'0_izeg5utt', + 'ext': 'mp4', + 'title': '35871', + 'timestamp': 1355743100, + 'upload_date': '20121217', + 'uploader_id': 'batchUser', + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -1903,7 +1916,7 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or + mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) if mobj is not None: return self.url_result(smuggle_url( From 28bab13348f84ac75e4d1362ce5828429bb7993f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 19:18:01 +0800 Subject: [PATCH 0659/3599] [generic,viewlift] Move a test case to the specialized extractor --- youtube_dl/extractor/generic.py | 12 ------------ youtube_dl/extractor/viewlift.py | 4 ++++ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5cb188b20..e478f86a8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -881,18 +881,6 @@ class GenericIE(InfoExtractor): 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', } }, - # Kaltura embed - { - 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15', - 'info_dict': { - 'id': '1_eergr3h1', - 'ext': 'mp4', - 'upload_date': '20150226', - 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com', - 'timestamp': int, - 'title': 
'John Carlson Postgame 2/25/15', - }, - }, # Kaltura embed (different embed code) { 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014', diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py index dd4a13a4a..19500eba8 100644 --- a/youtube_dl/extractor/viewlift.py +++ b/youtube_dl/extractor/viewlift.py @@ -141,6 +141,10 @@ class ViewLiftIE(ViewLiftBaseIE): }, { 'url': 'http://www.kesari.tv/news/video/1461919076414', 'only_matching': True, + }, { + # Was once Kaltura embed + 'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15', + 'only_matching': True, }] def _real_extract(self, url): From 0ff3749bfe6d149dd7250ea8df83387d3af40e0f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 19:23:09 +0800 Subject: [PATCH 0660/3599] [udn] Fix m3u8 and f4m extraction as well as improve --- youtube_dl/extractor/udn.py | 62 ++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index ee35b7227..57dd73aef 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -2,10 +2,13 @@ from __future__ import unicode_literals import json +import re + from .common import InfoExtractor from ..utils import ( + determine_ext, + int_or_none, js_to_json, - ExtractorError, ) from ..compat import compat_urlparse @@ -16,13 +19,16 @@ class UDNEmbedIE(InfoExtractor): _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', - 'md5': 'de06b4c90b042c128395a88f0384817e', 'info_dict': { 'id': '300040', 'ext': 'mp4', 'title': '生物老師男變女 全校挺"做自己"', 'thumbnail': 're:^https?://.*\.jpg$', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'https://video.udn.com/embed/news/300040', 'only_matching': True, @@ -38,39 
+44,53 @@ class UDNEmbedIE(InfoExtractor): page = self._download_webpage(url, video_id) options = json.loads(js_to_json(self._html_search_regex( - r'var options\s*=\s*([^;]+);', page, 'video urls dictionary'))) + r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary'))) video_urls = options['video'] if video_urls.get('youtube'): return self.url_result(video_urls.get('youtube'), 'Youtube') - try: - del video_urls['youtube'] - except KeyError: - pass + formats = [] + for video_type, api_url in video_urls.items(): + if not api_url: + continue - formats = [{ - 'url': self._download_webpage( + video_url = self._download_webpage( compat_urlparse.urljoin(url, api_url), video_id, - 'retrieve url for %s video' % video_type), - 'format_id': video_type, - 'preference': 0 if video_type == 'mp4' else -1, - } for video_type, api_url in video_urls.items() if api_url] + note='retrieve url for %s video' % video_type) - if not formats: - raise ExtractorError('No videos found', expected=True) + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', m3u8_id='hls')) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds')) + else: + mobj = re.search(r'_(?P<height>\d+)p_(?P<tbr>\d+).mp4', video_url) + a_format = { + 'url': video_url, + # video_type may be 'mp4', which confuses YoutubeDL + 'format_id': 'http-' + video_type, + } + if mobj: + a_format.update({ + 'height': int_or_none(mobj.group('height')), + 'tbr': int_or_none(mobj.group('tbr')), + }) + formats.append(a_format) self._sort_formats(formats) - thumbnail = None - - if options.get('gallery') and len(options['gallery']): - thumbnail = options['gallery'][0].get('original') + thumbnails = [{ + 'url': img_url, + 'id': img_type, + } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url] return { 'id': video_id, 'formats': formats, 'title': options['title'], - 'thumbnail': thumbnail + 
'thumbnails': thumbnails, } From 811586ebcfb04878ad3347706bfee020d0e3652b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 19:23:44 +0800 Subject: [PATCH 0661/3599] [generic] Update the UDNEmbed test case --- youtube_dl/extractor/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e478f86a8..b4138381d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1033,14 +1033,18 @@ class GenericIE(InfoExtractor): }, # UDN embed { - 'url': 'http://www.udn.com/news/story/7314/822787', + 'url': 'https://video.udn.com/news/300346', 'md5': 'fd2060e988c326991037b9aff9df21a6', 'info_dict': { 'id': '300346', 'ext': 'mp4', 'title': '中一中男師變性 全校師生力挺', 'thumbnail': 're:^https?://.*\.jpg$', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, # Ooyala embed { From dde1ce7c061cae123264eb555f1da98956923301 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 1 Jun 2016 20:04:43 +0800 Subject: [PATCH 0662/3599] [tf1] Fix a regular expression (closes #9656) This is a Python bug fixed in 2.7.6 [1] [1] https://github.com/rg3/youtube-dl/issues/9656#issuecomment-222968594 --- youtube_dl/extractor/tf1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index aff5121b9..6c848dc6f 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1', + r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1', webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From 6a1df4fb5fb76710457b59195e8b530ba269f09f Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 1 Jun 2016 21:23:58 +0700 Subject: [PATCH 0663/3599] [spankwire] Add support for new URL format (Closes #9657) --- youtube_dl/extractor/spankwire.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 692fd78e8..92a7120a3 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -96,20 +96,18 @@ class SpankwireIE(InfoExtractor): formats = [] for height, video_url in zip(heights, video_urls): path = compat_urllib_parse_urlparse(video_url).path - _, quality = path.split('/')[4].split('_')[:2] - f = { - 'url': video_url, - 'height': height, - } - tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None) - if tbr: - f.update({ - 'tbr': int(tbr), - 'format_id': '%dp' % height, - }) + m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path) + if m: + tbr = int(m.group('tbr')) + height = int(m.group('height')) else: - f['format_id'] = quality - formats.append(f) + tbr = None + formats.append({ + 'url': video_url, + 'format_id': '%dp' % height, + 'height': height, + 'tbr': tbr, + }) self._sort_formats(formats) age_limit = self._rta_search(webpage) From 6e6b9f600f2f447604f6108fb6486b73cc25def1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Jun 2016 01:10:23 +0700 Subject: [PATCH 0664/3599] [arte] Add support for playlists and rework tests (Closes #9632) --- youtube_dl/extractor/arte.py | 173 ++++++++++++++++++----------- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 110 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index e37fdae13..f40532929 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -61,10 +61,7 @@ class ArteTvIE(InfoExtractor): } -class ArteTVPlus7IE(InfoExtractor): - IE_NAME = 
'arte.tv:+7' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)' - +class ArteTVBaseIE(InfoExtractor): @classmethod def _extract_url_info(cls, url): mobj = re.match(cls._VALID_URL, url) @@ -78,60 +75,6 @@ class ArteTVPlus7IE(InfoExtractor): video_id = mobj.group('id') return video_id, lang - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, video_id) - return self._extract_from_webpage(webpage, video_id, lang) - - def _extract_from_webpage(self, webpage, video_id, lang): - patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') - ids = (video_id, '') - # some pages contain multiple videos (like - # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), - # so we first try to look for json URLs that contain the video id from - # the 'vid' parameter. - patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] - json_url = self._html_search_regex( - patterns, webpage, 'json vp url', default=None) - if not json_url: - def find_iframe_url(webpage, default=NO_DEFAULT): - return self._html_search_regex( - r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url', default=default) - - iframe_url = find_iframe_url(webpage, None) - if not iframe_url: - embed_url = self._html_search_regex( - r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) - if embed_url: - player = self._download_json( - embed_url, video_id, 'Downloading player page') - iframe_url = find_iframe_url(player['html']) - # en and es URLs produce react-based pages with different layout (e.g. 
- # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) - if not iframe_url: - program = self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program', default=None) - if program: - embed_html = self._parse_json(program, video_id) - if embed_html: - iframe_url = find_iframe_url(embed_html['embed_html']) - if iframe_url: - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - if json_url: - title = self._search_regex( - r'<h3[^>]+title=(["\'])(?P<title>.+?)\1', - webpage, 'title', default=None, group='title') - return self._extract_from_json_url(json_url, video_id, lang, title=title) - # Different kind of embed URL (e.g. - # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - embed_url = self._search_regex( - r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', - webpage, 'embed url', group='url') - return self.url_result(embed_url) - def _extract_from_json_url(self, json_url, video_id, lang, title=None): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] @@ -235,6 +178,74 @@ class ArteTVPlus7IE(InfoExtractor): return info_dict +class ArteTVPlus7IE(ArteTVBaseIE): + IE_NAME = 'arte.tv:+7' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)' + + _TESTS = [{ + 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) + + def _real_extract(self, url): + video_id, lang = self._extract_url_info(url) + webpage = self._download_webpage(url, video_id) + return self._extract_from_webpage(webpage, video_id, lang) + + def _extract_from_webpage(self, webpage, video_id, lang): + patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', 
r'data-url=["\']([^"]+%s[^"]+)["\']') + ids = (video_id, '') + # some pages contain multiple videos (like + # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), + # so we first try to look for json URLs that contain the video id from + # the 'vid' parameter. + patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] + json_url = self._html_search_regex( + patterns, webpage, 'json vp url', default=None) + if not json_url: + def find_iframe_url(webpage, default=NO_DEFAULT): + return self._html_search_regex( + r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1', + webpage, 'iframe url', group='url', default=default) + + iframe_url = find_iframe_url(webpage, None) + if not iframe_url: + embed_url = self._html_search_regex( + r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) + if embed_url: + player = self._download_json( + embed_url, video_id, 'Downloading player page') + iframe_url = find_iframe_url(player['html']) + # en and es URLs produce react-based pages with different layout (e.g. + # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) + if not iframe_url: + program = self._search_regex( + r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', + webpage, 'program', default=None) + if program: + embed_html = self._parse_json(program, video_id) + if embed_html: + iframe_url = find_iframe_url(embed_html['embed_html']) + if iframe_url: + json_url = compat_parse_qs( + compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] + if json_url: + title = self._search_regex( + r'<h3[^>]+title=(["\'])(?P<title>.+?)\1', + webpage, 'title', default=None, group='title') + return self._extract_from_json_url(json_url, video_id, lang, title=title) + # Different kind of embed URL (e.g. 
+ # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) + embed_url = self._search_regex( + r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', + webpage, 'embed url', group='url') + return self.url_result(embed_url) + + # It also uses the arte_vp_url url from the webpage to extract the information class ArteTVCreativeIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:creative' @@ -267,7 +278,7 @@ class ArteTVInfoIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:info' _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', 'info_dict': { 'id': '067528-000-A', @@ -275,7 +286,7 @@ class ArteTVInfoIE(ArteTVPlus7IE): 'title': 'Service civique, un cache misère ?', 'upload_date': '20160403', }, - } + }] class ArteTVFutureIE(ArteTVPlus7IE): @@ -300,6 +311,8 @@ class ArteTVDDCIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:ddc' _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' + _TESTS = [] + def _real_extract(self, url): video_id, lang = self._extract_url_info(url) if lang == 'folge': @@ -318,7 +331,7 @@ class ArteTVConcertIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:concert' _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', 'md5': '9ea035b7bd69696b67aa2ccaaa218161', 'info_dict': { @@ -328,14 +341,14 @@ class ArteTVConcertIE(ArteTVPlus7IE): 'upload_date': '20140128', 'description': 'md5:486eb08f991552ade77439fe6d82c305', }, - } + }] class ArteTVCinemaIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:cinema' _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' - _TEST = { + _TESTS = [{ 'url': 'http://cinema.arte.tv/de/node/38291', 'md5': '6b275511a5107c60bacbeeda368c3aa1', 'info_dict': { @@ -345,7 +358,7 @@ class ArteTVCinemaIE(ArteTVPlus7IE): 'upload_date': '20160122', 'description': 
'md5:7f749bbb77d800ef2be11d54529b96bc', }, - } + }] class ArteTVMagazineIE(ArteTVPlus7IE): @@ -390,9 +403,41 @@ class ArteTVEmbedIE(ArteTVPlus7IE): ) ''' + _TESTS = [] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') lang = mobj.group('lang') json_url = mobj.group('json_url') return self._extract_from_json_url(json_url, video_id, lang) + + +class ArteTVPlaylistIE(ArteTVBaseIE): + IE_NAME = 'arte.tv:playlist' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)' + + _TESTS = [{ + 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV', + 'info_dict': { + 'id': 'PL-013263', + 'title': 'Areva & Uramin', + }, + 'playlist_mincount': 6, + }, { + 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id, lang = self._extract_url_info(url) + collection = self._download_json( + 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' + % (lang, playlist_id), playlist_id) + title = collection.get('title') + description = collection.get('shortDescription') or collection.get('teaserText') + entries = [ + self._extract_from_json_url( + video['jsonUrl'], video.get('programId') or playlist_id, lang) + for video in collection['videos'] if video.get('jsonUrl')] + return self.playlist_result(entries, playlist_id, title, description) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd4b2b838..dc21cfed9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -56,6 +56,7 @@ from .arte import ( ArteTVDDCIE, ArteTVMagazineIE, ArteTVEmbedIE, + ArteTVPlaylistIE, ) from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE From 5e3856a2c5af0a622b74921c0d60acde53a664ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Thu, 2 Jun 2016 01:19:57 +0700 Subject: [PATCH 0665/3599] release 2016.06.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e3de48eb5..ae98e0626 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.30.2 +[debug] youtube-dl version 2016.06.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 7e18112de..253d51bc8 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ which means you can modify it, redistribute it or use it however you like. (experimental) ## Download Options: - -r, --rate-limit LIMIT Maximum download rate in bytes per second + -r, --limit-rate RATE Maximum download rate in bytes per second (e.g. 50K or 4.2M) -R, --retries RETRIES Number of retries (default is 10), or "infinite". 
diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bbc647030..dcbc632a1 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -55,6 +55,7 @@ - **arte.tv:future** - **arte.tv:info** - **arte.tv:magazine** + - **arte.tv:playlist** - **AtresPlayer** - **ATTTechChannel** - **AudiMedia** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ad6fb26c6..fba427dde 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.30.2' +__version__ = '2016.06.02' From f4e4aa9b6b7057af400ad404efcca51669012b73 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 1 Jun 2016 21:18:57 +0100 Subject: [PATCH 0666/3599] [revision3:embed] Add new extractor --- youtube_dl/extractor/revision3.py | 132 ++++++++++++++---------------- 1 file changed, 63 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py index 99979ebe1..833d8a2f0 100644 --- a/youtube_dl/extractor/revision3.py +++ b/youtube_dl/extractor/revision3.py @@ -13,8 +13,64 @@ from ..utils import ( ) +class Revision3EmbedIE(InfoExtractor): + IE_NAME = 'revision3:embed' + _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)' + _TEST = { + 'url': 'http://api.seekernetwork.com/player/embed?videoId=67558', + 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'info_dict': { + 'id': '67558', + 'ext': 'mp4', + 'title': 'The Pros & Cons Of Zoos', + 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', + 'uploader_id': 'dnews', + 'uploader': 'DNews', + } + } + _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('playlist_id') + 
playlist_type = mobj.group('playlist_type') or 'video_id' + video_data = self._download_json( + 'http://revision3.com/api/getPlaylist.json', playlist_id, query={ + 'api_key': self._API_KEY, + 'codecs': 'h264,vp8,theora', + playlist_type: playlist_id, + })['items'][0] + + formats = [] + for vcodec, media in video_data['media'].items(): + for quality_id, quality in media.items(): + if quality_id == 'hls': + formats.extend(self._extract_m3u8_formats( + quality['url'], playlist_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': quality['url'], + 'format_id': '%s-%s' % (vcodec, quality_id), + 'tbr': int_or_none(quality.get('bitrate')), + 'vcodec': vcodec, + }) + self._sort_formats(formats) + + return { + 'id': playlist_id, + 'title': unescapeHTML(video_data['title']), + 'description': unescapeHTML(video_data.get('summary')), + 'uploader': video_data.get('show', {}).get('name'), + 'uploader_id': video_data.get('show', {}).get('slug'), + 'duration': int_or_none(video_data.get('duration')), + 'formats': formats, + } + + class Revision3IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' + IE_NAME = 'revision' + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' _TESTS = [{ 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', 'md5': 'd94a72d85d0a829766de4deb8daaf7df', @@ -32,52 +88,14 @@ class Revision3IE(InfoExtractor): } }, { # Show - 'url': 'http://testtube.com/brainstuff', - 'info_dict': { - 'id': '251', - 'title': 'BrainStuff', - 'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.', - }, - 'playlist_mincount': 93, - }, { - 'url': 
'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial', - 'info_dict': { - 'id': '58227', - 'display_id': 'dnews/5-weird-ways-plants-can-eat-animals', - 'duration': 275, - 'ext': 'webm', - 'title': '5 Weird Ways Plants Can Eat Animals', - 'description': 'Why have some plants evolved to eat meat?', - 'upload_date': '20150120', - 'timestamp': 1421763300, - 'uploader': 'DNews', - 'uploader_id': 'dnews', - }, - }, { - 'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min', - 'info_dict': { - 'id': '71618', - 'ext': 'mp4', - 'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min', - 'title': 'The Israel-Palestine Conflict Explained in Ten Minutes', - 'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start', - 'uploader': 'Editors\' Picks', - 'uploader_id': 'tt-editors-picks', - 'timestamp': 1453309200, - 'upload_date': '20160120', - }, - 'add_ie': ['Youtube'], + 'url': 'http://revision3.com/variant', + 'only_matching': True, }, { # Tag - 'url': 'http://testtube.com/tech-news', - 'info_dict': { - 'id': '21018', - 'title': 'tech news', - }, - 'playlist_mincount': 9, + 'url': 'http://revision3.com/vr', + 'only_matching': True, }] _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' - _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' def _real_extract(self, url): domain, display_id = re.match(self._VALID_URL, url).groups() @@ -119,33 +137,9 @@ class Revision3IE(InfoExtractor): }) return info - video_data = self._download_json( - 'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id), - video_id)['items'][0] - - formats = [] - for vcodec, media in video_data['media'].items(): - for quality_id, quality in media.items(): - if quality_id == 'hls': - formats.extend(self._extract_m3u8_formats( - 
quality['url'], video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'url': quality['url'], - 'format_id': '%s-%s' % (vcodec, quality_id), - 'tbr': int_or_none(quality.get('bitrate')), - 'vcodec': vcodec, - }) - self._sort_formats(formats) - info.update({ - 'title': unescapeHTML(video_data['title']), - 'description': unescapeHTML(video_data.get('summary')), - 'uploader': video_data.get('show', {}).get('name'), - 'uploader_id': video_data.get('show', {}).get('slug'), - 'duration': int_or_none(video_data.get('duration')), - 'formats': formats, + '_type': 'url_transparent', + 'url': 'revision3:%s' % video_id, }) return info else: From 4a684895c0227bf18896eae36e693d7046aacaf4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 1 Jun 2016 21:20:02 +0100 Subject: [PATCH 0667/3599] [seeker] Add new extractor(closes #9619) --- youtube_dl/extractor/extractors.py | 6 +++- youtube_dl/extractor/seeker.py | 57 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/seeker.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dc21cfed9..9dd55bd70 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -639,7 +639,10 @@ from .regiotv import RegioTVIE from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE -from .revision3 import Revision3IE +from .revision3 import ( + Revision3EmbedIE, + Revision3IE, +) from .rice import RICEIE from .ringtv import RingTVIE from .ro220 import Ro220IE @@ -678,6 +681,7 @@ from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE +from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE from .servingsys import ServingSysIE diff 
--git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py new file mode 100644 index 000000000..3b9c65e7e --- /dev/null +++ b/youtube_dl/extractor/seeker.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SeekerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html' + _TESTS = [{ + # player.loadRevision3Item + 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', + 'md5': '30c1dc4030cc715cf05b423d0947ac18', + 'info_dict': { + 'id': '76243', + 'ext': 'webm', + 'title': 'Should Trump Be Required To Release His Tax Returns?', + 'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?', + 'uploader': 'Seeker Daily', + 'uploader_id': 'seekerdaily', + } + }, { + 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', + 'playlist': [ + { + 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'info_dict': { + 'id': '67558', + 'ext': 'mp4', + 'title': 'The Pros & Cons Of Zoos', + 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', + 'uploader': 'DNews', + 'uploader_id': 'dnews', + }, + } + ], + 'info_dict': { + 'id': '1834116536', + 'title': 'After Gorilla Killing, Changes Ahead for Zoos', + 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.', + }, + }] + + def _real_extract(self, url): + display_id, article_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, display_id) + mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) + if mobj: + playlist_type, playlist_id = mobj.groups() + return self.url_result( + 
'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id) + else: + entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall( + r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)] + return self.playlist_result( + entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) From 0ea590076fd3b714b6a3345a28ac61d8be5e2afd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 17:22:40 +0800 Subject: [PATCH 0668/3599] [utils] Always decode Location header escape_url is broken for bytes-like objects --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cfb2d1bf5..6ab1747b3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -861,6 +861,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 if sys.version_info >= (3, 0): location = location.encode('iso-8859-1').decode('utf-8') + else: + location = location.decode('utf-8') location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] From 8c32e5dc32f401ae22fa7087f3f3b1c9d6563835 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 26 May 2016 17:24:40 +0800 Subject: [PATCH 0669/3599] [test/test_utils] Add test for #9588 --- test/test_http.py | 55 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index 15e0ad369..6b8493e5e 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -16,6 +16,15 @@ import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +def http_server_port(httpd): + if os.name == 'java': + # In Jython SSLSocket is not a subclass of socket.socket + sock = httpd.socket.sock + 
else: + sock = httpd.socket + return sock.getsockname()[1] + + class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass @@ -31,6 +40,22 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): self.send_header('Content-Type', 'video/mp4') self.end_headers() self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') + elif self.path == '/302': + if sys.version_info[0] == 3: + # XXX: Python 3 http server does not allow non-ASCII header values + self.send_response(404) + self.end_headers() + return + + new_url = 'http://localhost:%d/中文.html' % http_server_port(self.server) + self.send_response(302) + self.send_header(b'Location', new_url.encode('utf-8')) + self.end_headers() + elif self.path == '/%E4%B8%AD%E6%96%87.html': + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(b'<html><video src="/vid.mp4" /></html>') else: assert False @@ -47,18 +72,32 @@ class FakeLogger(object): class TestHTTP(unittest.TestCase): + def setUp(self): + self.httpd = compat_http_server.HTTPServer( + ('localhost', 0), HTTPTestRequestHandler) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def test_unicode_path_redirection(self): + # XXX: Python 3 http server does not allow non-ASCII header values + if sys.version_info[0] == 3: + return + + ydl = YoutubeDL({'logger': FakeLogger()}) + r = ydl.extract_info('http://localhost:%d/302' % self.port) + self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port) + + +class TestHTTPS(unittest.TestCase): def setUp(self): certfn = os.path.join(TEST_DIR, 'testcert.pem') self.httpd = compat_http_server.HTTPServer( ('localhost', 0), HTTPTestRequestHandler) self.httpd.socket = ssl.wrap_socket( self.httpd.socket, certfile=certfn, server_side=True) - if os.name == 
'java': - # In Jython SSLSocket is not a subclass of socket.socket - sock = self.httpd.socket.sock - else: - sock = self.httpd.socket - self.port = sock.getsockname()[1] + self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() @@ -94,14 +133,14 @@ class TestProxy(unittest.TestCase): def setUp(self): self.proxy = compat_http_server.HTTPServer( ('localhost', 0), _build_proxy_handler('normal')) - self.port = self.proxy.socket.getsockname()[1] + self.port = http_server_port(self.proxy) self.proxy_thread = threading.Thread(target=self.proxy.serve_forever) self.proxy_thread.daemon = True self.proxy_thread.start() self.cn_proxy = compat_http_server.HTTPServer( ('localhost', 0), _build_proxy_handler('cn')) - self.cn_port = self.cn_proxy.socket.getsockname()[1] + self.cn_port = http_server_port(self.cn_proxy) self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever) self.cn_proxy_thread.daemon = True self.cn_proxy_thread.start() From 54fb1996812fa09f0f81ac28f42647e7706212b2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 19:24:28 +0800 Subject: [PATCH 0670/3599] [test/test_http] Fix getsockname() on Jython --- test/test_http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_http.py b/test/test_http.py index 6b8493e5e..5076ced51 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -17,7 +17,7 @@ TEST_DIR = os.path.dirname(os.path.abspath(__file__)) def http_server_port(httpd): - if os.name == 'java': + if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): # In Jython SSLSocket is not a subclass of socket.socket sock = httpd.socket.sock else: From 9a4aec8b7ea2c0863bc03ba8f3d3e69a61e77c80 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 May 2016 19:25:25 +0800 Subject: [PATCH 0671/3599] [utils] Use bytes-like objects as header 
values on Python 2 --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6ab1747b3..26f21602c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -866,6 +866,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] + if sys.version_info < (3, 0): + location_escaped = location_escaped.encode('utf-8') resp.headers['Location'] = location_escaped return resp From b96f007eeb432cdd118fb4cfa027dfa36b0ea0f2 Mon Sep 17 00:00:00 2001 From: bzc6p <bzc6p@users.noreply.github.com> Date: Thu, 2 Jun 2016 11:39:32 +0200 Subject: [PATCH 0672/3599] =?UTF-8?q?Added=20sanitization=20support=20for?= =?UTF-8?q?=20Hungarian=20letters=20=C5=90=20and=20=C5=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a697232a8..feef80465 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -157,8 +157,8 @@ class TestUtil(unittest.TestCase): self.assertTrue(sanitize_filename(':', restricted=True) != '') self.assertEqual(sanitize_filename( - 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOOEUUUUYPssaaaaaaaeceeeeiiiionoooooooeuuuuypy') + 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True), + 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') From c88270271e65be591f7e194b71728256644b8684 Mon Sep 17 00:00:00 2001 From: bzc6p <bzc6p@users.noreply.github.com> Date: Thu, 2 Jun 2016 11:51:48 +0200 Subject: [PATCH 0673/3599] =?UTF-8?q?Added=20sanitization=20support=20for?= 
=?UTF-8?q?=20Hungarian=20letters=20=C5=90=20and=20=C5=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube_dl/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 26f21602c..89234b39d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -105,9 +105,9 @@ KNOWN_EXTENSIONS = ( 'f4f', 'f4m', 'm3u8', 'smil') # needed for sanitizing filenames in restricted mode -ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØŒÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøœùúûüýþÿ', - itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOO', ['OE'], 'UUUUYP', ['ss'], - 'aaaaaa', ['ae'], 'ceeeeiiiionoooooo', ['oe'], 'uuuuypy'))) +ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', + itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], + 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) def preferredencoding(): From ad73083ff05eafa64e07500fd42306ac349bd76b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 2 Jun 2016 19:27:57 +0800 Subject: [PATCH 0674/3599] [bilibili] Add _part%d suffixes back (closes #9660) --- youtube_dl/extractor/bilibili.py | 59 ++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 71a54b4f4..910e539e4 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -46,6 +46,62 @@ class BiliBiliIE(InfoExtractor): 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', }, 'playlist_count': 9, + }, { + 'url': 'http://www.bilibili.com/video/av4808130/', + 'info_dict': { + 'id': '4808130', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + }, + 'playlist': [{ + 'md5': '55cdadedf3254caaa0d5d27cf20a8f9c', + 
'info_dict': { + 'id': '4808130_part1', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }, { + 'md5': '926f9f67d0c482091872fbd8eca7ea3d', + 'info_dict': { + 'id': '4808130_part2', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }, { + 'md5': '4b7b225b968402d7c32348c646f1fd83', + 'info_dict': { + 'id': '4808130_part3', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }, { + 'md5': '7b795e214166501e9141139eea236e91', + 'info_dict': { + 'id': '4808130_part4', + 'ext': 'flv', + 'title': '【长篇】哆啦A梦443【钉铛】', + 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'timestamp': 1464564180, + 'upload_date': '20160529', + 'uploader': '喜欢拉面', + 'uploader_id': '151066', + }, + }], }] # BiliBili blocks keys from time to time. 
The current key is extracted from @@ -144,6 +200,9 @@ class BiliBiliIE(InfoExtractor): if len(entries) == 1: return entries[0] else: + for idx, entry in enumerate(entries): + entry['id'] = '%s_part%d' % (video_id, (idx + 1)) + return { '_type': 'multi_video', 'id': video_id, From 9c3c447eb389726d98189d972a2d772ef729132d Mon Sep 17 00:00:00 2001 From: TRox1972 <archcr8@gmail.com> Date: Tue, 17 May 2016 16:21:52 +0200 Subject: [PATCH 0675/3599] [loc] Add extractor (Closes #3188) Added extractor of loc.gov, which closes #3188. I am not an experienced programmer, so I am sure I did a bunch of mistakes, but the extractor works (for me at least). [LibraryOfCongress] don't use video_id for _search_regex() [LibraryOfCongress] Improvements --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/libraryofcongress.py | 65 +++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 youtube_dl/extractor/libraryofcongress.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9dd55bd70..3b5143ace 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -382,6 +382,7 @@ from .leeco import ( LePlaylistIE, LetvCloudIE, ) +from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( LifeNewsIE, diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py new file mode 100644 index 000000000..0c34dbce3 --- /dev/null +++ b/youtube_dl/extractor/libraryofcongress.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import determine_ext + + +class LibraryOfCongressIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://loc.gov/item/90716351/', + 'info_dict': { + 'id': '90716351', + 'ext': 'mp4', + 'title': 'Pa\'s trip to Mars /' + }, + 'params': { + # m3u8 download + 
'skip_download': True, + } + }, { + 'url': 'https://www.loc.gov/item/97516576/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + self.report_extraction(video_id) + json_id = self._search_regex('media-player-([0-9A-Z]{32})', webpage, 'json id') + + data = self._parse_json(self._download_webpage( + 'https://media.loc.gov/services/v1/media?id=%s' % json_id, + video_id), video_id) + data = data['mediaObject'] + + media_url = data['derivatives'][0]['derivativeUrl'] + media_url = media_url.replace('rtmp', 'https') + + is_video = data['mediaType'].lower() == 'v' + if not determine_ext(media_url) in ('mp4', 'mp3'): + media_url += '.mp4' if is_video else '.mp3' + + if media_url.index('vod/mp4:') > -1: + media_url = media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8' + elif url.index('vod/mp3:') > -1: + media_url = media_url.replace('vod/mp3:', '') + + formats = [] + if determine_ext(media_url) == 'm3u8': + formats = self._extract_m3u8_formats(media_url, video_id, ext='mp4') + elif determine_ext(media_url) is 'mp3': + formats.append({ + 'url': media_url, + 'ext': 'mp3', + }) + + return { + 'id': video_id, + 'thumbnail': self._og_search_thumbnail(webpage), + 'title': self._og_search_title(webpage), + 'formats': formats, + } From 7f3c3dfa52769d1f44c1f1031449118c564a92bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:19:11 +0700 Subject: [PATCH 0676/3599] [loc] Improve (Closes #9521) --- youtube_dl/extractor/libraryofcongress.py | 87 ++++++++++++++--------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 0c34dbce3..d311f9946 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -3,63 +3,82 @@ from __future__ import unicode_literals from .common import 
InfoExtractor -from ..utils import determine_ext +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, +) class LibraryOfCongressIE(InfoExtractor): + IE_NAME = 'loc' + IE_DESC = 'Library of Congress' _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)' - _TESTS = [{ - 'url': 'http://loc.gov/item/90716351/', + _TEST = { + 'url': 'http://loc.gov/item/90716351/', + 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { 'id': '90716351', 'ext': 'mp4', - 'title': 'Pa\'s trip to Mars /' + 'title': "Pa's trip to Mars", + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 0, + 'view_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'https://www.loc.gov/item/97516576/', - 'only_matching': True, - }] + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - self.report_extraction(video_id) - json_id = self._search_regex('media-player-([0-9A-Z]{32})', webpage, 'json id') + media_id = self._search_regex( + (r'id=(["\'])media-player-(?P<id>.+?)\1', + r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1', + r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1'), + webpage, 'media id', group='id') - data = self._parse_json(self._download_webpage( - 'https://media.loc.gov/services/v1/media?id=%s' % json_id, - video_id), video_id) - data = data['mediaObject'] + data = self._parse_json( + self._download_webpage( + 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, + video_id), + video_id)['mediaObject'] - media_url = data['derivatives'][0]['derivativeUrl'] + derivative = data['derivatives'][0] + media_url = derivative['derivativeUrl'] + + # Following algorithm was extracted from setAVSource js function + # found in webpage media_url = media_url.replace('rtmp', 'https') - is_video = data['mediaType'].lower() == 'v' - if not determine_ext(media_url) in ('mp4', 'mp3'): + is_video = data.get('mediaType', 'v').lower() == 'v' + ext = 
determine_ext(media_url) + if ext not in ('mp4', 'mp3'): media_url += '.mp4' if is_video else '.mp3' - if media_url.index('vod/mp4:') > -1: - media_url = media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8' - elif url.index('vod/mp3:') > -1: - media_url = media_url.replace('vod/mp3:', '') + if 'vod/mp4:' in media_url: + formats = [{ + 'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8', + 'format_id': 'hls', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + }] + elif 'vod/mp3:' in media_url: + formats = [{ + 'url': media_url.replace('vod/mp3:', ''), + 'vcodec': 'none', + }] - formats = [] - if determine_ext(media_url) == 'm3u8': - formats = self._extract_m3u8_formats(media_url, video_id, ext='mp4') - elif determine_ext(media_url) is 'mp3': - formats.append({ - 'url': media_url, - 'ext': 'mp3', - }) + self._sort_formats(formats) + + title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage) + duration = float_or_none(data.get('duration')) + view_count = int_or_none(data.get('viewCount')) return { 'id': video_id, + 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), - 'title': self._og_search_title(webpage), + 'duration': duration, + 'view_count': view_count, 'formats': formats, } From bf4c6a38e1a98606b269d70ccc65c7ec5d47ec07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:25:24 +0700 Subject: [PATCH 0677/3599] release 2016.06.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ae98e0626..e593ee78a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.02*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.02 +[debug] youtube-dl version 2016.06.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dcbc632a1..619bd0825 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -43,8 +43,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -339,6 +339,7 @@ - **livestream** - **livestream:original** - **LnkGo** + - **loc**: Library of Congress - **LocalNews8** - **LoveHomePorn** - **lrt.lt** @@ -528,7 +529,8 @@ - **Restudy** - **Reuters** - 
**ReverbNation** - - **Revision3** + - **revision** + - **revision3:embed** - **RICE** - **RingTV** - **RottenTomatoes** @@ -567,6 +569,7 @@ - **ScreencastOMatic** - **ScreenJunkies** - **ScreenwaveMedia** + - **Seeker** - **SenateISVP** - **SendtoNews** - **ServingSys** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fba427dde..d24d06f4a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.02' +__version__ = '2016.06.03' From 76e9cd7f24f6b175e4cce85082647403266ed233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:43:34 +0700 Subject: [PATCH 0678/3599] [loc] Add support for another URL schema and simplify --- youtube_dl/extractor/libraryofcongress.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index d311f9946..a5f22b204 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -13,8 +13,8 @@ from ..utils import ( class LibraryOfCongressIE(InfoExtractor): IE_NAME = 'loc' IE_DESC = 'Library of Congress' - _VALID_URL = r'https?://(?:www\.)?loc\.gov/item/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://loc.gov/item/90716351/', 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { @@ -25,7 +25,10 @@ class LibraryOfCongressIE(InfoExtractor): 'duration': 0, 'view_count': int, }, - } + }, { + 'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -34,13 +37,12 @@ class LibraryOfCongressIE(InfoExtractor): media_id = self._search_regex( (r'id=(["\'])media-player-(?P<id>.+?)\1', 
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1', - r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1'), + r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1', + r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'), webpage, 'media id', group='id') - data = self._parse_json( - self._download_webpage( - 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, - video_id), + data = self._download_json( + 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, video_id)['mediaObject'] derivative = data['derivatives'][0] @@ -77,7 +79,7 @@ class LibraryOfCongressIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'view_count': view_count, 'formats': formats, From c917106be4d6d98ce7504d71a32b58ddca2bc03d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Jun 2016 23:55:22 +0700 Subject: [PATCH 0679/3599] [loc] Extract subtites --- youtube_dl/extractor/libraryofcongress.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index a5f22b204..49351759e 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -15,6 +15,7 @@ class LibraryOfCongressIE(InfoExtractor): IE_DESC = 'Library of Congress' _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)' _TESTS = [{ + # embedded via <div class="media-player" 'url': 'http://loc.gov/item/90716351/', 'md5': '353917ff7f0255aa6d4b80a034833de8', 'info_dict': { @@ -26,8 +27,19 @@ class LibraryOfCongressIE(InfoExtractor): 'view_count': int, }, }, { + # webcast embedded via mediaObjectId 'url': 'https://www.loc.gov/today/cyberlc/feature_wdesc.php?rec=5578', - 'only_matching': True, + 'info_dict': { + 'id': '5578', + 
'ext': 'mp4', + 'title': 'Help! Preservation Training Needs Here, There & Everywhere', + 'duration': 3765, + 'view_count': int, + 'subtitles': 'mincount:1', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -76,6 +88,14 @@ class LibraryOfCongressIE(InfoExtractor): duration = float_or_none(data.get('duration')) view_count = int_or_none(data.get('viewCount')) + subtitles = {} + cc_url = data.get('ccUrl') + if cc_url: + subtitles.setdefault('en', []).append({ + 'url': cc_url, + 'ext': 'ttml', + }) + return { 'id': video_id, 'title': title, @@ -83,4 +103,5 @@ class LibraryOfCongressIE(InfoExtractor): 'duration': duration, 'view_count': view_count, 'formats': formats, + 'subtitles': subtitles, } From 4d8856d511aef11b5dbeb9f6523c2a117bdbb85d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Jun 2016 00:26:03 +0700 Subject: [PATCH 0680/3599] [loc] Extract direct download links --- youtube_dl/extractor/libraryofcongress.py | 38 ++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py index 49351759e..0a94366fd 100644 --- a/youtube_dl/extractor/libraryofcongress.py +++ b/youtube_dl/extractor/libraryofcongress.py @@ -1,12 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( determine_ext, float_or_none, int_or_none, + parse_filesize, ) @@ -40,6 +43,20 @@ class LibraryOfCongressIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # with direct download links + 'url': 'https://www.loc.gov/item/78710669/', + 'info_dict': { + 'id': '78710669', + 'ext': 'mp4', + 'title': 'La vie et la passion de Jesus-Christ', + 'duration': 0, + 'view_count': int, + 'formats': 'mincount:4', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -60,6 +77,9 @@ class 
LibraryOfCongressIE(InfoExtractor): derivative = data['derivatives'][0] media_url = derivative['derivativeUrl'] + title = derivative.get('shortName') or data.get('shortName') or self._og_search_title( + webpage) + # Following algorithm was extracted from setAVSource js function # found in webpage media_url = media_url.replace('rtmp', 'https') @@ -75,6 +95,7 @@ class LibraryOfCongressIE(InfoExtractor): 'format_id': 'hls', 'ext': 'mp4', 'protocol': 'm3u8_native', + 'quality': 1, }] elif 'vod/mp3:' in media_url: formats = [{ @@ -82,9 +103,24 @@ class LibraryOfCongressIE(InfoExtractor): 'vcodec': 'none', }] + download_urls = set() + for m in re.finditer( + r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage): + format_id = m.group('id').lower() + if format_id == 'gif': + continue + download_url = m.group('url') + if download_url in download_urls: + continue + download_urls.add(download_url) + formats.append({ + 'url': download_url, + 'format_id': format_id, + 'filesize_approx': parse_filesize(m.group('size')), + }) + self._sort_formats(formats) - title = derivative.get('shortName') or data.get('shortName') or self._og_search_title(webpage) duration = float_or_none(data.get('duration')) view_count = int_or_none(data.get('viewCount')) From 762d44c9567af424b2731cb643429ddd8e76d704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Jun 2016 04:57:16 +0700 Subject: [PATCH 0681/3599] [channel9] Add support for rss links (Closes #9673) --- youtube_dl/extractor/channel9.py | 123 ++++++++++++++++++------------- 1 file changed, 70 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py index c74553dcf..34d4e6156 100644 --- a/youtube_dl/extractor/channel9.py +++ b/youtube_dl/extractor/channel9.py @@ -20,54 +20,64 @@ class Channel9IE(InfoExtractor): ''' IE_DESC = 'Channel 9' IE_NAME = 'channel9' - 
_VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+)/?' + _VALID_URL = r'https?://(?:www\.)?channel9\.msdn\.com/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)' - _TESTS = [ - { - 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', - 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', - 'info_dict': { - 'id': 'Events/TechEd/Australia/2013/KOS002', - 'ext': 'mp4', - 'title': 'Developer Kick-Off Session: Stuff We Love', - 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', - 'duration': 4576, - 'thumbnail': 're:http://.*\.jpg', - 'session_code': 'KOS002', - 'session_day': 'Day 1', - 'session_room': 'Arena 1A', - 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', 'Mads Kristensen'], - }, + _TESTS = [{ + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'md5': 'bbd75296ba47916b754e73c3a4bbdf10', + 'info_dict': { + 'id': 'Events/TechEd/Australia/2013/KOS002', + 'ext': 'mp4', + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', + 'duration': 4576, + 'thumbnail': 're:http://.*\.jpg', + 'session_code': 'KOS002', + 'session_day': 'Day 1', + 'session_room': 'Arena 1A', + 'session_speakers': ['Ed Blankenship', 'Andrew Coates', 'Brady Gaster', 'Patrick Klug', + 'Mads Kristensen'], }, - { - 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', - 'info_dict': { - 'id': 'posts/Self-service-BI-with-Power-BI-nuclear-testing', - 'ext': 'mp4', - 'title': 'Self-service BI with Power BI - nuclear testing', - 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', - 'duration': 1540, - 'thumbnail': 're:http://.*\.jpg', - 'authors': ['Mike Wilmot'], - }, + }, { + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'md5': 'b43ee4529d111bc37ba7ee4f34813e68', + 'info_dict': { + 'id': 
'posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'ext': 'mp4', + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', + 'duration': 1540, + 'thumbnail': 're:http://.*\.jpg', + 'authors': ['Mike Wilmot'], }, - { - # low quality mp4 is best - 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'info_dict': { - 'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', - 'ext': 'mp4', - 'title': 'Ranges for the Standard Library', - 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', - 'duration': 5646, - 'thumbnail': 're:http://.*\.jpg', - }, - 'params': { - 'skip_download': True, - }, - } - ] + }, { + # low quality mp4 is best + 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'info_dict': { + 'id': 'Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'ext': 'mp4', + 'title': 'Ranges for the Standard Library', + 'description': 'md5:2e6b4917677af3728c5f6d63784c4c5d', + 'duration': 5646, + 'thumbnail': 're:http://.*\.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', + 'info_dict': { + 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', + 'title': 'Channel 9', + }, + 'playlist_count': 2, + }, { + 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', + 'only_matching': True, + }, { + 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', + 'only_matching': True, + }] _RSS_URL = 'http://channel9.msdn.com/%s/RSS' @@ -254,22 +264,30 @@ class Channel9IE(InfoExtractor): return self.playlist_result(contents) - def _extract_list(self, content_path): - rss = self._download_xml(self._RSS_URL % content_path, content_path, 'Downloading RSS') + def _extract_list(self, video_id, rss_url=None): + if not rss_url: + 
rss_url = self._RSS_URL % video_id + rss = self._download_xml(rss_url, video_id, 'Downloading RSS') entries = [self.url_result(session_url.text, 'Channel9') for session_url in rss.findall('./channel/item/link')] title_text = rss.find('./channel/title').text - return self.playlist_result(entries, content_path, title_text) + return self.playlist_result(entries, video_id, title_text) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) content_path = mobj.group('contentpath') + rss = mobj.group('rss') - webpage = self._download_webpage(url, content_path, 'Downloading web page') + if rss: + return self._extract_list(content_path, url) - page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage) - if page_type_m is not None: - page_type = page_type_m.group('pagetype') + webpage = self._download_webpage( + url, content_path, 'Downloading web page') + + page_type = self._search_regex( + r'<meta[^>]+name=(["\'])WT\.entryid\1[^>]+content=(["\'])(?P<pagetype>[^:]+).+?\2', + webpage, 'page type', default=None, group='pagetype') + if page_type: if page_type == 'Entry': # Any 'item'-like page, may contain downloadable content return self._extract_entry_item(webpage, content_path) elif page_type == 'Session': # Event session page, may contain downloadable content @@ -278,6 +296,5 @@ class Channel9IE(InfoExtractor): return self._extract_list(content_path) else: raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True) - else: # Assuming list return self._extract_list(content_path) From cad88f96dc8eaa845a458f0b80e92c1ba36c5491 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 4 Jun 2016 11:42:52 +0200 Subject: [PATCH 0682/3599] disable uploading to yt-dl.org for now --- devscripts/release.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index cde4d0a39..1a7b1e054 100755 --- a/devscripts/release.sh +++ 
b/devscripts/release.sh @@ -97,8 +97,10 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" /bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -scp -r "build/$version" ytdl@yt-dl.org:html/tmp/ -ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/" + +echo 'TODO: upload on GitHub' +exit 1 + ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" /bin/echo -e "\n### Now switching to gh-pages..." From 7def35712a7047578643f18eaf6dda79fd8c9291 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Sat, 21 May 2016 17:48:17 +0200 Subject: [PATCH 0683/3599] [vidio] Add extractor (Closes #7195) [Vidio] fix fallback value and wrap duration in int_or_none [Vidio] don't use video_id for _html_search_regex() --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidio.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/vidio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3b5143ace..ed4e39574 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -910,6 +910,7 @@ from .videomore import ( ) from .videopremium import VideoPremiumIE from .videott import VideoTtIE +from .vidio import VidioIE from .vidme import ( VidmeIE, VidmeUserIE, diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py new file mode 100644 index 000000000..d17c663fd --- /dev/null +++ b/youtube_dl/extractor/vidio.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor + +from ..utils import int_or_none + + +class VidioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d{6})-(?P<display_id>[^/?]+)' + _TEST = { + 'url': 
'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'info_dict': { + 'id': '165683', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'ext': 'mp4', + 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'description': 'md5:27dc15f819b6a78a626490881adbadf8', + 'duration': 149, + }, + 'params': { + # m3u8 download + 'skip_download': True + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + video_data = self._parse_json(self._html_search_regex( + r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + + formats = self._extract_m3u8_formats( + video_data['sources'][0]['file'], + display_id, ext='mp4') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': video_data.get('image'), + 'description': self._og_search_description(webpage), + 'duration': int_or_none(video_data.get('clip_duration')), + } From 0fc832e1b2c8f48298e135d42818a16bfba4d3ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Jun 2016 16:47:43 +0700 Subject: [PATCH 0684/3599] [vidio] Improve (Closes #9562) --- youtube_dl/extractor/vidio.py | 65 ++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/vidio.py b/youtube_dl/extractor/vidio.py index d17c663fd..6898042de 100644 --- a/youtube_dl/extractor/vidio.py +++ b/youtube_dl/extractor/vidio.py @@ -2,28 +2,30 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .common import InfoExtractor from ..utils import int_or_none class VidioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d{6})-(?P<display_id>[^/?]+)' - _TEST = { + _VALID_URL = 
r'https?://(?:www\.)?vidio\.com/watch/(?P<id>\d+)-(?P<display_id>[^/?#&]+)' + _TESTS = [{ 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', + 'md5': 'cd2801394afc164e9775db6a140b91fe', 'info_dict': { 'id': '165683', - 'title': 'DJ_AMBRED - Booyah (Live 2015)', + 'display_id': 'dj_ambred-booyah-live-2015', 'ext': 'mp4', - 'thumbnail': 'https://cdn0-a.production.vidio.static6.com/uploads/video/image/165683/dj_ambred-booyah-live-2015-bfb2ba.jpg', + 'title': 'DJ_AMBRED - Booyah (Live 2015)', 'description': 'md5:27dc15f819b6a78a626490881adbadf8', - 'duration': 149, + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 149, + 'like_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True - } - } + }, { + 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -31,18 +33,41 @@ class VidioIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_data = self._parse_json(self._html_search_regex( - r'data-json-clips\s*=\s*"\[(.+)\]"', webpage, 'video data'), display_id) + title = self._og_search_title(webpage) - formats = self._extract_m3u8_formats( - video_data['sources'][0]['file'], - display_id, ext='mp4') + m3u8_url, duration, thumbnail = [None] * 3 + + clips = self._parse_json( + self._html_search_regex( + r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1', + webpage, 'video data', default='[]', group='data'), + display_id, fatal=False) + if clips: + clip = clips[0] + m3u8_url = clip.get('sources', [{}])[0].get('file') + duration = clip.get('clip_duration') + thumbnail = clip.get('image') + + m3u8_url = m3u8_url or self._search_regex( + r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>.+?)\1', webpage, 'hls url') + formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native') + + duration = int_or_none(duration or self._search_regex( + 
r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration')) + thumbnail = thumbnail or self._og_search_thumbnail(webpage) + + like_count = int_or_none(self._search_regex( + (r'<span[^>]+data-comment-vote-count=["\'](\d+)', + r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'), + webpage, 'like count', fatal=False)) return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'formats': formats, - 'thumbnail': video_data.get('image'), + 'display_id': display_id, + 'title': title, 'description': self._og_search_description(webpage), - 'duration': int_or_none(video_data.get('clip_duration')), + 'thumbnail': thumbnail, + 'duration': duration, + 'like_count': like_count, + 'formats': formats, } From 1ae6c83bceb6dbc7093fe35ddafcde08dd0151a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:43:55 +0700 Subject: [PATCH 0685/3599] [compat] Add compat_input --- youtube_dl/compat.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 06e5f3ff6..fabac9fd2 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -482,6 +482,11 @@ if sys.version_info < (3, 0) and sys.platform == 'win32': else: compat_getpass = getpass.getpass +try: + compat_input = raw_input +except NameError: # Python 3 + compat_input = input + # Python < 2.6.5 require kwargs to be bytes try: def _testfunc(x): From e92b552a102f509066a605b26d6df38eb73764b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:44:51 +0700 Subject: [PATCH 0686/3599] [devscripts/buildserver] Use compat_input from compat --- devscripts/buildserver.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py index f7979c43e..fc99c3213 100644 --- a/devscripts/buildserver.py +++ b/devscripts/buildserver.py @@ -13,6 +13,7 @@ import os.path sys.path.insert(0, 
os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) from youtube_dl.compat import ( + compat_input, compat_http_server, compat_str, compat_urlparse, @@ -30,11 +31,6 @@ try: except ImportError: # Python 2 import SocketServer as compat_socketserver -try: - compat_input = raw_input -except NameError: # Python 3 - compat_input = input - class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): allow_reuse_address = True From db56f281d9c5d57cb2c44a2ea356a9a0a12b3b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:47:26 +0700 Subject: [PATCH 0687/3599] [devscripts/create-github-release] Add script for releasing on GitHub Yet only Basic authentication is supported either via .netrc or by manual input --- devscripts/create-github-release.py | 112 ++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 devscripts/create-github-release.py diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py new file mode 100644 index 000000000..f74d39490 --- /dev/null +++ b/devscripts/create-github-release.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import base64 +import json +import mimetypes +import netrc +import optparse +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_basestring, + compat_input, + compat_getpass, + compat_print, + compat_urllib_request, +) +from youtube_dl.utils import ( + make_HTTPS_handler, + sanitized_Request, +) + + +class GitHubReleaser(object): + _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases' + _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s' + _NETRC_MACHINE = 'github.com' + + def __init__(self, debuglevel=0): + self._init_github_account() + https_handler = make_HTTPS_handler({}, debuglevel=debuglevel) 
+ self._opener = compat_urllib_request.build_opener(https_handler) + + def _init_github_account(self): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + self._username = info[0] + self._password = info[2] + compat_print('Using GitHub credentials found in .netrc...') + return + else: + compat_print('No GitHub credentials found in .netrc') + except (IOError, netrc.NetrcParseError): + compat_print('Unable to parse .netrc') + self._username = compat_input( + 'Type your GitHub username or email address and press [Return]: ') + self._password = compat_getpass( + 'Type your GitHub password and press [Return]: ') + + def _call(self, req): + if isinstance(req, compat_basestring): + req = sanitized_Request(req) + # Authorizing manually since GitHub does not response with 401 with + # WWW-Authenticate header set (see + # https://developer.github.com/v3/#basic-authentication) + b64 = base64.b64encode( + ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii') + req.add_header('Authorization', 'Basic %s' % b64) + response = self._opener.open(req).read().decode('utf-8') + return json.loads(response) + + def list_releases(self): + return self._call(self._API_URL) + + def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False): + data = { + 'tag_name': tag_name, + 'target_commitish': 'master', + 'name': name, + 'body': body, + 'draft': draft, + 'prerelease': prerelease, + } + req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8')) + return self._call(req) + + def create_asset(self, release_id, asset): + asset_name = os.path.basename(asset) + url = self._UPLOADS_URL % (release_id, asset_name) + # Our files are small enough to be loaded directly into memory. 
+ data = open(asset, 'rb').read() + req = sanitized_Request(url, data) + mime_type, _ = mimetypes.guess_type(asset_name) + req.add_header('Content-Type', mime_type or 'application/octet-stream') + return self._call(req) + + +def main(): + parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected a version and a build directory') + + version, build_path = args + + releaser = GitHubReleaser(debuglevel=0) + + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, draft=True, prerelease=True) + release_id = new_release['id'] + + for asset in os.listdir(build_path): + compat_print('Uploading %s...' % asset) + releaser.create_asset(release_id, os.path.join(build_path, asset)) + + +if __name__ == '__main__': + main() From 39b32571df802ef869db1067454aa654f3f66235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 00:48:33 +0700 Subject: [PATCH 0688/3599] [devscripts/release.sh] Release to GitHub --- devscripts/release.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 1a7b1e054..87e8eda50 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -95,17 +95,16 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS) (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) -/bin/echo -e "\n### Signing and uploading the new binaries to yt-dl.org ..." +/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..." 
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done -echo 'TODO: upload on GitHub' -exit 1 +ROOT=$(pwd) +python devscripts/create-github-release.py $version "$ROOT/build/$version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" /bin/echo -e "\n### Now switching to gh-pages..." git clone --branch gh-pages --single-branch . build/gh-pages -ROOT=$(pwd) ( set -e ORIGIN_URL=$(git config --get remote.origin.url) From 2c347352677f023678ffd488a51b19f54b97fa36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 01:44:13 +0700 Subject: [PATCH 0689/3599] [youtube] Add itags 256 and 258 --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f3f102c30..6c9f77d95 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -344,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, From 1e236d7e2350e055bbe230b12490e4369aaa0956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:16:05 +0700 Subject: [PATCH 0690/3599] [downloader/hls] Do not rely on 
EXT-X-PLAYLIST-TYPE:EVENT --- youtube_dl/downloader/hls.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 62136ee54..049fb78ce 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,11 +23,17 @@ class HlsFD(FragmentFD): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] - r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely + # be appended to the end of the playlist. + # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # event media playlists [4] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 From 631d4c87ee84183917fcdf5db59e1cd1bb48d9a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:19:44 +0700 Subject: [PATCH 0691/3599] [twitch:vod] Use native hls --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f7b98e190..d898f14c3 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -260,7 +260,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'nauth': access_token['token'], 'nauthsig': access_token['sig'], })), - item_id, 'mp4') + item_id, 'mp4', entry_protocol='m3u8_native') self._prefer_source(formats) info['formats'] = formats From 51c4d85ce788497584bd056d571ed9b7b24c9651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:21:43 +0700 Subject: [PATCH 0692/3599] [downloader/hls] PEP 8 --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 049fb78ce..8e4a7189a 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -32,7 +32,7 @@ class HlsFD(FragmentFD): # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely # be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of - # event media playlists [4] + # # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 From 633b444fd29aa9d8b3ba722285ae2475ae66595f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 03:31:10 +0700 Subject: [PATCH 0693/3599] [downloader/hls] Correct comment on twitch vods --- youtube_dl/downloader/hls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8e4a7189a..54f2108e9 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -29,8 +29,8 @@ class HlsFD(FragmentFD): # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # This heuristic also is not correct since segments may not be appended as well. - # Twitch vods have EXT-X-PLAYLIST-TYPE:EVENT despite no segments will definitely - # be appended to the end of the playlist. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] From 71b9cb3107e156c7f17ec4cdf1d09421cb4dd4b1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 4 Jun 2016 22:55:15 +0200 Subject: [PATCH 0694/3599] extend FAQ (#9696) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 253d51bc8..91078eee8 100644 --- a/README.md +++ b/README.md @@ -842,6 +842,12 @@ It is *not* possible to detect whether a URL is supported or not. That's because If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program. 
+# Why do I need to go through that much red tape when filing bugs? + +Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was alrady reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of whom were totally unrelated to youtube-dl. + +youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current. + # DEVELOPER INSTRUCTIONS Most users do not need to build youtube-dl and can [download the builds](http://rg3.github.io/youtube-dl/download.html) or get them from their distribution. 
From bc270284b56b8ce7623b22b5c5cbf8d0d390c09e Mon Sep 17 00:00:00 2001 From: Ryan Schmidt <github@ryandesign.com> Date: Sat, 4 Jun 2016 21:30:22 -0500 Subject: [PATCH 0695/3599] Update README.md to mention MacPorts --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 91078eee8..f60e7ce33 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,14 @@ If you do not have curl, you can alternatively use a recent wget: Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). -OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). +OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/): brew install youtube-dl +Or with [MacPorts](https://www.macports.org/): + + sudo port install youtube-dl + You can also use pip: sudo pip install youtube-dl From 8f1aaa97a1e3eb60749f8046f2f0b1a0749d007c Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 5 Jun 2016 11:19:44 +0700 Subject: [PATCH 0696/3599] [README.md] Update pypi instructions --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f60e7ce33..e7240f41a 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,13 @@ If you do not have curl, you can alternatively use a recent wget: Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). -OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/): +You can also use pip: + + sudo pip install --upgrade youtube-dl + +This command will update youtube-dl if you have already installed it. 
See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. + +OS X users can install youtube-dl with [Homebrew](http://brew.sh/): brew install youtube-dl @@ -35,10 +41,6 @@ Or with [MacPorts](https://www.macports.org/): sudo port install youtube-dl -You can also use pip: - - sudo pip install youtube-dl - Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). # DESCRIPTION From 47f12ad3e39ebe714abec7e7588e8e411e2841b5 Mon Sep 17 00:00:00 2001 From: Tobias Salzmann <eun@su.am> Date: Sun, 5 Jun 2016 11:04:55 +0200 Subject: [PATCH 0697/3599] curl: follow redirect --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e7240f41a..205c485d0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms To install it right away for all UNIX users (Linux, OS X, etc.), type: - sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl + sudo curl -L https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl If you do not have curl, you can alternatively use a recent wget: From 7b0d1c28597bd38567e5b4e853f669a5a601c6e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Jun 2016 21:01:20 +0700 Subject: [PATCH 0698/3599] [__init__] Use write_string instead of compat_string (Closes #9689) --- youtube_dl/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5df965191..4905674ad 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -18,7 +18,6 @@ from .options import ( from .compat import ( compat_expanduser, compat_getpass, - 
compat_print, compat_shlex_split, workaround_optparse_bug9161, ) @@ -76,7 +75,7 @@ def _real_main(argv=None): # Dump user agent if opts.dump_user_agent: - compat_print(std_headers['User-Agent']) + write_string(std_headers['User-Agent'] + '\n', out=sys.stdout) sys.exit(0) # Batch file verification @@ -101,10 +100,10 @@ def _real_main(argv=None): if opts.list_extractors: for ie in list_extractors(opts.age_limit): - compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '')) + write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout) matchedUrls = [url for url in all_urls if ie.suitable(url)] for mu in matchedUrls: - compat_print(' ' + mu) + write_string(' ' + mu + '\n', out=sys.stdout) sys.exit(0) if opts.list_extractor_descriptions: for ie in list_extractors(opts.age_limit): @@ -117,7 +116,7 @@ def _real_main(argv=None): _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') _COUNTS = ('', '5', '10', 'all') desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) - compat_print(desc) + write_string(desc + '\n', out=sys.stdout) sys.exit(0) # Conflicting, missing and erroneous options From 244fe977fec880f1bce55683437a711e12075b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Jun 2016 02:52:58 +0700 Subject: [PATCH 0699/3599] [options] Add --load-info-json alias for symmetry with --write-info-json --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 14051b714..99ce4131f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -668,7 +668,7 @@ def parseOpts(overrideArguments=None): action='store_true', dest='writeannotations', default=False, help='Write video annotations to a .annotations.xml file') filesystem.add_option( - 
'--load-info', + '--load-info-json', '--load-info', dest='load_info_filename', metavar='FILE', help='JSON file containing the video information (created with the "--write-info-json" option)') filesystem.add_option( From db59b37d0bb2bbb4894f28b6b65d1d7f5496444d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Jun 2016 03:02:11 +0700 Subject: [PATCH 0700/3599] [devscripts/create-github-release] Make full published releases by default --- devscripts/create-github-release.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index f74d39490..3b8021e74 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -97,10 +97,9 @@ def main(): version, build_path = args - releaser = GitHubReleaser(debuglevel=0) + releaser = GitHubReleaser() - new_release = releaser.create_release( - version, name='youtube-dl %s' % version, draft=True, prerelease=True) + new_release = releaser.create_release(version, name='youtube-dl %s' % version) release_id = new_release['id'] for asset in os.listdir(build_path): From e67f6880257068c395d38e24a5e13f69902e1e4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 5 Jun 2016 23:16:08 +0200 Subject: [PATCH 0701/3599] [compat] Add 'compat_input' to __all__ --- youtube_dl/compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index fabac9fd2..e3cab4dd0 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -628,6 +628,7 @@ __all__ = [ 'compat_html_entities', 'compat_http_client', 'compat_http_server', + 'compat_input', 'compat_itertools_count', 'compat_kwargs', 'compat_ord', From 345dec937fcc2b9ae106e91f4c01568c8c7e41f8 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi <kagami@genshiken.org> Date: Tue, 7 Jun 2016 14:39:21 +0300 Subject: [PATCH 
0702/3599] [vlive] Acknowledge vlive+ streams statuses Same as common statuses just with "PRODUCT_" prefix: PRODUCE_LIVE_END, PRODUCT_COMING_SOON, etc. --- youtube_dl/extractor/vlive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 147f52d45..8d671cca7 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + remove_start, ) from ..compat import compat_urllib_parse_urlencode @@ -39,6 +40,7 @@ class VLiveIE(InfoExtractor): webpage, 'video params') status, _, _, live_params, long_video_id, key = re.split( r'"\s*,\s*"', video_params)[2:8] + status = remove_start(status, 'PRODUCT_') if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': live_params = self._parse_json('"%s"' % live_params, video_id) From 74193838f71addcb08a9f56a7fad8c2e7df298ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 22:12:20 +0700 Subject: [PATCH 0703/3599] [canal+] Improve extraction (Closes #9718) --- youtube_dl/extractor/canalplus.py | 33 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 25b2d4efe..8d0f91158 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, HEADRequest, unified_strdate, - url_basename, qualities, int_or_none, ) @@ -16,13 +16,25 @@ from ..utils import ( class CanalplusIE(InfoExtractor): IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' - _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' + _VALID_URL = 
r'''(?x) + https?:// + (?: + (?: + (?:(?:www|m)\.)?canalplus\.fr| + (?:www\.)?piwiplus\.fr| + (?:www\.)?d8\.tv| + (?:www\.)?itele\.fr + )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?| + player\.canalplus\.fr/#/(?P<id>\d+) + ) + + ''' _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json' _SITE_ID_MAP = { - 'canalplus.fr': 'cplus', - 'piwiplus.fr': 'teletoon', - 'd8.tv': 'd8', - 'itele.fr': 'itele', + 'canalplus': 'cplus', + 'piwiplus': 'teletoon', + 'd8': 'd8', + 'itele': 'itele', } _TESTS = [{ @@ -65,16 +77,19 @@ class CanalplusIE(InfoExtractor): 'description': 'md5:8216206ec53426ea6321321f3b3c16db', 'upload_date': '20150211', }, + }, { + 'url': 'http://m.canalplus.fr/?vid=1398231', + 'only_matching': True, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.groupdict().get('id') + video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid') - site_id = self._SITE_ID_MAP[mobj.group('site') or 'canal'] + site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]] # Beware, some subclasses do not define an id group - display_id = url_basename(mobj.group('path')) + display_id = mobj.group('display_id') or video_id if video_id is None: webpage = self._download_webpage(url, display_id) From 3d9b3605a35eb48bd20e569ed9ce9d706e457ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 22:26:18 +0700 Subject: [PATCH 0704/3599] [canal+] Update tests --- youtube_dl/extractor/canalplus.py | 48 +++++++++++++++++-------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 8d0f91158..605c5e957 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -38,14 +38,14 @@ class CanalplusIE(InfoExtractor): } _TESTS = [{ - 'url': 
'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', - 'md5': '12164a6f14ff6df8bd628e8ba9b10b78', + 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', + 'md5': '41f438a4904f7664b91b4ed0dec969dc', 'info_dict': { - 'id': '1263092', + 'id': '1192814', 'ext': 'mp4', - 'title': 'Le Zapping - 13/05/15', - 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', - 'upload_date': '20150513', + 'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014", + 'description': "Toute l'année 2014 dans un Zapping exceptionnel !", + 'upload_date': '20150105', }, }, { 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', @@ -58,24 +58,28 @@ class CanalplusIE(InfoExtractor): }, 'skip': 'Only works from France', }, { - 'url': 'http://www.d8.tv/d8-docs-mags/pid6589-d8-campagne-intime.html', + 'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231', 'info_dict': { - 'id': '966289', - 'ext': 'flv', - 'title': 'Campagne intime - Documentaire exceptionnel', - 'description': 'md5:d2643b799fb190846ae09c61e59a859f', - 'upload_date': '20131108', - }, - 'skip': 'videos get deleted after a while', - }, { - 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', - 'md5': '38b8f7934def74f0d6f3ba6c036a5f82', - 'info_dict': { - 'id': '1213714', + 'id': '1390231', 'ext': 'mp4', - 'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', - 'description': 'md5:8216206ec53426ea6321321f3b3c16db', - 'upload_date': '20150211', + 'title': "Vacances pas chères : prix discount ou grosses dépenses ? 
- En quête d'actualité", + 'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6', + 'upload_date': '20160512', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224', + 'info_dict': { + 'id': '1398334', + 'ext': 'mp4', + 'title': "L'invité de Bruce Toussaint du 07/06/2016 - ", + 'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324', + 'upload_date': '20160607', + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://m.canalplus.fr/?vid=1398231', From 57b6e9652e27aa46395dab6238e54d63746f9a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 22:32:08 +0700 Subject: [PATCH 0705/3599] [canal+] Add support for d17.tv --- youtube_dl/extractor/canalplus.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 605c5e957..61463f249 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?d17\.tv| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P<display_id>[^/?#&]+))?(?:\?.*\bvid=(?P<vid>\d+))?| player\.canalplus\.fr/#/(?P<id>\d+) @@ -34,6 +35,7 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'd17': 'd17', 'itele': 'itele', } @@ -84,6 +86,9 @@ class CanalplusIE(InfoExtractor): }, { 'url': 'http://m.canalplus.fr/?vid=1398231', 'only_matching': True, + }, { + 'url': 'http://www.d17.tv/emissions/pid8303-lolywood.html?vid=1397061', + 'only_matching': True, }] def _real_extract(self, url): From a6571f1073eab6c9a4cc9800a0bff31cf12fe09f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 00:19:33 +0800 Subject: [PATCH 0706/3599] [common] Fix 
<bootstrapInfo> detection in F4M manifests Regression since 0a5685b26fae0940f14cb063a6e4fc6986f9c124 --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 57793537b..bfd432160 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -45,6 +45,7 @@ from ..utils import ( unescapeHTML, unified_strdate, url_basename, + xpath_element, xpath_text, xpath_with_ns, determine_protocol, @@ -1030,7 +1031,7 @@ class InfoExtractor(object): if base_url: base_url = base_url.strip() - bootstrap_info = xpath_text( + bootstrap_info = xpath_element( manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], 'bootstrap info', default=None) @@ -1085,7 +1086,7 @@ class InfoExtractor(object): formats.append({ 'format_id': format_id, 'url': manifest_url, - 'ext': 'flv' if bootstrap_info else None, + 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, 'width': width, 'height': height, From a4a8201c02d06bff384ecb66a257dbec0652ff52 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 00:25:51 +0800 Subject: [PATCH 0707/3599] [wdr] Update _TESTS --- youtube_dl/extractor/wdr.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 1e729cb7c..6174eb19f 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -21,7 +21,7 @@ class WDRIE(InfoExtractor): _TESTS = [ { 'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', - 'md5': 'e58c39c3e30077141d258bf588700a7b', + # HDS download, MD5 is unstable 'info_dict': { 'id': 'mdb-1058683', 'ext': 'flv', @@ -35,7 +35,6 @@ class WDRIE(InfoExtractor): 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' }]}, }, - 'skip': 'Page Not Found', 
}, { 'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html', @@ -51,7 +50,6 @@ class WDRIE(InfoExtractor): 'is_live': False, 'subtitles': {} }, - 'skip': 'Page Not Found', }, { 'url': 'http://www1.wdr.de/mediathek/video/live/index.html', @@ -90,7 +88,7 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', - 'md5': 'ca365705551e4bd5217490f3b0591290', + # HDS download, MD5 is unstable 'info_dict': { 'id': 'mdb-186083', 'ext': 'flv', @@ -98,9 +96,6 @@ class WDRIE(InfoExtractor): 'title': 'Sachgeschichte - Achterbahn ', 'description': '- Die Sendung mit der Maus -', }, - 'params': { - 'skip_download': True, # the file has different versions :( - }, }, ] From a26a9d62396641364690974de9c859cf26f9acf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Jun 2016 23:53:08 +0700 Subject: [PATCH 0708/3599] [livestream:event] Ensure video id is string (Closes #9721) --- youtube_dl/extractor/livestream.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 0edc06c43..bc7894bf1 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -203,9 +203,10 @@ class LivestreamIE(InfoExtractor): if not videos_info: break for v in videos_info: + v_id = compat_str(v['id']) entries.append(self.url_result( - 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v['id']), - 'Livestream', v['id'], v['caption'])) + 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id), + 'Livestream', v_id, v.get('caption'))) last_video = videos_info[-1]['id'] return self.playlist_result(entries, event_id, event_data['full_name']) From 33d9f3707ccccfe8d73c1b398f198792e80a259f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> 
Date: Wed, 8 Jun 2016 02:22:04 +0700 Subject: [PATCH 0709/3599] [thesixtyone] Relax _VALID_URL (Closes #9714) --- youtube_dl/extractor/thesixtyone.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/thesixtyone.py b/youtube_dl/extractor/thesixtyone.py index d8b1fd281..d63aef5de 100644 --- a/youtube_dl/extractor/thesixtyone.py +++ b/youtube_dl/extractor/thesixtyone.py @@ -12,7 +12,7 @@ class TheSixtyOneIE(InfoExtractor): s| song/comments/list| song - )/(?P<id>[A-Za-z0-9]+)/?$''' + )/(?:[^/]+/)?(?P<id>[A-Za-z0-9]+)/?$''' _SONG_URL_TEMPLATE = 'http://thesixtyone.com/s/{0:}' _SONG_FILE_URL_TEMPLATE = 'http://{audio_server:}/thesixtyone_production/audio/{0:}_stream' _THUMBNAIL_URL_TEMPLATE = '{photo_base_url:}_desktop' @@ -45,6 +45,10 @@ class TheSixtyOneIE(InfoExtractor): 'url': 'http://www.thesixtyone.com/song/SrE3zD7s1jt/', 'only_matching': True, }, + { + 'url': 'http://www.thesixtyone.com/maryatmidnight/song/StrawberriesandCream/yvWtLp0c4GQ/', + 'only_matching': True, + }, ] _DECODE_MAP = { From 7264e385912951167c27b40df5fd22010d594b12 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 14:29:53 +0800 Subject: [PATCH 0710/3599] [bilibili] Fix for videos without upload time (closes #9710) --- youtube_dl/extractor/bilibili.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 910e539e4..b17047b39 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -102,6 +102,22 @@ class BiliBiliIE(InfoExtractor): 'uploader_id': '151066', }, }], + }, { + # Missing upload time + 'url': 'http://www.bilibili.com/video/av1867637/', + 'info_dict': { + 'id': '2880301', + 'ext': 'flv', + 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', + 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', + 'uploader': '黑夜为猫', + 
'uploader_id': '610729', + }, + 'params': { + # Just to test metadata extraction + 'skip_download': True, + }, + 'expected_warnings': ['upload time'], }] # BiliBili blocks keys from time to time. The current key is extracted from @@ -172,6 +188,7 @@ class BiliBiliIE(InfoExtractor): description = self._html_search_meta('description', webpage) datetime_str = self._html_search_regex( r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False) + timestamp = None if datetime_str: timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) From 50ce1c331c736d8219f3bf631ff069b9aecc48e3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 14:43:52 +0800 Subject: [PATCH 0711/3599] [downloader/external] Add another env for proxies in ffmpeg/avconv Related sources: https://git.libav.org/?p=libav.git;a=blob;f=libavformat/http.c;h=8fe8d11e1edfdbb04a8726db2c49cfef3f572aac;hb=HEAD#l152 https://git.libav.org/?p=libav.git;a=blob;f=libavformat/tls.c;h=fab243e93e20034e88e619188c13a44a5d8ccdb9;hb=HEAD#l63 https://github.com/FFmpeg/FFmpeg/blob/f8e89d8/libavformat/http.c#L191 https://github.com/FFmpeg/FFmpeg/blob/f8e89d8/libavformat/tls.c#L92 --- youtube_dl/downloader/external.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 3a73cee1c..3ff1f9ed4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -210,6 +210,7 @@ class FFmpegFD(ExternalFD): # args += ['-http_proxy', proxy] env = os.environ.copy() compat_setenv('HTTP_PROXY', proxy, env=env) + compat_setenv('http_proxy', proxy, env=env) protocol = info_dict.get('protocol') From 22a0a95247c30b346592b6e3d464776bceb3b934 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 20:47:39 +0800 Subject: [PATCH 0712/3599] [theplatform] Some NBC videos require an additional cookie Related: #9578 --- 
youtube_dl/extractor/theplatform.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 02dbef913..5793ec6ef 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -14,11 +14,13 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, float_or_none, int_or_none, sanitized_Request, unsmuggle_url, + update_url_query, xpath_with_ns, mimetype2ext, find_xpath_attr, @@ -48,6 +50,12 @@ class ThePlatformBaseIE(OnceIE): if OnceIE.suitable(_format['url']): formats.extend(self._extract_once_formats(_format['url'])) else: + media_url = _format['url'] + if determine_ext(media_url) == 'm3u8': + hdnea2 = self._get_cookies(media_url).get('hdnea2') + if hdnea2: + _format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value}) + formats.append(_format) subtitles = self._parse_smil_subtitles(meta, default_ns) From e6e90515db983ca447cf7a59bbc153907d4fff4a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 20:50:01 +0800 Subject: [PATCH 0713/3599] [nbc] Add the test case from #9578 Closes #9578 --- youtube_dl/extractor/nbc.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 46504cd5f..f27c7f139 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -67,6 +67,23 @@ class NBCIE(InfoExtractor): # This video has expired but with an escaped embedURL 'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', 'only_matching': True, + }, + { + # HLS streams requires the 'hdnea3' cookie + 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', + 'info_dict': { + 'id': 'n1806', + 'ext': 'mp4', + 'title': 'Goliath', + 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics 
of the kingdom.', + 'timestamp': 1237100400, + 'upload_date': '20090315', + 'uploader': 'NBCU-COM', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from US', } ] From fc0a45fa416ad3e3ecf5936061efbb0328afa6b5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 21:12:14 +0800 Subject: [PATCH 0714/3599] [twitter] Detect suspended accounts and update _TESTS --- youtube_dl/extractor/twitter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index ea673054f..129103c64 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -207,6 +207,7 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'giphz', }, 'expected_warnings': ['height', 'width'], + 'skip': 'Account suspended', }, { 'url': 'https://twitter.com/starwars/status/665052190608723968', 'md5': '39b7199856dee6cd4432e72c74bc69d4', @@ -278,7 +279,11 @@ class TwitterIE(InfoExtractor): user_id = mobj.group('user_id') twid = mobj.group('id') - webpage = self._download_webpage(self._TEMPLATE_URL % (user_id, twid), twid) + webpage, urlh = self._download_webpage_handle( + self._TEMPLATE_URL % (user_id, twid), twid) + + if 'twitter.com/account/suspended' in urlh.geturl(): + raise ExtractorError('Account suspended by Twitter.', expected=True) username = remove_end(self._og_search_title(webpage), ' on Twitter') From c6308b3153acc57300f750f0061c63ffcba4d150 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 21:28:10 +0800 Subject: [PATCH 0715/3599] [twitter] Fix extraction for videos with HLS streams Closes #9623 --- youtube_dl/extractor/twitter.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 129103c64..76421e533 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -5,6 +5,7 @@ 
import re from .common import InfoExtractor from ..utils import ( + determine_ext, float_or_none, xpath_text, remove_end, @@ -116,13 +117,16 @@ class TwitterCardIE(TwitterBaseIE): video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source') if video_url: - f = { - 'url': video_url, - } + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls')) + else: + f = { + 'url': video_url, + } - _search_dimensions_in_video_url(f, video_url) + _search_dimensions_in_video_url(f, video_url) - formats.append(f) + formats.append(f) vmap_url = config.get('vmapUrl') or config.get('vmap_url') if vmap_url: @@ -263,7 +267,6 @@ class TwitterIE(InfoExtractor): 'add_ie': ['Vine'], }, { 'url': 'https://twitter.com/captainamerica/status/719944021058060289', - # md5 constantly changes 'info_dict': { 'id': '719944021058060289', 'ext': 'mp4', @@ -272,6 +275,9 @@ class TwitterIE(InfoExtractor): 'uploader_id': 'captainamerica', 'uploader': 'Captain America', }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, }] def _real_extract(self, url): From 6da8d7de69af144a96e9e50168e66f66af54129f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 21:48:12 +0800 Subject: [PATCH 0716/3599] [twitter] Update _TESTS --- youtube_dl/extractor/twitter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 76421e533..b73842986 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -53,7 +53,7 @@ class TwitterCardIE(TwitterBaseIE): 'id': 'dq4Oj5quskI', 'ext': 'mp4', 'title': 'Ubuntu 11.10 Overview', - 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10-things-to-do-after-installing-ubuntu-11-10/', + 'description': 
'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...', 'upload_date': '20111013', 'uploader': 'OMG! Ubuntu!', 'uploader_id': 'omgubuntu', @@ -244,10 +244,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'jay - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'jay on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'jay', + 'uploader': 'Donte The Dumbass', 'uploader_id': 'jaydingeer', }, 'params': { From 411c590a1f997f9efd71be8f434821acbf33a35f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Jun 2016 23:45:46 +0800 Subject: [PATCH 0717/3599] [youku:show] Add new extractor --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/youku.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d107080f5..676a0400c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1013,7 +1013,10 @@ from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE from .ynet import YnetIE from .youjizz import YouJizzIE -from .youku import YoukuIE +from .youku import ( + YoukuIE, + YoukuShowIE, +) from .youporn import YouPornIE from .yourupload import YourUploadIE from .youtube import ( diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index dbccbe228..147608ebe 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -2,7 +2,9 @@ from __future__ import 
unicode_literals import base64 +import itertools import random +import re import string import time @@ -13,6 +15,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + get_element_by_attribute, sanitized_Request, ) @@ -285,3 +288,52 @@ class YoukuIE(InfoExtractor): 'title': title, 'entries': entries, } + + +class YoukuShowIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?youku\.com/show_page/id_(?P<id>[0-9a-z]+)\.html' + IE_NAME = 'youku:show' + + _TEST = { + 'url': 'http://www.youku.com/show_page/id_zc7c670be07ff11e48b3f.html', + 'info_dict': { + 'id': 'zc7c670be07ff11e48b3f', + 'title': '花千骨 未删减版', + 'description': 'md5:578d4f2145ae3f9128d9d4d863312910', + }, + 'playlist_count': 50, + } + + _PAGE_SIZE = 40 + + def _find_videos_in_page(self, webpage): + videos = re.findall( + r'<li><a[^>]+href="(?P<url>https?://v\.youku\.com/[^"]+)"[^>]+title="(?P<title>[^"]+)"', webpage) + return [ + self.url_result(video_url, YoukuIE.ie_key(), title) + for video_url, title in videos] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + + entries = self._find_videos_in_page(webpage) + + playlist_title = self._html_search_regex( + r'<span[^>]+class="name">([^<]+)</span>', webpage, 'playlist title', fatal=False) + detail_div = get_element_by_attribute('class', 'detail', webpage) or '' + playlist_description = self._html_search_regex( + r'<span[^>]+style="display:none"[^>]*>([^<]+)</span>', + detail_div, 'playlist description', fatal=False) + + for idx in itertools.count(1): + episodes_page = self._download_webpage( + 'http://www.youku.com/show_episode/id_%s.html' % show_id, + show_id, query={'divid': 'reload_%d' % (idx * self._PAGE_SIZE + 1)}, + note='Downloading episodes page %d' % idx) + new_entries = self._find_videos_in_page(episodes_page) + entries.extend(new_entries) + if len(new_entries) < self._PAGE_SIZE: + break + + return self.playlist_result(entries, show_id, playlist_title, 
playlist_description) From 11380753b5aa9d8128ef28a968ab325973276fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:00:47 +0700 Subject: [PATCH 0718/3599] [vessel] Add support for embed urls and improve extraction --- youtube_dl/extractor/vessel.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 1a0ff3395..e027c018b 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import re from .common import InfoExtractor from ..utils import ( @@ -12,11 +13,11 @@ from ..utils import ( class VesselIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)' _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' _LOGIN_URL = 'https://www.vessel.com/api/account/login' _NETRC_MACHINE = 'vessel' - _TEST = { + _TESTS = [{ 'url': 'https://www.vessel.com/videos/HDN7G5UMs', 'md5': '455cdf8beb71c6dd797fd2f3818d05c4', 'info_dict': { @@ -28,7 +29,16 @@ class VesselIE(InfoExtractor): 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', 'timestamp': int, }, - } + }, { + 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z]+.*?)\1', + webpage)] @staticmethod def make_json_request(url, data): @@ -98,16 +108,19 @@ class VesselIE(InfoExtractor): formats = [] for f in video_asset.get('sources', []): - if f['name'] == 'hls-index': + location = f.get('location') + if not location: + continue + if f.get('name') == 'hls-index': 
formats.extend(self._extract_m3u8_formats( - f['location'], video_id, ext='mp4', m3u8_id='m3u8')) + location, video_id, ext='mp4', m3u8_id='m3u8')) else: formats.append({ - 'format_id': f['name'], + 'format_id': f.get('name'), 'tbr': f.get('bitrate'), 'height': f.get('height'), 'width': f.get('width'), - 'url': f['location'], + 'url': location, }) self._sort_formats(formats) From 48a5eabc487058ccaa1076b74ad9106fc6019955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:02:27 +0700 Subject: [PATCH 0719/3599] [extractor/generic] Add support vessel embeds (Closes #7083) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b4138381d..90575ab0e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -63,6 +63,7 @@ from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE +from .vessel import VesselIE class GenericIE(InfoExtractor): @@ -1533,6 +1534,11 @@ class GenericIE(InfoExtractor): if tp_urls: return _playlist_from_matches(tp_urls, ie='ThePlatform') + # Look for Vessel embeds + vessel_urls = VesselIE._extract_urls(webpage) + if vessel_urls: + return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key()) + # Look for embedded rtl.nl player matches = re.findall( r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', From a479b8f687245a9cb1b5c25ed9ece28c4710981f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:09:32 +0700 Subject: [PATCH 0720/3599] [vessel] Use native hls by default --- youtube_dl/extractor/vessel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index e027c018b..59f2b4ba4 100644 --- 
a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -113,7 +113,8 @@ class VesselIE(InfoExtractor): continue if f.get('name') == 'hls-index': formats.extend(self._extract_m3u8_formats( - location, video_id, ext='mp4', m3u8_id='m3u8')) + location, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) else: formats.append({ 'format_id': f.get('name'), From 39da509f6712b6b0e9d52a9c9e990a5b5cd6c2ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:12:48 +0700 Subject: [PATCH 0721/3599] [vessel] Extract DASH formats --- youtube_dl/extractor/vessel.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 59f2b4ba4..c53f44584 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -111,13 +111,17 @@ class VesselIE(InfoExtractor): location = f.get('location') if not location: continue - if f.get('name') == 'hls-index': + name = f.get('name') + if name == 'hls-index': formats.extend(self._extract_m3u8_formats( location, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='m3u8')) + elif name == 'dash-index': + formats.extend(self._extract_mpd_formats( + location, video_id, mpd_id='dash', fatal=False)) else: formats.append({ - 'format_id': f.get('name'), + 'format_id': name, 'tbr': f.get('bitrate'), 'height': f.get('height'), 'width': f.get('width'), From 9d51a0a9a19f07997cfb3ff1bb9fc9c1669a455c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 04:13:38 +0700 Subject: [PATCH 0722/3599] [vessel] Make hls formats non fatal --- youtube_dl/extractor/vessel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index c53f44584..2cd617b91 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -115,7 +115,7 
@@ class VesselIE(InfoExtractor): if name == 'hls-index': formats.extend(self._extract_m3u8_formats( location, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8')) + entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False)) elif name == 'dash-index': formats.extend(self._extract_mpd_formats( location, video_id, mpd_id='dash', fatal=False)) From be6217b26142491232fb697b125015d45437832d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 05:34:19 +0700 Subject: [PATCH 0723/3599] [YoutubeDL] Force string conversion on non string video ids --- youtube_dl/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3917ca9dc..5036289b0 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1223,6 +1223,10 @@ class YoutubeDL(object): if 'title' not in info_dict: raise ExtractorError('Missing "title" field in extractor result') + if not isinstance(info_dict['id'], compat_str): + self.report_warning('"id" field is not a string - forcing string conversion') + info_dict['id'] = compat_str(info_dict['id']) + if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None From 6c33d24b46ecfb1f2ce790e21f2410149fdfb095 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 12:58:24 +0800 Subject: [PATCH 0724/3599] [utils] Add audio/mpeg to mimetype2ext() Used in WDR live radios (#6147) --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 89234b39d..229de4b39 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2020,6 +2020,9 @@ def mimetype2ext(mt): ext = { 'audio/mp4': 'm4a', + # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. 
Here use .mp3 as + # it's the most popular one + 'audio/mpeg': 'mp3', }.get(mt) if ext is not None: return ext From 50918c4ee01be6c1218a72bef35838216b2bf8d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:04:30 +0800 Subject: [PATCH 0725/3599] [wdr] Support radio players (closes #6147) --- youtube_dl/extractor/wdr.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 6174eb19f..059e2aa08 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,12 +10,13 @@ from ..utils import ( strip_jsonp, unified_strdate, ExtractorError, + urlhandle_detect_ext, ) class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' - _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' + _PAGE_REGEX = r'/(?:mediathek/)?(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _TESTS = [ @@ -97,6 +98,16 @@ class WDRIE(InfoExtractor): 'description': '- Die Sendung mit der Maus -', }, }, + { + 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html', + 'info_dict': { + 'id': 'mdb-869971', + 'ext': 'mp3', + 'title': 'Funkhaus Europa Livestream', + 'description': 'md5:2309992a6716c347891c045be50992e4', + 'upload_date': '20160101', + }, + } ] def _real_extract(self, url): @@ -107,9 +118,10 @@ class WDRIE(InfoExtractor): webpage = self._download_webpage(url, display_id) # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" # for wdrmaus its in a link to the page in a multiline "videoLink"-tag json_metadata = self._html_search_regex( - 
r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', webpage, 'media link', default=None, flags=re.MULTILINE) if not json_metadata: @@ -143,15 +155,22 @@ class WDRIE(InfoExtractor): for tag_name in ['videoURL', 'audioURL']: if tag_name in metadata_media_alt: alt_url = metadata_media_alt[tag_name] - if determine_ext(alt_url) == 'm3u8': + ext = determine_ext(alt_url) + if ext == 'm3u8': m3u_fmt = self._extract_m3u8_formats( alt_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls') formats.extend(m3u_fmt) else: - formats.append({ + a_format = { 'url': alt_url - }) + } + if ext == 'unknown_video': + urlh = self._request_webpage( + alt_url, display_id, note='Determining extension') + ext = urlhandle_detect_ext(urlh) + a_format['ext'] = ext + formats.append(a_format) # check if there are flash-streams for this video if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: From 6869d634c6d7482dd53034dec8a8f2f0b8e1f9b0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:41:12 +0800 Subject: [PATCH 0726/3599] [wdr] Simplify extraction --- youtube_dl/extractor/wdr.py | 64 ++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 059e2aa08..88369d3f2 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -10,6 +10,7 @@ from ..utils import ( strip_jsonp, unified_strdate, ExtractorError, + update_url_query, urlhandle_detect_ext, ) @@ -100,9 +101,10 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html', + # Live stream, MD5 unstable 'info_dict': { 'id': 'mdb-869971', - 'ext': 'mp3', + 'ext': 'flv', 'title': 'Funkhaus Europa Livestream', 'description': 
'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20160101', @@ -150,36 +152,38 @@ class WDRIE(InfoExtractor): formats = [] # check if the metadata contains a direct URL to a file - metadata_media_alt = metadata_media_resource.get('alt') - if metadata_media_alt: - for tag_name in ['videoURL', 'audioURL']: - if tag_name in metadata_media_alt: - alt_url = metadata_media_alt[tag_name] - ext = determine_ext(alt_url) - if ext == 'm3u8': - m3u_fmt = self._extract_m3u8_formats( - alt_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - formats.extend(m3u_fmt) - else: - a_format = { - 'url': alt_url - } - if ext == 'unknown_video': - urlh = self._request_webpage( - alt_url, display_id, note='Determining extension') - ext = urlhandle_detect_ext(urlh) - a_format['ext'] = ext - formats.append(a_format) + for kind, media_resource in metadata_media_resource.items(): + if kind not in ('dflt', 'alt'): + continue - # check if there are flash-streams for this video - if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']: - video_url = metadata_media_resource['dflt']['videoURL'] - if video_url.endswith('.f4m'): - full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18' - formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False)) - elif video_url.endswith('.smil'): - formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False)) + for tag_name, medium_url in media_resource.items(): + if tag_name not in ('videoURL', 'audioURL'): + continue + + ext = determine_ext(medium_url) + if ext == 'm3u8': + m3u_fmt = self._extract_m3u8_formats( + medium_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + formats.extend(m3u_fmt) + elif ext == 'f4m': + manifest_url = update_url_query( + medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) + formats.extend(self._extract_f4m_formats( + manifest_url, display_id, f4m_id='hds', fatal=False)) + elif ext == 'smil': + 
formats.extend(self._extract_smil_formats( + medium_url, 'stream', fatal=False)) + else: + a_format = { + 'url': medium_url + } + if ext == 'unknown_video': + urlh = self._request_webpage( + medium_url, display_id, note='Determining extension') + ext = urlhandle_detect_ext(urlh) + a_format['ext'] = ext + formats.append(a_format) subtitles = {} caption_url = metadata_media_resource.get('captionURL') From 1594a4932f7e94287c32b5d4d63a60b57ffee96a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 13:49:35 +0800 Subject: [PATCH 0727/3599] [wdr] Misc changes --- youtube_dl/extractor/wdr.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 88369d3f2..a9238cbeb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -6,10 +6,10 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + ExtractorError, js_to_json, strip_jsonp, unified_strdate, - ExtractorError, update_url_query, urlhandle_detect_ext, ) @@ -17,7 +17,7 @@ from ..utils import ( class WDRIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' - _PAGE_REGEX = r'/(?:mediathek/)?(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' + _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html' _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _TESTS = [ @@ -162,10 +162,9 @@ class WDRIE(InfoExtractor): ext = determine_ext(medium_url) if ext == 'm3u8': - m3u_fmt = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( medium_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls') - formats.extend(m3u_fmt) + m3u8_id='hls')) elif ext == 'f4m': manifest_url = update_url_query( medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) @@ -185,6 +184,8 @@ class WDRIE(InfoExtractor): a_format['ext'] 
= ext formats.append(a_format) + self._sort_formats(formats) + subtitles = {} caption_url = metadata_media_resource.get('captionURL') if caption_url: @@ -206,8 +207,6 @@ class WDRIE(InfoExtractor): if upload_date: upload_date = unified_strdate(upload_date) - self._sort_formats(formats) - return { 'id': metadata_tracker_data.get('trackerClipId', display_id), 'display_id': display_id, From e2713d32f49f1bfa830cc755a96691c39da88290 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 19:00:13 +0800 Subject: [PATCH 0728/3599] [openload] Fix extraction. Thanks @perron375 for the solution Closes #9706 --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 5049b870e..1b57462b5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) code = self._search_regex( - r'</video>\s*</div>\s*<script[^>]+>([^<]+)</script>', + r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>', webpage, 'JS code') decoded = self.openload_decode(code) From 21efee5f8bc8daf0cbb5fc3408a1fc5b9d5eadcb Mon Sep 17 00:00:00 2001 From: N1k145 <N1k145@users.noreply.github.com> Date: Thu, 9 Jun 2016 12:13:15 +0200 Subject: [PATCH 0729/3599] [openload] Relax _VALID_URL [openload] added to _TESTS, removed escape --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 1b57462b5..6415b8fdc 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -14,7 +14,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = 
r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -31,6 +31,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://openload.io/f/ZAn6oz-VZGE/', 'only_matching': True, + }, { + 'url': 'https://openload.co/f/_-ztPaZtMhM/', + 'only_matching': True, }, { # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout # for title and ext From bb1e44cc8ee7937422fb5635f3431feb6d5fd918 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Fri, 27 May 2016 13:37:40 +0200 Subject: [PATCH 0730/3599] [godtv] Add extractor [GodTV] Improvements --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/godtv.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/godtv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index aa98782a5..40dcfcde3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .globo import ( GloboArticleIE, ) from .godtube import GodTubeIE +from .godtv import GodTVIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE from .googledrive import GoogleDriveIE diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py new file mode 100644 index 000000000..50f093ace --- /dev/null +++ b/youtube_dl/extractor/godtv.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class GodTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', + 'info_dict': { + 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3', + 'ext': 'mp4', + 'title': 'Randy Needham', + 'duration': 3615.08, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, 
url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id) + + return OoyalaIE._build_url_result(ooyala_id) From c0fed3bda50f77d063f3817cfbc3d8b81c18afa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:29:41 +0700 Subject: [PATCH 0731/3599] [godtv] Improve and add support for playlists (Closes #9608) --- youtube_dl/extractor/godtv.py | 36 +++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 50f093ace..78d638cf0 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -1,13 +1,13 @@ -# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor from .ooyala import OoyalaIE +from ..utils import js_to_json class GodTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', 'info_dict': { 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3', @@ -18,12 +18,40 @@ class GodTVIE(InfoExtractor): 'params': { 'skip_download': True, } - } + }, { + 'url': 'http://god.tv/playlist/bible-study', + 'info_dict': { + 'id': 'bible-study', + }, + 'playlist_mincount': 37, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id) + + settings = self._parse_json( + self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'settings', default='{}'), + display_id, transform_source=js_to_json, fatal=False) + + ooyala_id = None + + if settings: + playlist = settings.get('playlist') + if playlist and isinstance(playlist, list): + entries = [ + 
OoyalaIE._build_url_result(video['content_id']) + for video in playlist if video.get('content_id')] + if entries: + return self.playlist_result(entries, display_id) + ooyala_id = settings.get('ooyala', {}).get('content_id') + + if not ooyala_id: + ooyala_id = self._search_regex( + r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1', + webpage, 'ooyala id', group='id') return OoyalaIE._build_url_result(ooyala_id) From 416878f41f3b33cf1b10b0b30093dcd7a90bdbfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:33:51 +0700 Subject: [PATCH 0732/3599] [godtv] Add more tests --- youtube_dl/extractor/godtv.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 78d638cf0..7deca00aa 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -24,6 +24,12 @@ class GodTVIE(InfoExtractor): 'id': 'bible-study', }, 'playlist_mincount': 37, + }, { + 'url': 'http://god.tv/node/15097', + 'only_matching': True, + }, { + 'url': 'http://god.tv/live/africa', + 'only_matching': True, }] def _real_extract(self, url): From b0aebe702c538010fd92cd0807963293f112adcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Jun 2016 21:34:47 +0700 Subject: [PATCH 0733/3599] [godtv] Relax _VALID_URL --- youtube_dl/extractor/godtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index 7deca00aa..c5d3b4e6a 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -6,7 +6,7 @@ from ..utils import js_to_json class GodTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', 'info_dict': { @@ -30,6 +30,9 @@ 
class GodTVIE(InfoExtractor): }, { 'url': 'http://god.tv/live/africa', 'only_matching': True, + }, { + 'url': 'http://god.tv/liveevents', + 'only_matching': True, }] def _real_extract(self, url): From bc7e7adf5154f15b74b2df3e2989f630667778ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 9 Jun 2016 21:40:16 +0800 Subject: [PATCH 0734/3599] [wdr] Subtitles are TTML --- youtube_dl/extractor/wdr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index a9238cbeb..6b83a2a04 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -34,7 +34,8 @@ class WDRIE(InfoExtractor): 'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318', 'is_live': False, 'subtitles': {'de': [{ - 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml' + 'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml', + 'ext': 'ttml', }]}, }, }, @@ -190,7 +191,8 @@ class WDRIE(InfoExtractor): caption_url = metadata_media_resource.get('captionURL') if caption_url: subtitles['de'] = [{ - 'url': caption_url + 'url': caption_url, + 'ext': 'ttml', }] title = metadata_tracker_data.get('trackerClipTitle') From 55290788d352168844c8e64d64428a76baa63eea Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:28:09 +0800 Subject: [PATCH 0735/3599] [yahoo] Yahoo doesn't like region names in lower cases Fix test_Yahoo_7 --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b376f2b93..927a964a4 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -343,7 +343,7 @@ class YahooIE(InfoExtractor): webpage, 'region', fatal=False, default='US') data = compat_urllib_parse_urlencode({ 'protocol': 'http', - 'region': region, + 'region': region.upper(), }) query_url = ( 
'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' From 506d0e96936f84c2b21c7ed37f4a7fca2eec86a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:29:58 +0800 Subject: [PATCH 0736/3599] [xuite] Skip the invalid test --- youtube_dl/extractor/xuite.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 2466410fa..0be8932ad 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -66,6 +66,7 @@ class XuiteIE(InfoExtractor): 'uploader_id': '242127761', 'categories': ['電玩動漫'], }, + 'skip': 'Video removed', }, { 'url': 'http://vlog.xuite.net/play/S1dDUjdyLTMyOTc3NjcuZmx2/%E5%AD%AB%E7%87%95%E5%A7%BF-%E7%9C%BC%E6%B7%9A%E6%88%90%E8%A9%A9', 'only_matching': True, From 436214baf70c1a50fbaf1fbfca4b48f33695590c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:31:06 +0800 Subject: [PATCH 0737/3599] [xfileshare] Skip an invalid test --- youtube_dl/extractor/xfileshare.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 769003735..ee4d04c20 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -62,7 +62,8 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4', 'thumbnail': 're:http://.*\.jpg', - } + }, + 'skip': 'Video removed', }, { 'url': 'http://vidto.me/ku5glz52nqe1.html', 'info_dict': { From e1e0a10c567e8457bf83f6b54e65963447e17a8f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 12:33:31 +0800 Subject: [PATCH 0738/3599] [weibo] Remove the extractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Weibo weishipin (微視頻, tiny videos) service is dead and now all videos are hosted on Sina videos, which is covered by sina.py --- 
youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/weibo.py | 49 ------------------------------ 2 files changed, 50 deletions(-) delete mode 100644 youtube_dl/extractor/weibo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 40dcfcde3..0789e4a6e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -974,7 +974,6 @@ from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, ) -from .weibo import WeiboIE from .weiqitv import WeiqiTVIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py deleted file mode 100644 index 20bb039d3..000000000 --- a/youtube_dl/extractor/weibo.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class WeiboIE(InfoExtractor): - """ - The videos in Weibo come from different sites, this IE just finds the link - to the external video and returns it. 
- """ - _VALID_URL = r'https?://video\.weibo\.com/v/weishipin/t_(?P<id>.+?)\.htm' - - _TEST = { - 'url': 'http://video.weibo.com/v/weishipin/t_zjUw2kZ.htm', - 'info_dict': { - 'id': '98322879', - 'ext': 'flv', - 'title': '魔声耳机最新广告“All Eyes On Us”', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Sina'], - } - - # Additional example videos from different sites - # Youku: http://video.weibo.com/v/weishipin/t_zQGDWQ8.htm - # 56.com: http://video.weibo.com/v/weishipin/t_zQ44HxN.htm - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) - video_id = mobj.group('id') - info_url = 'http://video.weibo.com/?s=v&a=play_list&format=json&mix_video_id=t_%s' % video_id - info = self._download_json(info_url, video_id) - - videos_urls = map(lambda v: v['play_page_url'], info['result']['data']) - # Prefer sina video since they have thumbnails - videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u) - player_url = videos_urls[-1] - m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html', - player_url) - if m_sina is not None: - self.to_screen('Sina video detected') - sina_id = m_sina.group(1) - player_url = 'http://you.video.sina.com.cn/swf/quotePlayer.swf?vid=%s' % sina_id - return self.url_result(player_url) From 3e74b444e7324fdda956aa816240b938eabf9c93 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:13:59 +0800 Subject: [PATCH 0739/3599] [vulture] Remove the extractor The first 10 URLs in google search "site:http://video.vulture.com/video" is dead. I guess Vulture does not host videos on their own anymore. 
--- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/vulture.py | 69 ------------------------------ 2 files changed, 70 deletions(-) delete mode 100644 youtube_dl/extractor/vulture.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0789e4a6e..38708294a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,7 +958,6 @@ from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE -from .vulture import VultureIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py deleted file mode 100644 index faa167e65..000000000 --- a/youtube_dl/extractor/vulture.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import unicode_literals - -import json -import os.path -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, -) - - -class VultureIE(InfoExtractor): - IE_NAME = 'vulture.com' - _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/' - _TEST = { - 'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1', - 'md5': '8d997845642a2b5152820f7257871bc8', - 'info_dict': { - 'id': '6GHRQL3RV7MSD1H4', - 'ext': 'mp4', - 'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED', - 'uploader_id': 'Sarah', - 'thumbnail': 're:^http://.*\.jpg$', - 'timestamp': 1401288564, - 'upload_date': '20140528', - 'description': 'Uplifting and witty, as predicted.', - 'duration': 1015, - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - query_string = self._search_regex( - r"queryString\s*=\s*'([^']+)'", webpage, 'query string') - video_id = self._search_regex( - r'content=([^&]+)', query_string, 'video ID') - query_url = 
'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string - - query_webpage = self._download_webpage( - query_url, display_id, note='Downloading query page') - params_json = self._search_regex( - r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n', - query_webpage, - 'player params') - params = json.loads(params_json) - - upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T')) - uploader_id = params.get('user', {}).get('handle') - - media_item = params['media_item'] - title = os.path.splitext(media_item['title'])[0] - duration = int_or_none(media_item.get('duration_seconds')) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': media_item['pipeline_xid'], - 'title': title, - 'timestamp': upload_timestamp, - 'thumbnail': params.get('thumbnail_url'), - 'uploader_id': uploader_id, - 'description': params.get('description'), - 'duration': duration, - } From 5de008e8c3e4058c20956d19f69ac3347a2722e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:31:55 +0800 Subject: [PATCH 0740/3599] [nbcnews] Support embed widgets Used in some Vulture videos --- youtube_dl/extractor/nbc.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f27c7f139..6b7da1149 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -266,6 +266,11 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952', 'only_matching': True, }, + { + # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html + 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682', + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -289,18 +294,17 @@ class NBCNewsIE(ThePlatformIE): webpage = self._download_webpage(url, display_id) info = None bootstrap_json = self._search_regex( - 
r'(?m)var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', + [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', + r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], webpage, 'bootstrap json', default=None) - if bootstrap_json: - bootstrap = self._parse_json(bootstrap_json, display_id) + bootstrap = self._parse_json( + bootstrap_json, display_id, transform_source=unescapeHTML) + if 'results' in bootstrap: info = bootstrap['results'][0]['video'] + elif 'video' in bootstrap: + info = bootstrap['video'] else: - player_instance_json = self._search_regex( - r'videoObj\s*:\s*({.+})', webpage, 'player instance', default=None) - if not player_instance_json: - player_instance_json = self._html_search_regex( - r'data-video="([^"]+)"', webpage, 'video json') - info = self._parse_json(player_instance_json, display_id) + info = bootstrap video_id = info['mpxId'] title = info['title'] From de3eb07ed64e3d50164a6db59385a94f2675b0b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:32:59 +0800 Subject: [PATCH 0741/3599] [generic] Detect NBC News embeds --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 90575ab0e..36a3d91fc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1032,6 +1032,17 @@ class GenericIE(InfoExtractor): 'timestamp': 1389118457, }, }, + # NBC News embed + { + 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html', + 'md5': '1aa589c675898ae6d37a17913cf68d66', + 'info_dict': { + 'id': '701714499682', + 'ext': 'mp4', + 'title': 'PREVIEW: On Assignment: David Letterman', + 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. 
Airs Sunday June 12 at 7/6c.', + }, + }, # UDN embed { 'url': 'https://video.udn.com/news/300346', @@ -1966,6 +1977,12 @@ class GenericIE(InfoExtractor): if nbc_sports_url: return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + # Look for NBC News embeds + nbc_news_embed_url = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage) + if nbc_news_embed_url: + return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews') + # Look for Google Drive embeds google_drive_url = GoogleDriveIE._extract_url(webpage) if google_drive_url: From cc4444662c54c24f6f82efd3ba5e60e9556d88b8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 13:33:59 +0800 Subject: [PATCH 0742/3599] [generic] Remove Vulture embed detection Vulture.com videos now hosts on YouTube, Vimeo, MTV, NBC News or Hulu. Here's an example of Hulu: http://www.vulture.com/2016/06/kimmel-interviews-mariah-carey-in-a-bathtub.html --- youtube_dl/extractor/generic.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 36a3d91fc..798c109c6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1857,14 +1857,6 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url) - # Look for embedded vulture.com player - mobj = re.search( - r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"', - webpage) - if mobj is not None: - url = unescapeHTML(mobj.group('url')) - return self.url_result(url, ie='Vulture') - # Look for embedded mtvservices player mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage) if mtvservices_url: From 9631a94fb5e5ee9b92135f938df00866535fc6c6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:05:24 +0800 Subject: [PATCH 0743/3599] [compat] Add compat_html_entities_html5 Used in tset_Vporn_1. 
Also Related to #9270 --- youtube_dl/compat.py | 2240 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2239 insertions(+), 1 deletion(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e3cab4dd0..0243949a4 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -63,6 +63,2244 @@ try: except ImportError: # Python 2 import htmlentitydefs as compat_html_entities +try: # Python >= 3.3 + from compat_html_entities import html as compat_html_entities_html5 +except ImportError: + # Copied from CPython 3.5.1 html/entities.py + compat_html_entities_html5 = { + 'Aacute': '\xc1', + 'aacute': '\xe1', + 'Aacute;': '\xc1', + 'aacute;': '\xe1', + 'Abreve;': '\u0102', + 'abreve;': '\u0103', + 'ac;': '\u223e', + 'acd;': '\u223f', + 'acE;': '\u223e\u0333', + 'Acirc': '\xc2', + 'acirc': '\xe2', + 'Acirc;': '\xc2', + 'acirc;': '\xe2', + 'acute': '\xb4', + 'acute;': '\xb4', + 'Acy;': '\u0410', + 'acy;': '\u0430', + 'AElig': '\xc6', + 'aelig': '\xe6', + 'AElig;': '\xc6', + 'aelig;': '\xe6', + 'af;': '\u2061', + 'Afr;': '\U0001d504', + 'afr;': '\U0001d51e', + 'Agrave': '\xc0', + 'agrave': '\xe0', + 'Agrave;': '\xc0', + 'agrave;': '\xe0', + 'alefsym;': '\u2135', + 'aleph;': '\u2135', + 'Alpha;': '\u0391', + 'alpha;': '\u03b1', + 'Amacr;': '\u0100', + 'amacr;': '\u0101', + 'amalg;': '\u2a3f', + 'AMP': '&', + 'amp': '&', + 'AMP;': '&', + 'amp;': '&', + 'And;': '\u2a53', + 'and;': '\u2227', + 'andand;': '\u2a55', + 'andd;': '\u2a5c', + 'andslope;': '\u2a58', + 'andv;': '\u2a5a', + 'ang;': '\u2220', + 'ange;': '\u29a4', + 'angle;': '\u2220', + 'angmsd;': '\u2221', + 'angmsdaa;': '\u29a8', + 'angmsdab;': '\u29a9', + 'angmsdac;': '\u29aa', + 'angmsdad;': '\u29ab', + 'angmsdae;': '\u29ac', + 'angmsdaf;': '\u29ad', + 'angmsdag;': '\u29ae', + 'angmsdah;': '\u29af', + 'angrt;': '\u221f', + 'angrtvb;': '\u22be', + 'angrtvbd;': '\u299d', + 'angsph;': '\u2222', + 'angst;': '\xc5', + 'angzarr;': '\u237c', + 'Aogon;': '\u0104', + 'aogon;': '\u0105', + 'Aopf;': 
'\U0001d538', + 'aopf;': '\U0001d552', + 'ap;': '\u2248', + 'apacir;': '\u2a6f', + 'apE;': '\u2a70', + 'ape;': '\u224a', + 'apid;': '\u224b', + 'apos;': "'", + 'ApplyFunction;': '\u2061', + 'approx;': '\u2248', + 'approxeq;': '\u224a', + 'Aring': '\xc5', + 'aring': '\xe5', + 'Aring;': '\xc5', + 'aring;': '\xe5', + 'Ascr;': '\U0001d49c', + 'ascr;': '\U0001d4b6', + 'Assign;': '\u2254', + 'ast;': '*', + 'asymp;': '\u2248', + 'asympeq;': '\u224d', + 'Atilde': '\xc3', + 'atilde': '\xe3', + 'Atilde;': '\xc3', + 'atilde;': '\xe3', + 'Auml': '\xc4', + 'auml': '\xe4', + 'Auml;': '\xc4', + 'auml;': '\xe4', + 'awconint;': '\u2233', + 'awint;': '\u2a11', + 'backcong;': '\u224c', + 'backepsilon;': '\u03f6', + 'backprime;': '\u2035', + 'backsim;': '\u223d', + 'backsimeq;': '\u22cd', + 'Backslash;': '\u2216', + 'Barv;': '\u2ae7', + 'barvee;': '\u22bd', + 'Barwed;': '\u2306', + 'barwed;': '\u2305', + 'barwedge;': '\u2305', + 'bbrk;': '\u23b5', + 'bbrktbrk;': '\u23b6', + 'bcong;': '\u224c', + 'Bcy;': '\u0411', + 'bcy;': '\u0431', + 'bdquo;': '\u201e', + 'becaus;': '\u2235', + 'Because;': '\u2235', + 'because;': '\u2235', + 'bemptyv;': '\u29b0', + 'bepsi;': '\u03f6', + 'bernou;': '\u212c', + 'Bernoullis;': '\u212c', + 'Beta;': '\u0392', + 'beta;': '\u03b2', + 'beth;': '\u2136', + 'between;': '\u226c', + 'Bfr;': '\U0001d505', + 'bfr;': '\U0001d51f', + 'bigcap;': '\u22c2', + 'bigcirc;': '\u25ef', + 'bigcup;': '\u22c3', + 'bigodot;': '\u2a00', + 'bigoplus;': '\u2a01', + 'bigotimes;': '\u2a02', + 'bigsqcup;': '\u2a06', + 'bigstar;': '\u2605', + 'bigtriangledown;': '\u25bd', + 'bigtriangleup;': '\u25b3', + 'biguplus;': '\u2a04', + 'bigvee;': '\u22c1', + 'bigwedge;': '\u22c0', + 'bkarow;': '\u290d', + 'blacklozenge;': '\u29eb', + 'blacksquare;': '\u25aa', + 'blacktriangle;': '\u25b4', + 'blacktriangledown;': '\u25be', + 'blacktriangleleft;': '\u25c2', + 'blacktriangleright;': '\u25b8', + 'blank;': '\u2423', + 'blk12;': '\u2592', + 'blk14;': '\u2591', + 'blk34;': '\u2593', + 'block;': 
'\u2588', + 'bne;': '=\u20e5', + 'bnequiv;': '\u2261\u20e5', + 'bNot;': '\u2aed', + 'bnot;': '\u2310', + 'Bopf;': '\U0001d539', + 'bopf;': '\U0001d553', + 'bot;': '\u22a5', + 'bottom;': '\u22a5', + 'bowtie;': '\u22c8', + 'boxbox;': '\u29c9', + 'boxDL;': '\u2557', + 'boxDl;': '\u2556', + 'boxdL;': '\u2555', + 'boxdl;': '\u2510', + 'boxDR;': '\u2554', + 'boxDr;': '\u2553', + 'boxdR;': '\u2552', + 'boxdr;': '\u250c', + 'boxH;': '\u2550', + 'boxh;': '\u2500', + 'boxHD;': '\u2566', + 'boxHd;': '\u2564', + 'boxhD;': '\u2565', + 'boxhd;': '\u252c', + 'boxHU;': '\u2569', + 'boxHu;': '\u2567', + 'boxhU;': '\u2568', + 'boxhu;': '\u2534', + 'boxminus;': '\u229f', + 'boxplus;': '\u229e', + 'boxtimes;': '\u22a0', + 'boxUL;': '\u255d', + 'boxUl;': '\u255c', + 'boxuL;': '\u255b', + 'boxul;': '\u2518', + 'boxUR;': '\u255a', + 'boxUr;': '\u2559', + 'boxuR;': '\u2558', + 'boxur;': '\u2514', + 'boxV;': '\u2551', + 'boxv;': '\u2502', + 'boxVH;': '\u256c', + 'boxVh;': '\u256b', + 'boxvH;': '\u256a', + 'boxvh;': '\u253c', + 'boxVL;': '\u2563', + 'boxVl;': '\u2562', + 'boxvL;': '\u2561', + 'boxvl;': '\u2524', + 'boxVR;': '\u2560', + 'boxVr;': '\u255f', + 'boxvR;': '\u255e', + 'boxvr;': '\u251c', + 'bprime;': '\u2035', + 'Breve;': '\u02d8', + 'breve;': '\u02d8', + 'brvbar': '\xa6', + 'brvbar;': '\xa6', + 'Bscr;': '\u212c', + 'bscr;': '\U0001d4b7', + 'bsemi;': '\u204f', + 'bsim;': '\u223d', + 'bsime;': '\u22cd', + 'bsol;': '\\', + 'bsolb;': '\u29c5', + 'bsolhsub;': '\u27c8', + 'bull;': '\u2022', + 'bullet;': '\u2022', + 'bump;': '\u224e', + 'bumpE;': '\u2aae', + 'bumpe;': '\u224f', + 'Bumpeq;': '\u224e', + 'bumpeq;': '\u224f', + 'Cacute;': '\u0106', + 'cacute;': '\u0107', + 'Cap;': '\u22d2', + 'cap;': '\u2229', + 'capand;': '\u2a44', + 'capbrcup;': '\u2a49', + 'capcap;': '\u2a4b', + 'capcup;': '\u2a47', + 'capdot;': '\u2a40', + 'CapitalDifferentialD;': '\u2145', + 'caps;': '\u2229\ufe00', + 'caret;': '\u2041', + 'caron;': '\u02c7', + 'Cayleys;': '\u212d', + 'ccaps;': '\u2a4d', + 'Ccaron;': 
'\u010c', + 'ccaron;': '\u010d', + 'Ccedil': '\xc7', + 'ccedil': '\xe7', + 'Ccedil;': '\xc7', + 'ccedil;': '\xe7', + 'Ccirc;': '\u0108', + 'ccirc;': '\u0109', + 'Cconint;': '\u2230', + 'ccups;': '\u2a4c', + 'ccupssm;': '\u2a50', + 'Cdot;': '\u010a', + 'cdot;': '\u010b', + 'cedil': '\xb8', + 'cedil;': '\xb8', + 'Cedilla;': '\xb8', + 'cemptyv;': '\u29b2', + 'cent': '\xa2', + 'cent;': '\xa2', + 'CenterDot;': '\xb7', + 'centerdot;': '\xb7', + 'Cfr;': '\u212d', + 'cfr;': '\U0001d520', + 'CHcy;': '\u0427', + 'chcy;': '\u0447', + 'check;': '\u2713', + 'checkmark;': '\u2713', + 'Chi;': '\u03a7', + 'chi;': '\u03c7', + 'cir;': '\u25cb', + 'circ;': '\u02c6', + 'circeq;': '\u2257', + 'circlearrowleft;': '\u21ba', + 'circlearrowright;': '\u21bb', + 'circledast;': '\u229b', + 'circledcirc;': '\u229a', + 'circleddash;': '\u229d', + 'CircleDot;': '\u2299', + 'circledR;': '\xae', + 'circledS;': '\u24c8', + 'CircleMinus;': '\u2296', + 'CirclePlus;': '\u2295', + 'CircleTimes;': '\u2297', + 'cirE;': '\u29c3', + 'cire;': '\u2257', + 'cirfnint;': '\u2a10', + 'cirmid;': '\u2aef', + 'cirscir;': '\u29c2', + 'ClockwiseContourIntegral;': '\u2232', + 'CloseCurlyDoubleQuote;': '\u201d', + 'CloseCurlyQuote;': '\u2019', + 'clubs;': '\u2663', + 'clubsuit;': '\u2663', + 'Colon;': '\u2237', + 'colon;': ':', + 'Colone;': '\u2a74', + 'colone;': '\u2254', + 'coloneq;': '\u2254', + 'comma;': ',', + 'commat;': '@', + 'comp;': '\u2201', + 'compfn;': '\u2218', + 'complement;': '\u2201', + 'complexes;': '\u2102', + 'cong;': '\u2245', + 'congdot;': '\u2a6d', + 'Congruent;': '\u2261', + 'Conint;': '\u222f', + 'conint;': '\u222e', + 'ContourIntegral;': '\u222e', + 'Copf;': '\u2102', + 'copf;': '\U0001d554', + 'coprod;': '\u2210', + 'Coproduct;': '\u2210', + 'COPY': '\xa9', + 'copy': '\xa9', + 'COPY;': '\xa9', + 'copy;': '\xa9', + 'copysr;': '\u2117', + 'CounterClockwiseContourIntegral;': '\u2233', + 'crarr;': '\u21b5', + 'Cross;': '\u2a2f', + 'cross;': '\u2717', + 'Cscr;': '\U0001d49e', + 'cscr;': 
'\U0001d4b8', + 'csub;': '\u2acf', + 'csube;': '\u2ad1', + 'csup;': '\u2ad0', + 'csupe;': '\u2ad2', + 'ctdot;': '\u22ef', + 'cudarrl;': '\u2938', + 'cudarrr;': '\u2935', + 'cuepr;': '\u22de', + 'cuesc;': '\u22df', + 'cularr;': '\u21b6', + 'cularrp;': '\u293d', + 'Cup;': '\u22d3', + 'cup;': '\u222a', + 'cupbrcap;': '\u2a48', + 'CupCap;': '\u224d', + 'cupcap;': '\u2a46', + 'cupcup;': '\u2a4a', + 'cupdot;': '\u228d', + 'cupor;': '\u2a45', + 'cups;': '\u222a\ufe00', + 'curarr;': '\u21b7', + 'curarrm;': '\u293c', + 'curlyeqprec;': '\u22de', + 'curlyeqsucc;': '\u22df', + 'curlyvee;': '\u22ce', + 'curlywedge;': '\u22cf', + 'curren': '\xa4', + 'curren;': '\xa4', + 'curvearrowleft;': '\u21b6', + 'curvearrowright;': '\u21b7', + 'cuvee;': '\u22ce', + 'cuwed;': '\u22cf', + 'cwconint;': '\u2232', + 'cwint;': '\u2231', + 'cylcty;': '\u232d', + 'Dagger;': '\u2021', + 'dagger;': '\u2020', + 'daleth;': '\u2138', + 'Darr;': '\u21a1', + 'dArr;': '\u21d3', + 'darr;': '\u2193', + 'dash;': '\u2010', + 'Dashv;': '\u2ae4', + 'dashv;': '\u22a3', + 'dbkarow;': '\u290f', + 'dblac;': '\u02dd', + 'Dcaron;': '\u010e', + 'dcaron;': '\u010f', + 'Dcy;': '\u0414', + 'dcy;': '\u0434', + 'DD;': '\u2145', + 'dd;': '\u2146', + 'ddagger;': '\u2021', + 'ddarr;': '\u21ca', + 'DDotrahd;': '\u2911', + 'ddotseq;': '\u2a77', + 'deg': '\xb0', + 'deg;': '\xb0', + 'Del;': '\u2207', + 'Delta;': '\u0394', + 'delta;': '\u03b4', + 'demptyv;': '\u29b1', + 'dfisht;': '\u297f', + 'Dfr;': '\U0001d507', + 'dfr;': '\U0001d521', + 'dHar;': '\u2965', + 'dharl;': '\u21c3', + 'dharr;': '\u21c2', + 'DiacriticalAcute;': '\xb4', + 'DiacriticalDot;': '\u02d9', + 'DiacriticalDoubleAcute;': '\u02dd', + 'DiacriticalGrave;': '`', + 'DiacriticalTilde;': '\u02dc', + 'diam;': '\u22c4', + 'Diamond;': '\u22c4', + 'diamond;': '\u22c4', + 'diamondsuit;': '\u2666', + 'diams;': '\u2666', + 'die;': '\xa8', + 'DifferentialD;': '\u2146', + 'digamma;': '\u03dd', + 'disin;': '\u22f2', + 'div;': '\xf7', + 'divide': '\xf7', + 'divide;': '\xf7', + 
'divideontimes;': '\u22c7', + 'divonx;': '\u22c7', + 'DJcy;': '\u0402', + 'djcy;': '\u0452', + 'dlcorn;': '\u231e', + 'dlcrop;': '\u230d', + 'dollar;': '$', + 'Dopf;': '\U0001d53b', + 'dopf;': '\U0001d555', + 'Dot;': '\xa8', + 'dot;': '\u02d9', + 'DotDot;': '\u20dc', + 'doteq;': '\u2250', + 'doteqdot;': '\u2251', + 'DotEqual;': '\u2250', + 'dotminus;': '\u2238', + 'dotplus;': '\u2214', + 'dotsquare;': '\u22a1', + 'doublebarwedge;': '\u2306', + 'DoubleContourIntegral;': '\u222f', + 'DoubleDot;': '\xa8', + 'DoubleDownArrow;': '\u21d3', + 'DoubleLeftArrow;': '\u21d0', + 'DoubleLeftRightArrow;': '\u21d4', + 'DoubleLeftTee;': '\u2ae4', + 'DoubleLongLeftArrow;': '\u27f8', + 'DoubleLongLeftRightArrow;': '\u27fa', + 'DoubleLongRightArrow;': '\u27f9', + 'DoubleRightArrow;': '\u21d2', + 'DoubleRightTee;': '\u22a8', + 'DoubleUpArrow;': '\u21d1', + 'DoubleUpDownArrow;': '\u21d5', + 'DoubleVerticalBar;': '\u2225', + 'DownArrow;': '\u2193', + 'Downarrow;': '\u21d3', + 'downarrow;': '\u2193', + 'DownArrowBar;': '\u2913', + 'DownArrowUpArrow;': '\u21f5', + 'DownBreve;': '\u0311', + 'downdownarrows;': '\u21ca', + 'downharpoonleft;': '\u21c3', + 'downharpoonright;': '\u21c2', + 'DownLeftRightVector;': '\u2950', + 'DownLeftTeeVector;': '\u295e', + 'DownLeftVector;': '\u21bd', + 'DownLeftVectorBar;': '\u2956', + 'DownRightTeeVector;': '\u295f', + 'DownRightVector;': '\u21c1', + 'DownRightVectorBar;': '\u2957', + 'DownTee;': '\u22a4', + 'DownTeeArrow;': '\u21a7', + 'drbkarow;': '\u2910', + 'drcorn;': '\u231f', + 'drcrop;': '\u230c', + 'Dscr;': '\U0001d49f', + 'dscr;': '\U0001d4b9', + 'DScy;': '\u0405', + 'dscy;': '\u0455', + 'dsol;': '\u29f6', + 'Dstrok;': '\u0110', + 'dstrok;': '\u0111', + 'dtdot;': '\u22f1', + 'dtri;': '\u25bf', + 'dtrif;': '\u25be', + 'duarr;': '\u21f5', + 'duhar;': '\u296f', + 'dwangle;': '\u29a6', + 'DZcy;': '\u040f', + 'dzcy;': '\u045f', + 'dzigrarr;': '\u27ff', + 'Eacute': '\xc9', + 'eacute': '\xe9', + 'Eacute;': '\xc9', + 'eacute;': '\xe9', + 'easter;': 
'\u2a6e', + 'Ecaron;': '\u011a', + 'ecaron;': '\u011b', + 'ecir;': '\u2256', + 'Ecirc': '\xca', + 'ecirc': '\xea', + 'Ecirc;': '\xca', + 'ecirc;': '\xea', + 'ecolon;': '\u2255', + 'Ecy;': '\u042d', + 'ecy;': '\u044d', + 'eDDot;': '\u2a77', + 'Edot;': '\u0116', + 'eDot;': '\u2251', + 'edot;': '\u0117', + 'ee;': '\u2147', + 'efDot;': '\u2252', + 'Efr;': '\U0001d508', + 'efr;': '\U0001d522', + 'eg;': '\u2a9a', + 'Egrave': '\xc8', + 'egrave': '\xe8', + 'Egrave;': '\xc8', + 'egrave;': '\xe8', + 'egs;': '\u2a96', + 'egsdot;': '\u2a98', + 'el;': '\u2a99', + 'Element;': '\u2208', + 'elinters;': '\u23e7', + 'ell;': '\u2113', + 'els;': '\u2a95', + 'elsdot;': '\u2a97', + 'Emacr;': '\u0112', + 'emacr;': '\u0113', + 'empty;': '\u2205', + 'emptyset;': '\u2205', + 'EmptySmallSquare;': '\u25fb', + 'emptyv;': '\u2205', + 'EmptyVerySmallSquare;': '\u25ab', + 'emsp13;': '\u2004', + 'emsp14;': '\u2005', + 'emsp;': '\u2003', + 'ENG;': '\u014a', + 'eng;': '\u014b', + 'ensp;': '\u2002', + 'Eogon;': '\u0118', + 'eogon;': '\u0119', + 'Eopf;': '\U0001d53c', + 'eopf;': '\U0001d556', + 'epar;': '\u22d5', + 'eparsl;': '\u29e3', + 'eplus;': '\u2a71', + 'epsi;': '\u03b5', + 'Epsilon;': '\u0395', + 'epsilon;': '\u03b5', + 'epsiv;': '\u03f5', + 'eqcirc;': '\u2256', + 'eqcolon;': '\u2255', + 'eqsim;': '\u2242', + 'eqslantgtr;': '\u2a96', + 'eqslantless;': '\u2a95', + 'Equal;': '\u2a75', + 'equals;': '=', + 'EqualTilde;': '\u2242', + 'equest;': '\u225f', + 'Equilibrium;': '\u21cc', + 'equiv;': '\u2261', + 'equivDD;': '\u2a78', + 'eqvparsl;': '\u29e5', + 'erarr;': '\u2971', + 'erDot;': '\u2253', + 'Escr;': '\u2130', + 'escr;': '\u212f', + 'esdot;': '\u2250', + 'Esim;': '\u2a73', + 'esim;': '\u2242', + 'Eta;': '\u0397', + 'eta;': '\u03b7', + 'ETH': '\xd0', + 'eth': '\xf0', + 'ETH;': '\xd0', + 'eth;': '\xf0', + 'Euml': '\xcb', + 'euml': '\xeb', + 'Euml;': '\xcb', + 'euml;': '\xeb', + 'euro;': '\u20ac', + 'excl;': '!', + 'exist;': '\u2203', + 'Exists;': '\u2203', + 'expectation;': '\u2130', + 
'ExponentialE;': '\u2147', + 'exponentiale;': '\u2147', + 'fallingdotseq;': '\u2252', + 'Fcy;': '\u0424', + 'fcy;': '\u0444', + 'female;': '\u2640', + 'ffilig;': '\ufb03', + 'fflig;': '\ufb00', + 'ffllig;': '\ufb04', + 'Ffr;': '\U0001d509', + 'ffr;': '\U0001d523', + 'filig;': '\ufb01', + 'FilledSmallSquare;': '\u25fc', + 'FilledVerySmallSquare;': '\u25aa', + 'fjlig;': 'fj', + 'flat;': '\u266d', + 'fllig;': '\ufb02', + 'fltns;': '\u25b1', + 'fnof;': '\u0192', + 'Fopf;': '\U0001d53d', + 'fopf;': '\U0001d557', + 'ForAll;': '\u2200', + 'forall;': '\u2200', + 'fork;': '\u22d4', + 'forkv;': '\u2ad9', + 'Fouriertrf;': '\u2131', + 'fpartint;': '\u2a0d', + 'frac12': '\xbd', + 'frac12;': '\xbd', + 'frac13;': '\u2153', + 'frac14': '\xbc', + 'frac14;': '\xbc', + 'frac15;': '\u2155', + 'frac16;': '\u2159', + 'frac18;': '\u215b', + 'frac23;': '\u2154', + 'frac25;': '\u2156', + 'frac34': '\xbe', + 'frac34;': '\xbe', + 'frac35;': '\u2157', + 'frac38;': '\u215c', + 'frac45;': '\u2158', + 'frac56;': '\u215a', + 'frac58;': '\u215d', + 'frac78;': '\u215e', + 'frasl;': '\u2044', + 'frown;': '\u2322', + 'Fscr;': '\u2131', + 'fscr;': '\U0001d4bb', + 'gacute;': '\u01f5', + 'Gamma;': '\u0393', + 'gamma;': '\u03b3', + 'Gammad;': '\u03dc', + 'gammad;': '\u03dd', + 'gap;': '\u2a86', + 'Gbreve;': '\u011e', + 'gbreve;': '\u011f', + 'Gcedil;': '\u0122', + 'Gcirc;': '\u011c', + 'gcirc;': '\u011d', + 'Gcy;': '\u0413', + 'gcy;': '\u0433', + 'Gdot;': '\u0120', + 'gdot;': '\u0121', + 'gE;': '\u2267', + 'ge;': '\u2265', + 'gEl;': '\u2a8c', + 'gel;': '\u22db', + 'geq;': '\u2265', + 'geqq;': '\u2267', + 'geqslant;': '\u2a7e', + 'ges;': '\u2a7e', + 'gescc;': '\u2aa9', + 'gesdot;': '\u2a80', + 'gesdoto;': '\u2a82', + 'gesdotol;': '\u2a84', + 'gesl;': '\u22db\ufe00', + 'gesles;': '\u2a94', + 'Gfr;': '\U0001d50a', + 'gfr;': '\U0001d524', + 'Gg;': '\u22d9', + 'gg;': '\u226b', + 'ggg;': '\u22d9', + 'gimel;': '\u2137', + 'GJcy;': '\u0403', + 'gjcy;': '\u0453', + 'gl;': '\u2277', + 'gla;': '\u2aa5', + 'glE;': 
'\u2a92', + 'glj;': '\u2aa4', + 'gnap;': '\u2a8a', + 'gnapprox;': '\u2a8a', + 'gnE;': '\u2269', + 'gne;': '\u2a88', + 'gneq;': '\u2a88', + 'gneqq;': '\u2269', + 'gnsim;': '\u22e7', + 'Gopf;': '\U0001d53e', + 'gopf;': '\U0001d558', + 'grave;': '`', + 'GreaterEqual;': '\u2265', + 'GreaterEqualLess;': '\u22db', + 'GreaterFullEqual;': '\u2267', + 'GreaterGreater;': '\u2aa2', + 'GreaterLess;': '\u2277', + 'GreaterSlantEqual;': '\u2a7e', + 'GreaterTilde;': '\u2273', + 'Gscr;': '\U0001d4a2', + 'gscr;': '\u210a', + 'gsim;': '\u2273', + 'gsime;': '\u2a8e', + 'gsiml;': '\u2a90', + 'GT': '>', + 'gt': '>', + 'GT;': '>', + 'Gt;': '\u226b', + 'gt;': '>', + 'gtcc;': '\u2aa7', + 'gtcir;': '\u2a7a', + 'gtdot;': '\u22d7', + 'gtlPar;': '\u2995', + 'gtquest;': '\u2a7c', + 'gtrapprox;': '\u2a86', + 'gtrarr;': '\u2978', + 'gtrdot;': '\u22d7', + 'gtreqless;': '\u22db', + 'gtreqqless;': '\u2a8c', + 'gtrless;': '\u2277', + 'gtrsim;': '\u2273', + 'gvertneqq;': '\u2269\ufe00', + 'gvnE;': '\u2269\ufe00', + 'Hacek;': '\u02c7', + 'hairsp;': '\u200a', + 'half;': '\xbd', + 'hamilt;': '\u210b', + 'HARDcy;': '\u042a', + 'hardcy;': '\u044a', + 'hArr;': '\u21d4', + 'harr;': '\u2194', + 'harrcir;': '\u2948', + 'harrw;': '\u21ad', + 'Hat;': '^', + 'hbar;': '\u210f', + 'Hcirc;': '\u0124', + 'hcirc;': '\u0125', + 'hearts;': '\u2665', + 'heartsuit;': '\u2665', + 'hellip;': '\u2026', + 'hercon;': '\u22b9', + 'Hfr;': '\u210c', + 'hfr;': '\U0001d525', + 'HilbertSpace;': '\u210b', + 'hksearow;': '\u2925', + 'hkswarow;': '\u2926', + 'hoarr;': '\u21ff', + 'homtht;': '\u223b', + 'hookleftarrow;': '\u21a9', + 'hookrightarrow;': '\u21aa', + 'Hopf;': '\u210d', + 'hopf;': '\U0001d559', + 'horbar;': '\u2015', + 'HorizontalLine;': '\u2500', + 'Hscr;': '\u210b', + 'hscr;': '\U0001d4bd', + 'hslash;': '\u210f', + 'Hstrok;': '\u0126', + 'hstrok;': '\u0127', + 'HumpDownHump;': '\u224e', + 'HumpEqual;': '\u224f', + 'hybull;': '\u2043', + 'hyphen;': '\u2010', + 'Iacute': '\xcd', + 'iacute': '\xed', + 'Iacute;': '\xcd', + 
'iacute;': '\xed', + 'ic;': '\u2063', + 'Icirc': '\xce', + 'icirc': '\xee', + 'Icirc;': '\xce', + 'icirc;': '\xee', + 'Icy;': '\u0418', + 'icy;': '\u0438', + 'Idot;': '\u0130', + 'IEcy;': '\u0415', + 'iecy;': '\u0435', + 'iexcl': '\xa1', + 'iexcl;': '\xa1', + 'iff;': '\u21d4', + 'Ifr;': '\u2111', + 'ifr;': '\U0001d526', + 'Igrave': '\xcc', + 'igrave': '\xec', + 'Igrave;': '\xcc', + 'igrave;': '\xec', + 'ii;': '\u2148', + 'iiiint;': '\u2a0c', + 'iiint;': '\u222d', + 'iinfin;': '\u29dc', + 'iiota;': '\u2129', + 'IJlig;': '\u0132', + 'ijlig;': '\u0133', + 'Im;': '\u2111', + 'Imacr;': '\u012a', + 'imacr;': '\u012b', + 'image;': '\u2111', + 'ImaginaryI;': '\u2148', + 'imagline;': '\u2110', + 'imagpart;': '\u2111', + 'imath;': '\u0131', + 'imof;': '\u22b7', + 'imped;': '\u01b5', + 'Implies;': '\u21d2', + 'in;': '\u2208', + 'incare;': '\u2105', + 'infin;': '\u221e', + 'infintie;': '\u29dd', + 'inodot;': '\u0131', + 'Int;': '\u222c', + 'int;': '\u222b', + 'intcal;': '\u22ba', + 'integers;': '\u2124', + 'Integral;': '\u222b', + 'intercal;': '\u22ba', + 'Intersection;': '\u22c2', + 'intlarhk;': '\u2a17', + 'intprod;': '\u2a3c', + 'InvisibleComma;': '\u2063', + 'InvisibleTimes;': '\u2062', + 'IOcy;': '\u0401', + 'iocy;': '\u0451', + 'Iogon;': '\u012e', + 'iogon;': '\u012f', + 'Iopf;': '\U0001d540', + 'iopf;': '\U0001d55a', + 'Iota;': '\u0399', + 'iota;': '\u03b9', + 'iprod;': '\u2a3c', + 'iquest': '\xbf', + 'iquest;': '\xbf', + 'Iscr;': '\u2110', + 'iscr;': '\U0001d4be', + 'isin;': '\u2208', + 'isindot;': '\u22f5', + 'isinE;': '\u22f9', + 'isins;': '\u22f4', + 'isinsv;': '\u22f3', + 'isinv;': '\u2208', + 'it;': '\u2062', + 'Itilde;': '\u0128', + 'itilde;': '\u0129', + 'Iukcy;': '\u0406', + 'iukcy;': '\u0456', + 'Iuml': '\xcf', + 'iuml': '\xef', + 'Iuml;': '\xcf', + 'iuml;': '\xef', + 'Jcirc;': '\u0134', + 'jcirc;': '\u0135', + 'Jcy;': '\u0419', + 'jcy;': '\u0439', + 'Jfr;': '\U0001d50d', + 'jfr;': '\U0001d527', + 'jmath;': '\u0237', + 'Jopf;': '\U0001d541', + 'jopf;': 
'\U0001d55b', + 'Jscr;': '\U0001d4a5', + 'jscr;': '\U0001d4bf', + 'Jsercy;': '\u0408', + 'jsercy;': '\u0458', + 'Jukcy;': '\u0404', + 'jukcy;': '\u0454', + 'Kappa;': '\u039a', + 'kappa;': '\u03ba', + 'kappav;': '\u03f0', + 'Kcedil;': '\u0136', + 'kcedil;': '\u0137', + 'Kcy;': '\u041a', + 'kcy;': '\u043a', + 'Kfr;': '\U0001d50e', + 'kfr;': '\U0001d528', + 'kgreen;': '\u0138', + 'KHcy;': '\u0425', + 'khcy;': '\u0445', + 'KJcy;': '\u040c', + 'kjcy;': '\u045c', + 'Kopf;': '\U0001d542', + 'kopf;': '\U0001d55c', + 'Kscr;': '\U0001d4a6', + 'kscr;': '\U0001d4c0', + 'lAarr;': '\u21da', + 'Lacute;': '\u0139', + 'lacute;': '\u013a', + 'laemptyv;': '\u29b4', + 'lagran;': '\u2112', + 'Lambda;': '\u039b', + 'lambda;': '\u03bb', + 'Lang;': '\u27ea', + 'lang;': '\u27e8', + 'langd;': '\u2991', + 'langle;': '\u27e8', + 'lap;': '\u2a85', + 'Laplacetrf;': '\u2112', + 'laquo': '\xab', + 'laquo;': '\xab', + 'Larr;': '\u219e', + 'lArr;': '\u21d0', + 'larr;': '\u2190', + 'larrb;': '\u21e4', + 'larrbfs;': '\u291f', + 'larrfs;': '\u291d', + 'larrhk;': '\u21a9', + 'larrlp;': '\u21ab', + 'larrpl;': '\u2939', + 'larrsim;': '\u2973', + 'larrtl;': '\u21a2', + 'lat;': '\u2aab', + 'lAtail;': '\u291b', + 'latail;': '\u2919', + 'late;': '\u2aad', + 'lates;': '\u2aad\ufe00', + 'lBarr;': '\u290e', + 'lbarr;': '\u290c', + 'lbbrk;': '\u2772', + 'lbrace;': '{', + 'lbrack;': '[', + 'lbrke;': '\u298b', + 'lbrksld;': '\u298f', + 'lbrkslu;': '\u298d', + 'Lcaron;': '\u013d', + 'lcaron;': '\u013e', + 'Lcedil;': '\u013b', + 'lcedil;': '\u013c', + 'lceil;': '\u2308', + 'lcub;': '{', + 'Lcy;': '\u041b', + 'lcy;': '\u043b', + 'ldca;': '\u2936', + 'ldquo;': '\u201c', + 'ldquor;': '\u201e', + 'ldrdhar;': '\u2967', + 'ldrushar;': '\u294b', + 'ldsh;': '\u21b2', + 'lE;': '\u2266', + 'le;': '\u2264', + 'LeftAngleBracket;': '\u27e8', + 'LeftArrow;': '\u2190', + 'Leftarrow;': '\u21d0', + 'leftarrow;': '\u2190', + 'LeftArrowBar;': '\u21e4', + 'LeftArrowRightArrow;': '\u21c6', + 'leftarrowtail;': '\u21a2', + 'LeftCeiling;': 
'\u2308', + 'LeftDoubleBracket;': '\u27e6', + 'LeftDownTeeVector;': '\u2961', + 'LeftDownVector;': '\u21c3', + 'LeftDownVectorBar;': '\u2959', + 'LeftFloor;': '\u230a', + 'leftharpoondown;': '\u21bd', + 'leftharpoonup;': '\u21bc', + 'leftleftarrows;': '\u21c7', + 'LeftRightArrow;': '\u2194', + 'Leftrightarrow;': '\u21d4', + 'leftrightarrow;': '\u2194', + 'leftrightarrows;': '\u21c6', + 'leftrightharpoons;': '\u21cb', + 'leftrightsquigarrow;': '\u21ad', + 'LeftRightVector;': '\u294e', + 'LeftTee;': '\u22a3', + 'LeftTeeArrow;': '\u21a4', + 'LeftTeeVector;': '\u295a', + 'leftthreetimes;': '\u22cb', + 'LeftTriangle;': '\u22b2', + 'LeftTriangleBar;': '\u29cf', + 'LeftTriangleEqual;': '\u22b4', + 'LeftUpDownVector;': '\u2951', + 'LeftUpTeeVector;': '\u2960', + 'LeftUpVector;': '\u21bf', + 'LeftUpVectorBar;': '\u2958', + 'LeftVector;': '\u21bc', + 'LeftVectorBar;': '\u2952', + 'lEg;': '\u2a8b', + 'leg;': '\u22da', + 'leq;': '\u2264', + 'leqq;': '\u2266', + 'leqslant;': '\u2a7d', + 'les;': '\u2a7d', + 'lescc;': '\u2aa8', + 'lesdot;': '\u2a7f', + 'lesdoto;': '\u2a81', + 'lesdotor;': '\u2a83', + 'lesg;': '\u22da\ufe00', + 'lesges;': '\u2a93', + 'lessapprox;': '\u2a85', + 'lessdot;': '\u22d6', + 'lesseqgtr;': '\u22da', + 'lesseqqgtr;': '\u2a8b', + 'LessEqualGreater;': '\u22da', + 'LessFullEqual;': '\u2266', + 'LessGreater;': '\u2276', + 'lessgtr;': '\u2276', + 'LessLess;': '\u2aa1', + 'lesssim;': '\u2272', + 'LessSlantEqual;': '\u2a7d', + 'LessTilde;': '\u2272', + 'lfisht;': '\u297c', + 'lfloor;': '\u230a', + 'Lfr;': '\U0001d50f', + 'lfr;': '\U0001d529', + 'lg;': '\u2276', + 'lgE;': '\u2a91', + 'lHar;': '\u2962', + 'lhard;': '\u21bd', + 'lharu;': '\u21bc', + 'lharul;': '\u296a', + 'lhblk;': '\u2584', + 'LJcy;': '\u0409', + 'ljcy;': '\u0459', + 'Ll;': '\u22d8', + 'll;': '\u226a', + 'llarr;': '\u21c7', + 'llcorner;': '\u231e', + 'Lleftarrow;': '\u21da', + 'llhard;': '\u296b', + 'lltri;': '\u25fa', + 'Lmidot;': '\u013f', + 'lmidot;': '\u0140', + 'lmoust;': '\u23b0', + 
'lmoustache;': '\u23b0', + 'lnap;': '\u2a89', + 'lnapprox;': '\u2a89', + 'lnE;': '\u2268', + 'lne;': '\u2a87', + 'lneq;': '\u2a87', + 'lneqq;': '\u2268', + 'lnsim;': '\u22e6', + 'loang;': '\u27ec', + 'loarr;': '\u21fd', + 'lobrk;': '\u27e6', + 'LongLeftArrow;': '\u27f5', + 'Longleftarrow;': '\u27f8', + 'longleftarrow;': '\u27f5', + 'LongLeftRightArrow;': '\u27f7', + 'Longleftrightarrow;': '\u27fa', + 'longleftrightarrow;': '\u27f7', + 'longmapsto;': '\u27fc', + 'LongRightArrow;': '\u27f6', + 'Longrightarrow;': '\u27f9', + 'longrightarrow;': '\u27f6', + 'looparrowleft;': '\u21ab', + 'looparrowright;': '\u21ac', + 'lopar;': '\u2985', + 'Lopf;': '\U0001d543', + 'lopf;': '\U0001d55d', + 'loplus;': '\u2a2d', + 'lotimes;': '\u2a34', + 'lowast;': '\u2217', + 'lowbar;': '_', + 'LowerLeftArrow;': '\u2199', + 'LowerRightArrow;': '\u2198', + 'loz;': '\u25ca', + 'lozenge;': '\u25ca', + 'lozf;': '\u29eb', + 'lpar;': '(', + 'lparlt;': '\u2993', + 'lrarr;': '\u21c6', + 'lrcorner;': '\u231f', + 'lrhar;': '\u21cb', + 'lrhard;': '\u296d', + 'lrm;': '\u200e', + 'lrtri;': '\u22bf', + 'lsaquo;': '\u2039', + 'Lscr;': '\u2112', + 'lscr;': '\U0001d4c1', + 'Lsh;': '\u21b0', + 'lsh;': '\u21b0', + 'lsim;': '\u2272', + 'lsime;': '\u2a8d', + 'lsimg;': '\u2a8f', + 'lsqb;': '[', + 'lsquo;': '\u2018', + 'lsquor;': '\u201a', + 'Lstrok;': '\u0141', + 'lstrok;': '\u0142', + 'LT': '<', + 'lt': '<', + 'LT;': '<', + 'Lt;': '\u226a', + 'lt;': '<', + 'ltcc;': '\u2aa6', + 'ltcir;': '\u2a79', + 'ltdot;': '\u22d6', + 'lthree;': '\u22cb', + 'ltimes;': '\u22c9', + 'ltlarr;': '\u2976', + 'ltquest;': '\u2a7b', + 'ltri;': '\u25c3', + 'ltrie;': '\u22b4', + 'ltrif;': '\u25c2', + 'ltrPar;': '\u2996', + 'lurdshar;': '\u294a', + 'luruhar;': '\u2966', + 'lvertneqq;': '\u2268\ufe00', + 'lvnE;': '\u2268\ufe00', + 'macr': '\xaf', + 'macr;': '\xaf', + 'male;': '\u2642', + 'malt;': '\u2720', + 'maltese;': '\u2720', + 'Map;': '\u2905', + 'map;': '\u21a6', + 'mapsto;': '\u21a6', + 'mapstodown;': '\u21a7', + 'mapstoleft;': 
'\u21a4', + 'mapstoup;': '\u21a5', + 'marker;': '\u25ae', + 'mcomma;': '\u2a29', + 'Mcy;': '\u041c', + 'mcy;': '\u043c', + 'mdash;': '\u2014', + 'mDDot;': '\u223a', + 'measuredangle;': '\u2221', + 'MediumSpace;': '\u205f', + 'Mellintrf;': '\u2133', + 'Mfr;': '\U0001d510', + 'mfr;': '\U0001d52a', + 'mho;': '\u2127', + 'micro': '\xb5', + 'micro;': '\xb5', + 'mid;': '\u2223', + 'midast;': '*', + 'midcir;': '\u2af0', + 'middot': '\xb7', + 'middot;': '\xb7', + 'minus;': '\u2212', + 'minusb;': '\u229f', + 'minusd;': '\u2238', + 'minusdu;': '\u2a2a', + 'MinusPlus;': '\u2213', + 'mlcp;': '\u2adb', + 'mldr;': '\u2026', + 'mnplus;': '\u2213', + 'models;': '\u22a7', + 'Mopf;': '\U0001d544', + 'mopf;': '\U0001d55e', + 'mp;': '\u2213', + 'Mscr;': '\u2133', + 'mscr;': '\U0001d4c2', + 'mstpos;': '\u223e', + 'Mu;': '\u039c', + 'mu;': '\u03bc', + 'multimap;': '\u22b8', + 'mumap;': '\u22b8', + 'nabla;': '\u2207', + 'Nacute;': '\u0143', + 'nacute;': '\u0144', + 'nang;': '\u2220\u20d2', + 'nap;': '\u2249', + 'napE;': '\u2a70\u0338', + 'napid;': '\u224b\u0338', + 'napos;': '\u0149', + 'napprox;': '\u2249', + 'natur;': '\u266e', + 'natural;': '\u266e', + 'naturals;': '\u2115', + 'nbsp': '\xa0', + 'nbsp;': '\xa0', + 'nbump;': '\u224e\u0338', + 'nbumpe;': '\u224f\u0338', + 'ncap;': '\u2a43', + 'Ncaron;': '\u0147', + 'ncaron;': '\u0148', + 'Ncedil;': '\u0145', + 'ncedil;': '\u0146', + 'ncong;': '\u2247', + 'ncongdot;': '\u2a6d\u0338', + 'ncup;': '\u2a42', + 'Ncy;': '\u041d', + 'ncy;': '\u043d', + 'ndash;': '\u2013', + 'ne;': '\u2260', + 'nearhk;': '\u2924', + 'neArr;': '\u21d7', + 'nearr;': '\u2197', + 'nearrow;': '\u2197', + 'nedot;': '\u2250\u0338', + 'NegativeMediumSpace;': '\u200b', + 'NegativeThickSpace;': '\u200b', + 'NegativeThinSpace;': '\u200b', + 'NegativeVeryThinSpace;': '\u200b', + 'nequiv;': '\u2262', + 'nesear;': '\u2928', + 'nesim;': '\u2242\u0338', + 'NestedGreaterGreater;': '\u226b', + 'NestedLessLess;': '\u226a', + 'NewLine;': '\n', + 'nexist;': '\u2204', + 'nexists;': 
'\u2204', + 'Nfr;': '\U0001d511', + 'nfr;': '\U0001d52b', + 'ngE;': '\u2267\u0338', + 'nge;': '\u2271', + 'ngeq;': '\u2271', + 'ngeqq;': '\u2267\u0338', + 'ngeqslant;': '\u2a7e\u0338', + 'nges;': '\u2a7e\u0338', + 'nGg;': '\u22d9\u0338', + 'ngsim;': '\u2275', + 'nGt;': '\u226b\u20d2', + 'ngt;': '\u226f', + 'ngtr;': '\u226f', + 'nGtv;': '\u226b\u0338', + 'nhArr;': '\u21ce', + 'nharr;': '\u21ae', + 'nhpar;': '\u2af2', + 'ni;': '\u220b', + 'nis;': '\u22fc', + 'nisd;': '\u22fa', + 'niv;': '\u220b', + 'NJcy;': '\u040a', + 'njcy;': '\u045a', + 'nlArr;': '\u21cd', + 'nlarr;': '\u219a', + 'nldr;': '\u2025', + 'nlE;': '\u2266\u0338', + 'nle;': '\u2270', + 'nLeftarrow;': '\u21cd', + 'nleftarrow;': '\u219a', + 'nLeftrightarrow;': '\u21ce', + 'nleftrightarrow;': '\u21ae', + 'nleq;': '\u2270', + 'nleqq;': '\u2266\u0338', + 'nleqslant;': '\u2a7d\u0338', + 'nles;': '\u2a7d\u0338', + 'nless;': '\u226e', + 'nLl;': '\u22d8\u0338', + 'nlsim;': '\u2274', + 'nLt;': '\u226a\u20d2', + 'nlt;': '\u226e', + 'nltri;': '\u22ea', + 'nltrie;': '\u22ec', + 'nLtv;': '\u226a\u0338', + 'nmid;': '\u2224', + 'NoBreak;': '\u2060', + 'NonBreakingSpace;': '\xa0', + 'Nopf;': '\u2115', + 'nopf;': '\U0001d55f', + 'not': '\xac', + 'Not;': '\u2aec', + 'not;': '\xac', + 'NotCongruent;': '\u2262', + 'NotCupCap;': '\u226d', + 'NotDoubleVerticalBar;': '\u2226', + 'NotElement;': '\u2209', + 'NotEqual;': '\u2260', + 'NotEqualTilde;': '\u2242\u0338', + 'NotExists;': '\u2204', + 'NotGreater;': '\u226f', + 'NotGreaterEqual;': '\u2271', + 'NotGreaterFullEqual;': '\u2267\u0338', + 'NotGreaterGreater;': '\u226b\u0338', + 'NotGreaterLess;': '\u2279', + 'NotGreaterSlantEqual;': '\u2a7e\u0338', + 'NotGreaterTilde;': '\u2275', + 'NotHumpDownHump;': '\u224e\u0338', + 'NotHumpEqual;': '\u224f\u0338', + 'notin;': '\u2209', + 'notindot;': '\u22f5\u0338', + 'notinE;': '\u22f9\u0338', + 'notinva;': '\u2209', + 'notinvb;': '\u22f7', + 'notinvc;': '\u22f6', + 'NotLeftTriangle;': '\u22ea', + 'NotLeftTriangleBar;': '\u29cf\u0338', + 
'NotLeftTriangleEqual;': '\u22ec', + 'NotLess;': '\u226e', + 'NotLessEqual;': '\u2270', + 'NotLessGreater;': '\u2278', + 'NotLessLess;': '\u226a\u0338', + 'NotLessSlantEqual;': '\u2a7d\u0338', + 'NotLessTilde;': '\u2274', + 'NotNestedGreaterGreater;': '\u2aa2\u0338', + 'NotNestedLessLess;': '\u2aa1\u0338', + 'notni;': '\u220c', + 'notniva;': '\u220c', + 'notnivb;': '\u22fe', + 'notnivc;': '\u22fd', + 'NotPrecedes;': '\u2280', + 'NotPrecedesEqual;': '\u2aaf\u0338', + 'NotPrecedesSlantEqual;': '\u22e0', + 'NotReverseElement;': '\u220c', + 'NotRightTriangle;': '\u22eb', + 'NotRightTriangleBar;': '\u29d0\u0338', + 'NotRightTriangleEqual;': '\u22ed', + 'NotSquareSubset;': '\u228f\u0338', + 'NotSquareSubsetEqual;': '\u22e2', + 'NotSquareSuperset;': '\u2290\u0338', + 'NotSquareSupersetEqual;': '\u22e3', + 'NotSubset;': '\u2282\u20d2', + 'NotSubsetEqual;': '\u2288', + 'NotSucceeds;': '\u2281', + 'NotSucceedsEqual;': '\u2ab0\u0338', + 'NotSucceedsSlantEqual;': '\u22e1', + 'NotSucceedsTilde;': '\u227f\u0338', + 'NotSuperset;': '\u2283\u20d2', + 'NotSupersetEqual;': '\u2289', + 'NotTilde;': '\u2241', + 'NotTildeEqual;': '\u2244', + 'NotTildeFullEqual;': '\u2247', + 'NotTildeTilde;': '\u2249', + 'NotVerticalBar;': '\u2224', + 'npar;': '\u2226', + 'nparallel;': '\u2226', + 'nparsl;': '\u2afd\u20e5', + 'npart;': '\u2202\u0338', + 'npolint;': '\u2a14', + 'npr;': '\u2280', + 'nprcue;': '\u22e0', + 'npre;': '\u2aaf\u0338', + 'nprec;': '\u2280', + 'npreceq;': '\u2aaf\u0338', + 'nrArr;': '\u21cf', + 'nrarr;': '\u219b', + 'nrarrc;': '\u2933\u0338', + 'nrarrw;': '\u219d\u0338', + 'nRightarrow;': '\u21cf', + 'nrightarrow;': '\u219b', + 'nrtri;': '\u22eb', + 'nrtrie;': '\u22ed', + 'nsc;': '\u2281', + 'nsccue;': '\u22e1', + 'nsce;': '\u2ab0\u0338', + 'Nscr;': '\U0001d4a9', + 'nscr;': '\U0001d4c3', + 'nshortmid;': '\u2224', + 'nshortparallel;': '\u2226', + 'nsim;': '\u2241', + 'nsime;': '\u2244', + 'nsimeq;': '\u2244', + 'nsmid;': '\u2224', + 'nspar;': '\u2226', + 'nsqsube;': '\u22e2', + 
'nsqsupe;': '\u22e3', + 'nsub;': '\u2284', + 'nsubE;': '\u2ac5\u0338', + 'nsube;': '\u2288', + 'nsubset;': '\u2282\u20d2', + 'nsubseteq;': '\u2288', + 'nsubseteqq;': '\u2ac5\u0338', + 'nsucc;': '\u2281', + 'nsucceq;': '\u2ab0\u0338', + 'nsup;': '\u2285', + 'nsupE;': '\u2ac6\u0338', + 'nsupe;': '\u2289', + 'nsupset;': '\u2283\u20d2', + 'nsupseteq;': '\u2289', + 'nsupseteqq;': '\u2ac6\u0338', + 'ntgl;': '\u2279', + 'Ntilde': '\xd1', + 'ntilde': '\xf1', + 'Ntilde;': '\xd1', + 'ntilde;': '\xf1', + 'ntlg;': '\u2278', + 'ntriangleleft;': '\u22ea', + 'ntrianglelefteq;': '\u22ec', + 'ntriangleright;': '\u22eb', + 'ntrianglerighteq;': '\u22ed', + 'Nu;': '\u039d', + 'nu;': '\u03bd', + 'num;': '#', + 'numero;': '\u2116', + 'numsp;': '\u2007', + 'nvap;': '\u224d\u20d2', + 'nVDash;': '\u22af', + 'nVdash;': '\u22ae', + 'nvDash;': '\u22ad', + 'nvdash;': '\u22ac', + 'nvge;': '\u2265\u20d2', + 'nvgt;': '>\u20d2', + 'nvHarr;': '\u2904', + 'nvinfin;': '\u29de', + 'nvlArr;': '\u2902', + 'nvle;': '\u2264\u20d2', + 'nvlt;': '<\u20d2', + 'nvltrie;': '\u22b4\u20d2', + 'nvrArr;': '\u2903', + 'nvrtrie;': '\u22b5\u20d2', + 'nvsim;': '\u223c\u20d2', + 'nwarhk;': '\u2923', + 'nwArr;': '\u21d6', + 'nwarr;': '\u2196', + 'nwarrow;': '\u2196', + 'nwnear;': '\u2927', + 'Oacute': '\xd3', + 'oacute': '\xf3', + 'Oacute;': '\xd3', + 'oacute;': '\xf3', + 'oast;': '\u229b', + 'ocir;': '\u229a', + 'Ocirc': '\xd4', + 'ocirc': '\xf4', + 'Ocirc;': '\xd4', + 'ocirc;': '\xf4', + 'Ocy;': '\u041e', + 'ocy;': '\u043e', + 'odash;': '\u229d', + 'Odblac;': '\u0150', + 'odblac;': '\u0151', + 'odiv;': '\u2a38', + 'odot;': '\u2299', + 'odsold;': '\u29bc', + 'OElig;': '\u0152', + 'oelig;': '\u0153', + 'ofcir;': '\u29bf', + 'Ofr;': '\U0001d512', + 'ofr;': '\U0001d52c', + 'ogon;': '\u02db', + 'Ograve': '\xd2', + 'ograve': '\xf2', + 'Ograve;': '\xd2', + 'ograve;': '\xf2', + 'ogt;': '\u29c1', + 'ohbar;': '\u29b5', + 'ohm;': '\u03a9', + 'oint;': '\u222e', + 'olarr;': '\u21ba', + 'olcir;': '\u29be', + 'olcross;': '\u29bb', + 
'oline;': '\u203e', + 'olt;': '\u29c0', + 'Omacr;': '\u014c', + 'omacr;': '\u014d', + 'Omega;': '\u03a9', + 'omega;': '\u03c9', + 'Omicron;': '\u039f', + 'omicron;': '\u03bf', + 'omid;': '\u29b6', + 'ominus;': '\u2296', + 'Oopf;': '\U0001d546', + 'oopf;': '\U0001d560', + 'opar;': '\u29b7', + 'OpenCurlyDoubleQuote;': '\u201c', + 'OpenCurlyQuote;': '\u2018', + 'operp;': '\u29b9', + 'oplus;': '\u2295', + 'Or;': '\u2a54', + 'or;': '\u2228', + 'orarr;': '\u21bb', + 'ord;': '\u2a5d', + 'order;': '\u2134', + 'orderof;': '\u2134', + 'ordf': '\xaa', + 'ordf;': '\xaa', + 'ordm': '\xba', + 'ordm;': '\xba', + 'origof;': '\u22b6', + 'oror;': '\u2a56', + 'orslope;': '\u2a57', + 'orv;': '\u2a5b', + 'oS;': '\u24c8', + 'Oscr;': '\U0001d4aa', + 'oscr;': '\u2134', + 'Oslash': '\xd8', + 'oslash': '\xf8', + 'Oslash;': '\xd8', + 'oslash;': '\xf8', + 'osol;': '\u2298', + 'Otilde': '\xd5', + 'otilde': '\xf5', + 'Otilde;': '\xd5', + 'otilde;': '\xf5', + 'Otimes;': '\u2a37', + 'otimes;': '\u2297', + 'otimesas;': '\u2a36', + 'Ouml': '\xd6', + 'ouml': '\xf6', + 'Ouml;': '\xd6', + 'ouml;': '\xf6', + 'ovbar;': '\u233d', + 'OverBar;': '\u203e', + 'OverBrace;': '\u23de', + 'OverBracket;': '\u23b4', + 'OverParenthesis;': '\u23dc', + 'par;': '\u2225', + 'para': '\xb6', + 'para;': '\xb6', + 'parallel;': '\u2225', + 'parsim;': '\u2af3', + 'parsl;': '\u2afd', + 'part;': '\u2202', + 'PartialD;': '\u2202', + 'Pcy;': '\u041f', + 'pcy;': '\u043f', + 'percnt;': '%', + 'period;': '.', + 'permil;': '\u2030', + 'perp;': '\u22a5', + 'pertenk;': '\u2031', + 'Pfr;': '\U0001d513', + 'pfr;': '\U0001d52d', + 'Phi;': '\u03a6', + 'phi;': '\u03c6', + 'phiv;': '\u03d5', + 'phmmat;': '\u2133', + 'phone;': '\u260e', + 'Pi;': '\u03a0', + 'pi;': '\u03c0', + 'pitchfork;': '\u22d4', + 'piv;': '\u03d6', + 'planck;': '\u210f', + 'planckh;': '\u210e', + 'plankv;': '\u210f', + 'plus;': '+', + 'plusacir;': '\u2a23', + 'plusb;': '\u229e', + 'pluscir;': '\u2a22', + 'plusdo;': '\u2214', + 'plusdu;': '\u2a25', + 'pluse;': '\u2a72', + 
'PlusMinus;': '\xb1', + 'plusmn': '\xb1', + 'plusmn;': '\xb1', + 'plussim;': '\u2a26', + 'plustwo;': '\u2a27', + 'pm;': '\xb1', + 'Poincareplane;': '\u210c', + 'pointint;': '\u2a15', + 'Popf;': '\u2119', + 'popf;': '\U0001d561', + 'pound': '\xa3', + 'pound;': '\xa3', + 'Pr;': '\u2abb', + 'pr;': '\u227a', + 'prap;': '\u2ab7', + 'prcue;': '\u227c', + 'prE;': '\u2ab3', + 'pre;': '\u2aaf', + 'prec;': '\u227a', + 'precapprox;': '\u2ab7', + 'preccurlyeq;': '\u227c', + 'Precedes;': '\u227a', + 'PrecedesEqual;': '\u2aaf', + 'PrecedesSlantEqual;': '\u227c', + 'PrecedesTilde;': '\u227e', + 'preceq;': '\u2aaf', + 'precnapprox;': '\u2ab9', + 'precneqq;': '\u2ab5', + 'precnsim;': '\u22e8', + 'precsim;': '\u227e', + 'Prime;': '\u2033', + 'prime;': '\u2032', + 'primes;': '\u2119', + 'prnap;': '\u2ab9', + 'prnE;': '\u2ab5', + 'prnsim;': '\u22e8', + 'prod;': '\u220f', + 'Product;': '\u220f', + 'profalar;': '\u232e', + 'profline;': '\u2312', + 'profsurf;': '\u2313', + 'prop;': '\u221d', + 'Proportion;': '\u2237', + 'Proportional;': '\u221d', + 'propto;': '\u221d', + 'prsim;': '\u227e', + 'prurel;': '\u22b0', + 'Pscr;': '\U0001d4ab', + 'pscr;': '\U0001d4c5', + 'Psi;': '\u03a8', + 'psi;': '\u03c8', + 'puncsp;': '\u2008', + 'Qfr;': '\U0001d514', + 'qfr;': '\U0001d52e', + 'qint;': '\u2a0c', + 'Qopf;': '\u211a', + 'qopf;': '\U0001d562', + 'qprime;': '\u2057', + 'Qscr;': '\U0001d4ac', + 'qscr;': '\U0001d4c6', + 'quaternions;': '\u210d', + 'quatint;': '\u2a16', + 'quest;': '?', + 'questeq;': '\u225f', + 'QUOT': '"', + 'quot': '"', + 'QUOT;': '"', + 'quot;': '"', + 'rAarr;': '\u21db', + 'race;': '\u223d\u0331', + 'Racute;': '\u0154', + 'racute;': '\u0155', + 'radic;': '\u221a', + 'raemptyv;': '\u29b3', + 'Rang;': '\u27eb', + 'rang;': '\u27e9', + 'rangd;': '\u2992', + 'range;': '\u29a5', + 'rangle;': '\u27e9', + 'raquo': '\xbb', + 'raquo;': '\xbb', + 'Rarr;': '\u21a0', + 'rArr;': '\u21d2', + 'rarr;': '\u2192', + 'rarrap;': '\u2975', + 'rarrb;': '\u21e5', + 'rarrbfs;': '\u2920', + 'rarrc;': 
'\u2933', + 'rarrfs;': '\u291e', + 'rarrhk;': '\u21aa', + 'rarrlp;': '\u21ac', + 'rarrpl;': '\u2945', + 'rarrsim;': '\u2974', + 'Rarrtl;': '\u2916', + 'rarrtl;': '\u21a3', + 'rarrw;': '\u219d', + 'rAtail;': '\u291c', + 'ratail;': '\u291a', + 'ratio;': '\u2236', + 'rationals;': '\u211a', + 'RBarr;': '\u2910', + 'rBarr;': '\u290f', + 'rbarr;': '\u290d', + 'rbbrk;': '\u2773', + 'rbrace;': '}', + 'rbrack;': ']', + 'rbrke;': '\u298c', + 'rbrksld;': '\u298e', + 'rbrkslu;': '\u2990', + 'Rcaron;': '\u0158', + 'rcaron;': '\u0159', + 'Rcedil;': '\u0156', + 'rcedil;': '\u0157', + 'rceil;': '\u2309', + 'rcub;': '}', + 'Rcy;': '\u0420', + 'rcy;': '\u0440', + 'rdca;': '\u2937', + 'rdldhar;': '\u2969', + 'rdquo;': '\u201d', + 'rdquor;': '\u201d', + 'rdsh;': '\u21b3', + 'Re;': '\u211c', + 'real;': '\u211c', + 'realine;': '\u211b', + 'realpart;': '\u211c', + 'reals;': '\u211d', + 'rect;': '\u25ad', + 'REG': '\xae', + 'reg': '\xae', + 'REG;': '\xae', + 'reg;': '\xae', + 'ReverseElement;': '\u220b', + 'ReverseEquilibrium;': '\u21cb', + 'ReverseUpEquilibrium;': '\u296f', + 'rfisht;': '\u297d', + 'rfloor;': '\u230b', + 'Rfr;': '\u211c', + 'rfr;': '\U0001d52f', + 'rHar;': '\u2964', + 'rhard;': '\u21c1', + 'rharu;': '\u21c0', + 'rharul;': '\u296c', + 'Rho;': '\u03a1', + 'rho;': '\u03c1', + 'rhov;': '\u03f1', + 'RightAngleBracket;': '\u27e9', + 'RightArrow;': '\u2192', + 'Rightarrow;': '\u21d2', + 'rightarrow;': '\u2192', + 'RightArrowBar;': '\u21e5', + 'RightArrowLeftArrow;': '\u21c4', + 'rightarrowtail;': '\u21a3', + 'RightCeiling;': '\u2309', + 'RightDoubleBracket;': '\u27e7', + 'RightDownTeeVector;': '\u295d', + 'RightDownVector;': '\u21c2', + 'RightDownVectorBar;': '\u2955', + 'RightFloor;': '\u230b', + 'rightharpoondown;': '\u21c1', + 'rightharpoonup;': '\u21c0', + 'rightleftarrows;': '\u21c4', + 'rightleftharpoons;': '\u21cc', + 'rightrightarrows;': '\u21c9', + 'rightsquigarrow;': '\u219d', + 'RightTee;': '\u22a2', + 'RightTeeArrow;': '\u21a6', + 'RightTeeVector;': '\u295b', + 
'rightthreetimes;': '\u22cc', + 'RightTriangle;': '\u22b3', + 'RightTriangleBar;': '\u29d0', + 'RightTriangleEqual;': '\u22b5', + 'RightUpDownVector;': '\u294f', + 'RightUpTeeVector;': '\u295c', + 'RightUpVector;': '\u21be', + 'RightUpVectorBar;': '\u2954', + 'RightVector;': '\u21c0', + 'RightVectorBar;': '\u2953', + 'ring;': '\u02da', + 'risingdotseq;': '\u2253', + 'rlarr;': '\u21c4', + 'rlhar;': '\u21cc', + 'rlm;': '\u200f', + 'rmoust;': '\u23b1', + 'rmoustache;': '\u23b1', + 'rnmid;': '\u2aee', + 'roang;': '\u27ed', + 'roarr;': '\u21fe', + 'robrk;': '\u27e7', + 'ropar;': '\u2986', + 'Ropf;': '\u211d', + 'ropf;': '\U0001d563', + 'roplus;': '\u2a2e', + 'rotimes;': '\u2a35', + 'RoundImplies;': '\u2970', + 'rpar;': ')', + 'rpargt;': '\u2994', + 'rppolint;': '\u2a12', + 'rrarr;': '\u21c9', + 'Rrightarrow;': '\u21db', + 'rsaquo;': '\u203a', + 'Rscr;': '\u211b', + 'rscr;': '\U0001d4c7', + 'Rsh;': '\u21b1', + 'rsh;': '\u21b1', + 'rsqb;': ']', + 'rsquo;': '\u2019', + 'rsquor;': '\u2019', + 'rthree;': '\u22cc', + 'rtimes;': '\u22ca', + 'rtri;': '\u25b9', + 'rtrie;': '\u22b5', + 'rtrif;': '\u25b8', + 'rtriltri;': '\u29ce', + 'RuleDelayed;': '\u29f4', + 'ruluhar;': '\u2968', + 'rx;': '\u211e', + 'Sacute;': '\u015a', + 'sacute;': '\u015b', + 'sbquo;': '\u201a', + 'Sc;': '\u2abc', + 'sc;': '\u227b', + 'scap;': '\u2ab8', + 'Scaron;': '\u0160', + 'scaron;': '\u0161', + 'sccue;': '\u227d', + 'scE;': '\u2ab4', + 'sce;': '\u2ab0', + 'Scedil;': '\u015e', + 'scedil;': '\u015f', + 'Scirc;': '\u015c', + 'scirc;': '\u015d', + 'scnap;': '\u2aba', + 'scnE;': '\u2ab6', + 'scnsim;': '\u22e9', + 'scpolint;': '\u2a13', + 'scsim;': '\u227f', + 'Scy;': '\u0421', + 'scy;': '\u0441', + 'sdot;': '\u22c5', + 'sdotb;': '\u22a1', + 'sdote;': '\u2a66', + 'searhk;': '\u2925', + 'seArr;': '\u21d8', + 'searr;': '\u2198', + 'searrow;': '\u2198', + 'sect': '\xa7', + 'sect;': '\xa7', + 'semi;': ';', + 'seswar;': '\u2929', + 'setminus;': '\u2216', + 'setmn;': '\u2216', + 'sext;': '\u2736', + 'Sfr;': 
'\U0001d516', + 'sfr;': '\U0001d530', + 'sfrown;': '\u2322', + 'sharp;': '\u266f', + 'SHCHcy;': '\u0429', + 'shchcy;': '\u0449', + 'SHcy;': '\u0428', + 'shcy;': '\u0448', + 'ShortDownArrow;': '\u2193', + 'ShortLeftArrow;': '\u2190', + 'shortmid;': '\u2223', + 'shortparallel;': '\u2225', + 'ShortRightArrow;': '\u2192', + 'ShortUpArrow;': '\u2191', + 'shy': '\xad', + 'shy;': '\xad', + 'Sigma;': '\u03a3', + 'sigma;': '\u03c3', + 'sigmaf;': '\u03c2', + 'sigmav;': '\u03c2', + 'sim;': '\u223c', + 'simdot;': '\u2a6a', + 'sime;': '\u2243', + 'simeq;': '\u2243', + 'simg;': '\u2a9e', + 'simgE;': '\u2aa0', + 'siml;': '\u2a9d', + 'simlE;': '\u2a9f', + 'simne;': '\u2246', + 'simplus;': '\u2a24', + 'simrarr;': '\u2972', + 'slarr;': '\u2190', + 'SmallCircle;': '\u2218', + 'smallsetminus;': '\u2216', + 'smashp;': '\u2a33', + 'smeparsl;': '\u29e4', + 'smid;': '\u2223', + 'smile;': '\u2323', + 'smt;': '\u2aaa', + 'smte;': '\u2aac', + 'smtes;': '\u2aac\ufe00', + 'SOFTcy;': '\u042c', + 'softcy;': '\u044c', + 'sol;': '/', + 'solb;': '\u29c4', + 'solbar;': '\u233f', + 'Sopf;': '\U0001d54a', + 'sopf;': '\U0001d564', + 'spades;': '\u2660', + 'spadesuit;': '\u2660', + 'spar;': '\u2225', + 'sqcap;': '\u2293', + 'sqcaps;': '\u2293\ufe00', + 'sqcup;': '\u2294', + 'sqcups;': '\u2294\ufe00', + 'Sqrt;': '\u221a', + 'sqsub;': '\u228f', + 'sqsube;': '\u2291', + 'sqsubset;': '\u228f', + 'sqsubseteq;': '\u2291', + 'sqsup;': '\u2290', + 'sqsupe;': '\u2292', + 'sqsupset;': '\u2290', + 'sqsupseteq;': '\u2292', + 'squ;': '\u25a1', + 'Square;': '\u25a1', + 'square;': '\u25a1', + 'SquareIntersection;': '\u2293', + 'SquareSubset;': '\u228f', + 'SquareSubsetEqual;': '\u2291', + 'SquareSuperset;': '\u2290', + 'SquareSupersetEqual;': '\u2292', + 'SquareUnion;': '\u2294', + 'squarf;': '\u25aa', + 'squf;': '\u25aa', + 'srarr;': '\u2192', + 'Sscr;': '\U0001d4ae', + 'sscr;': '\U0001d4c8', + 'ssetmn;': '\u2216', + 'ssmile;': '\u2323', + 'sstarf;': '\u22c6', + 'Star;': '\u22c6', + 'star;': '\u2606', + 'starf;': 
'\u2605', + 'straightepsilon;': '\u03f5', + 'straightphi;': '\u03d5', + 'strns;': '\xaf', + 'Sub;': '\u22d0', + 'sub;': '\u2282', + 'subdot;': '\u2abd', + 'subE;': '\u2ac5', + 'sube;': '\u2286', + 'subedot;': '\u2ac3', + 'submult;': '\u2ac1', + 'subnE;': '\u2acb', + 'subne;': '\u228a', + 'subplus;': '\u2abf', + 'subrarr;': '\u2979', + 'Subset;': '\u22d0', + 'subset;': '\u2282', + 'subseteq;': '\u2286', + 'subseteqq;': '\u2ac5', + 'SubsetEqual;': '\u2286', + 'subsetneq;': '\u228a', + 'subsetneqq;': '\u2acb', + 'subsim;': '\u2ac7', + 'subsub;': '\u2ad5', + 'subsup;': '\u2ad3', + 'succ;': '\u227b', + 'succapprox;': '\u2ab8', + 'succcurlyeq;': '\u227d', + 'Succeeds;': '\u227b', + 'SucceedsEqual;': '\u2ab0', + 'SucceedsSlantEqual;': '\u227d', + 'SucceedsTilde;': '\u227f', + 'succeq;': '\u2ab0', + 'succnapprox;': '\u2aba', + 'succneqq;': '\u2ab6', + 'succnsim;': '\u22e9', + 'succsim;': '\u227f', + 'SuchThat;': '\u220b', + 'Sum;': '\u2211', + 'sum;': '\u2211', + 'sung;': '\u266a', + 'sup1': '\xb9', + 'sup1;': '\xb9', + 'sup2': '\xb2', + 'sup2;': '\xb2', + 'sup3': '\xb3', + 'sup3;': '\xb3', + 'Sup;': '\u22d1', + 'sup;': '\u2283', + 'supdot;': '\u2abe', + 'supdsub;': '\u2ad8', + 'supE;': '\u2ac6', + 'supe;': '\u2287', + 'supedot;': '\u2ac4', + 'Superset;': '\u2283', + 'SupersetEqual;': '\u2287', + 'suphsol;': '\u27c9', + 'suphsub;': '\u2ad7', + 'suplarr;': '\u297b', + 'supmult;': '\u2ac2', + 'supnE;': '\u2acc', + 'supne;': '\u228b', + 'supplus;': '\u2ac0', + 'Supset;': '\u22d1', + 'supset;': '\u2283', + 'supseteq;': '\u2287', + 'supseteqq;': '\u2ac6', + 'supsetneq;': '\u228b', + 'supsetneqq;': '\u2acc', + 'supsim;': '\u2ac8', + 'supsub;': '\u2ad4', + 'supsup;': '\u2ad6', + 'swarhk;': '\u2926', + 'swArr;': '\u21d9', + 'swarr;': '\u2199', + 'swarrow;': '\u2199', + 'swnwar;': '\u292a', + 'szlig': '\xdf', + 'szlig;': '\xdf', + 'Tab;': '\t', + 'target;': '\u2316', + 'Tau;': '\u03a4', + 'tau;': '\u03c4', + 'tbrk;': '\u23b4', + 'Tcaron;': '\u0164', + 'tcaron;': '\u0165', + 
'Tcedil;': '\u0162', + 'tcedil;': '\u0163', + 'Tcy;': '\u0422', + 'tcy;': '\u0442', + 'tdot;': '\u20db', + 'telrec;': '\u2315', + 'Tfr;': '\U0001d517', + 'tfr;': '\U0001d531', + 'there4;': '\u2234', + 'Therefore;': '\u2234', + 'therefore;': '\u2234', + 'Theta;': '\u0398', + 'theta;': '\u03b8', + 'thetasym;': '\u03d1', + 'thetav;': '\u03d1', + 'thickapprox;': '\u2248', + 'thicksim;': '\u223c', + 'ThickSpace;': '\u205f\u200a', + 'thinsp;': '\u2009', + 'ThinSpace;': '\u2009', + 'thkap;': '\u2248', + 'thksim;': '\u223c', + 'THORN': '\xde', + 'thorn': '\xfe', + 'THORN;': '\xde', + 'thorn;': '\xfe', + 'Tilde;': '\u223c', + 'tilde;': '\u02dc', + 'TildeEqual;': '\u2243', + 'TildeFullEqual;': '\u2245', + 'TildeTilde;': '\u2248', + 'times': '\xd7', + 'times;': '\xd7', + 'timesb;': '\u22a0', + 'timesbar;': '\u2a31', + 'timesd;': '\u2a30', + 'tint;': '\u222d', + 'toea;': '\u2928', + 'top;': '\u22a4', + 'topbot;': '\u2336', + 'topcir;': '\u2af1', + 'Topf;': '\U0001d54b', + 'topf;': '\U0001d565', + 'topfork;': '\u2ada', + 'tosa;': '\u2929', + 'tprime;': '\u2034', + 'TRADE;': '\u2122', + 'trade;': '\u2122', + 'triangle;': '\u25b5', + 'triangledown;': '\u25bf', + 'triangleleft;': '\u25c3', + 'trianglelefteq;': '\u22b4', + 'triangleq;': '\u225c', + 'triangleright;': '\u25b9', + 'trianglerighteq;': '\u22b5', + 'tridot;': '\u25ec', + 'trie;': '\u225c', + 'triminus;': '\u2a3a', + 'TripleDot;': '\u20db', + 'triplus;': '\u2a39', + 'trisb;': '\u29cd', + 'tritime;': '\u2a3b', + 'trpezium;': '\u23e2', + 'Tscr;': '\U0001d4af', + 'tscr;': '\U0001d4c9', + 'TScy;': '\u0426', + 'tscy;': '\u0446', + 'TSHcy;': '\u040b', + 'tshcy;': '\u045b', + 'Tstrok;': '\u0166', + 'tstrok;': '\u0167', + 'twixt;': '\u226c', + 'twoheadleftarrow;': '\u219e', + 'twoheadrightarrow;': '\u21a0', + 'Uacute': '\xda', + 'uacute': '\xfa', + 'Uacute;': '\xda', + 'uacute;': '\xfa', + 'Uarr;': '\u219f', + 'uArr;': '\u21d1', + 'uarr;': '\u2191', + 'Uarrocir;': '\u2949', + 'Ubrcy;': '\u040e', + 'ubrcy;': '\u045e', + 'Ubreve;': 
'\u016c', + 'ubreve;': '\u016d', + 'Ucirc': '\xdb', + 'ucirc': '\xfb', + 'Ucirc;': '\xdb', + 'ucirc;': '\xfb', + 'Ucy;': '\u0423', + 'ucy;': '\u0443', + 'udarr;': '\u21c5', + 'Udblac;': '\u0170', + 'udblac;': '\u0171', + 'udhar;': '\u296e', + 'ufisht;': '\u297e', + 'Ufr;': '\U0001d518', + 'ufr;': '\U0001d532', + 'Ugrave': '\xd9', + 'ugrave': '\xf9', + 'Ugrave;': '\xd9', + 'ugrave;': '\xf9', + 'uHar;': '\u2963', + 'uharl;': '\u21bf', + 'uharr;': '\u21be', + 'uhblk;': '\u2580', + 'ulcorn;': '\u231c', + 'ulcorner;': '\u231c', + 'ulcrop;': '\u230f', + 'ultri;': '\u25f8', + 'Umacr;': '\u016a', + 'umacr;': '\u016b', + 'uml': '\xa8', + 'uml;': '\xa8', + 'UnderBar;': '_', + 'UnderBrace;': '\u23df', + 'UnderBracket;': '\u23b5', + 'UnderParenthesis;': '\u23dd', + 'Union;': '\u22c3', + 'UnionPlus;': '\u228e', + 'Uogon;': '\u0172', + 'uogon;': '\u0173', + 'Uopf;': '\U0001d54c', + 'uopf;': '\U0001d566', + 'UpArrow;': '\u2191', + 'Uparrow;': '\u21d1', + 'uparrow;': '\u2191', + 'UpArrowBar;': '\u2912', + 'UpArrowDownArrow;': '\u21c5', + 'UpDownArrow;': '\u2195', + 'Updownarrow;': '\u21d5', + 'updownarrow;': '\u2195', + 'UpEquilibrium;': '\u296e', + 'upharpoonleft;': '\u21bf', + 'upharpoonright;': '\u21be', + 'uplus;': '\u228e', + 'UpperLeftArrow;': '\u2196', + 'UpperRightArrow;': '\u2197', + 'Upsi;': '\u03d2', + 'upsi;': '\u03c5', + 'upsih;': '\u03d2', + 'Upsilon;': '\u03a5', + 'upsilon;': '\u03c5', + 'UpTee;': '\u22a5', + 'UpTeeArrow;': '\u21a5', + 'upuparrows;': '\u21c8', + 'urcorn;': '\u231d', + 'urcorner;': '\u231d', + 'urcrop;': '\u230e', + 'Uring;': '\u016e', + 'uring;': '\u016f', + 'urtri;': '\u25f9', + 'Uscr;': '\U0001d4b0', + 'uscr;': '\U0001d4ca', + 'utdot;': '\u22f0', + 'Utilde;': '\u0168', + 'utilde;': '\u0169', + 'utri;': '\u25b5', + 'utrif;': '\u25b4', + 'uuarr;': '\u21c8', + 'Uuml': '\xdc', + 'uuml': '\xfc', + 'Uuml;': '\xdc', + 'uuml;': '\xfc', + 'uwangle;': '\u29a7', + 'vangrt;': '\u299c', + 'varepsilon;': '\u03f5', + 'varkappa;': '\u03f0', + 'varnothing;': 
'\u2205', + 'varphi;': '\u03d5', + 'varpi;': '\u03d6', + 'varpropto;': '\u221d', + 'vArr;': '\u21d5', + 'varr;': '\u2195', + 'varrho;': '\u03f1', + 'varsigma;': '\u03c2', + 'varsubsetneq;': '\u228a\ufe00', + 'varsubsetneqq;': '\u2acb\ufe00', + 'varsupsetneq;': '\u228b\ufe00', + 'varsupsetneqq;': '\u2acc\ufe00', + 'vartheta;': '\u03d1', + 'vartriangleleft;': '\u22b2', + 'vartriangleright;': '\u22b3', + 'Vbar;': '\u2aeb', + 'vBar;': '\u2ae8', + 'vBarv;': '\u2ae9', + 'Vcy;': '\u0412', + 'vcy;': '\u0432', + 'VDash;': '\u22ab', + 'Vdash;': '\u22a9', + 'vDash;': '\u22a8', + 'vdash;': '\u22a2', + 'Vdashl;': '\u2ae6', + 'Vee;': '\u22c1', + 'vee;': '\u2228', + 'veebar;': '\u22bb', + 'veeeq;': '\u225a', + 'vellip;': '\u22ee', + 'Verbar;': '\u2016', + 'verbar;': '|', + 'Vert;': '\u2016', + 'vert;': '|', + 'VerticalBar;': '\u2223', + 'VerticalLine;': '|', + 'VerticalSeparator;': '\u2758', + 'VerticalTilde;': '\u2240', + 'VeryThinSpace;': '\u200a', + 'Vfr;': '\U0001d519', + 'vfr;': '\U0001d533', + 'vltri;': '\u22b2', + 'vnsub;': '\u2282\u20d2', + 'vnsup;': '\u2283\u20d2', + 'Vopf;': '\U0001d54d', + 'vopf;': '\U0001d567', + 'vprop;': '\u221d', + 'vrtri;': '\u22b3', + 'Vscr;': '\U0001d4b1', + 'vscr;': '\U0001d4cb', + 'vsubnE;': '\u2acb\ufe00', + 'vsubne;': '\u228a\ufe00', + 'vsupnE;': '\u2acc\ufe00', + 'vsupne;': '\u228b\ufe00', + 'Vvdash;': '\u22aa', + 'vzigzag;': '\u299a', + 'Wcirc;': '\u0174', + 'wcirc;': '\u0175', + 'wedbar;': '\u2a5f', + 'Wedge;': '\u22c0', + 'wedge;': '\u2227', + 'wedgeq;': '\u2259', + 'weierp;': '\u2118', + 'Wfr;': '\U0001d51a', + 'wfr;': '\U0001d534', + 'Wopf;': '\U0001d54e', + 'wopf;': '\U0001d568', + 'wp;': '\u2118', + 'wr;': '\u2240', + 'wreath;': '\u2240', + 'Wscr;': '\U0001d4b2', + 'wscr;': '\U0001d4cc', + 'xcap;': '\u22c2', + 'xcirc;': '\u25ef', + 'xcup;': '\u22c3', + 'xdtri;': '\u25bd', + 'Xfr;': '\U0001d51b', + 'xfr;': '\U0001d535', + 'xhArr;': '\u27fa', + 'xharr;': '\u27f7', + 'Xi;': '\u039e', + 'xi;': '\u03be', + 'xlArr;': '\u27f8', + 'xlarr;': 
'\u27f5', + 'xmap;': '\u27fc', + 'xnis;': '\u22fb', + 'xodot;': '\u2a00', + 'Xopf;': '\U0001d54f', + 'xopf;': '\U0001d569', + 'xoplus;': '\u2a01', + 'xotime;': '\u2a02', + 'xrArr;': '\u27f9', + 'xrarr;': '\u27f6', + 'Xscr;': '\U0001d4b3', + 'xscr;': '\U0001d4cd', + 'xsqcup;': '\u2a06', + 'xuplus;': '\u2a04', + 'xutri;': '\u25b3', + 'xvee;': '\u22c1', + 'xwedge;': '\u22c0', + 'Yacute': '\xdd', + 'yacute': '\xfd', + 'Yacute;': '\xdd', + 'yacute;': '\xfd', + 'YAcy;': '\u042f', + 'yacy;': '\u044f', + 'Ycirc;': '\u0176', + 'ycirc;': '\u0177', + 'Ycy;': '\u042b', + 'ycy;': '\u044b', + 'yen': '\xa5', + 'yen;': '\xa5', + 'Yfr;': '\U0001d51c', + 'yfr;': '\U0001d536', + 'YIcy;': '\u0407', + 'yicy;': '\u0457', + 'Yopf;': '\U0001d550', + 'yopf;': '\U0001d56a', + 'Yscr;': '\U0001d4b4', + 'yscr;': '\U0001d4ce', + 'YUcy;': '\u042e', + 'yucy;': '\u044e', + 'yuml': '\xff', + 'Yuml;': '\u0178', + 'yuml;': '\xff', + 'Zacute;': '\u0179', + 'zacute;': '\u017a', + 'Zcaron;': '\u017d', + 'zcaron;': '\u017e', + 'Zcy;': '\u0417', + 'zcy;': '\u0437', + 'Zdot;': '\u017b', + 'zdot;': '\u017c', + 'zeetrf;': '\u2128', + 'ZeroWidthSpace;': '\u200b', + 'Zeta;': '\u0396', + 'zeta;': '\u03b6', + 'Zfr;': '\u2128', + 'zfr;': '\U0001d537', + 'ZHcy;': '\u0416', + 'zhcy;': '\u0436', + 'zigrarr;': '\u21dd', + 'Zopf;': '\u2124', + 'zopf;': '\U0001d56b', + 'Zscr;': '\U0001d4b5', + 'zscr;': '\U0001d4cf', + 'zwj;': '\u200d', + 'zwnj;': '\u200c', + } + try: import http.client as compat_http_client except ImportError: # Python 2 @@ -83,7 +2321,6 @@ try: except ImportError: # Python 2 from HTMLParser import HTMLParser as compat_HTMLParser - try: from subprocess import DEVNULL compat_subprocess_get_DEVNULL = lambda: DEVNULL @@ -626,6 +2863,7 @@ __all__ = [ 'compat_getenv', 'compat_getpass', 'compat_html_entities', + 'compat_html_entities_html5', 'compat_http_client', 'compat_http_server', 'compat_input', From 55b2f099c0c820d6c4b46609b175a44a6d7f97bf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:11:55 +0800 Subject: [PATCH 0744/3599] [utils] Decode HTML5 entities Used in test_Vporn_1. Also related to #9270 --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index feef80465..0e25de6b7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -249,6 +249,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(unescapeHTML('/'), '/') self.assertEqual(unescapeHTML('é'), 'é') self.assertEqual(unescapeHTML('�'), '�') + # HTML5 entities + self.assertEqual(unescapeHTML('.''), '.\'') def test_date_from_str(self): self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day')) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 229de4b39..f77ab8650 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -39,6 +39,7 @@ from .compat import ( compat_chr, compat_etree_fromstring, compat_html_entities, + compat_html_entities_html5, compat_http_client, compat_kwargs, compat_parse_qs, @@ -456,12 +457,19 @@ def orderedSet(iterable): return res -def _htmlentity_transform(entity): +def _htmlentity_transform(entity_with_semicolon): """Transforms an HTML entity to a character.""" + entity = entity_with_semicolon[:-1] + # Known non-numeric HTML entity if entity in compat_html_entities.name2codepoint: return compat_chr(compat_html_entities.name2codepoint[entity]) + # TODO: HTML5 allows entities without a semicolon. For example, + # 'Éric' should be decoded as 'Éric'. 
+ if entity_with_semicolon in compat_html_entities_html5: + return compat_html_entities_html5[entity_with_semicolon] + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) @@ -486,7 +494,7 @@ def unescapeHTML(s): assert type(s) == compat_str return re.sub( - r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s) + r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) def get_subprocess_encoding(): From a2252385308898074f5006ed737aeb98bb8b0402 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 15:12:53 +0800 Subject: [PATCH 0745/3599] [vporn] Improve error detection and update _TESTS --- youtube_dl/extractor/vporn.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 92c90e517..1557a0e04 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, parse_duration, str_to_int, ) @@ -27,7 +28,8 @@ class VpornIE(InfoExtractor): 'duration': 393, 'age_limit': 18, 'view_count': int, - } + }, + 'skip': 'video removed', }, { 'url': 'http://www.vporn.com/female/hana-shower/523564/', @@ -40,7 +42,7 @@ class VpornIE(InfoExtractor): 'description': 'Hana showers at the bathroom.', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'Hmmmmm', - 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'], + 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'], 'duration': 588, 'age_limit': 18, 'view_count': int, @@ -55,6 +57,10 @@ class VpornIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!' 
+ if errmsg in webpage: + raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + title = self._html_search_regex( r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() description = self._html_search_regex( From c16f8a4659566fd7421226b0d5ddb871425b392b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:04:28 +0800 Subject: [PATCH 0746/3599] [voicerepublic] Force video_id to be strings Related: be6217b26142491232fb697b125015d45437832d --- youtube_dl/extractor/voicerepublic.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 93d15a556..4f1a99a89 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( ExtractorError, determine_ext, @@ -16,13 +19,13 @@ class VoiceRepublicIE(InfoExtractor): _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)' _TESTS = [{ 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', - 'md5': '0554a24d1657915aa8e8f84e15dc9353', + 'md5': 'b9174d651323f17783000876347116e3', 'info_dict': { 'id': '2296', 'display_id': 'watching-the-watchers-building-a-sousveillance-state', 'ext': 'm4a', 'title': 'Watching the Watchers: Building a Sousveillance State', - 'description': 'md5:715ba964958afa2398df615809cfecb1', + 'description': 'Secret surveillance programs have metadata too. 
The people and companies that operate secret surveillance programs can be surveilled.', 'thumbnail': 're:^https?://.*\.(?:png|jpg)$', 'duration': 1800, 'view_count': int, @@ -52,7 +55,7 @@ class VoiceRepublicIE(InfoExtractor): if data: title = data['title'] description = data.get('teaser') - talk_id = data.get('talk_id') or display_id + talk_id = compat_str(data.get('talk_id') or display_id) talk = data['talk'] duration = int_or_none(talk.get('duration')) formats = [{ From 09728d5fbc93c769b3f8971c06e9ed0bfb168b37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:11:28 +0800 Subject: [PATCH 0747/3599] [audiomack:album] Force video_id to be strings Related: be6217b26142491232fb697b125015d45437832d --- youtube_dl/extractor/audiomack.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py index a52d26cec..f3bd4d444 100644 --- a/youtube_dl/extractor/audiomack.py +++ b/youtube_dl/extractor/audiomack.py @@ -6,6 +6,7 @@ import time from .common import InfoExtractor from .soundcloud import SoundcloudIE +from ..compat import compat_str from ..utils import ( ExtractorError, url_basename, @@ -136,7 +137,7 @@ class AudiomackAlbumIE(InfoExtractor): result[resultkey] = api_response[apikey] song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ - 'id': api_response.get('id', song_id), + 'id': compat_str(api_response.get('id', song_id)), 'uploader': api_response.get('artist'), 'title': api_response.get('title', song_id), 'url': api_response['url'], From daa0df9e8beac1325e5fb55d828e7a3a38e74bf6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:37:12 +0800 Subject: [PATCH 0748/3599] [youtube:user] Support another URL form Such an URL comes from http://www.gametrailers.com/. This is originally a test case in GenericIE, but now seems all GameTrailers videos are on YouTube. 
--- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6c9f77d95..00dd602ff 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1988,7 +1988,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' @@ -2001,6 +2001,9 @@ class YoutubeUserIE(YoutubeChannelIE): }, { 'url': 'ytuser:phihag', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/gametrailers', + 'only_matching': True, }] @classmethod From 1fa309da40bfc5e7e72639e80cf6556b3839fc81 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:39:31 +0800 Subject: [PATCH 0749/3599] [generic] Update test_Generic_40 The original link now redirects to an YouTube user channel. 
--- youtube_dl/extractor/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 798c109c6..ef18ce3dc 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -627,13 +627,13 @@ class GenericIE(InfoExtractor): }, # MTVSercices embed { - 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', - 'md5': '35727f82f58c76d996fc188f9755b0d5', + 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', + 'md5': 'ca1aef97695ef2c1d6973256a57e5252', 'info_dict': { - 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', + 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1', 'ext': 'mp4', - 'title': 'Review', - 'description': 'Mario\'s life in the fast lane has never looked so good.', + 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored', + 'description': 'Two valets share their love for movie star Liam Neesons.', }, }, # YouTube embed via <data-embed-url=""> From 6c0376fe4f16f53fd87f5e6a56531fc153922980 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 16:53:40 +0800 Subject: [PATCH 0750/3599] [dw] Skip an invalid test DW documentaries only last for one or two weeks. 
See #9475 --- youtube_dl/extractor/dw.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/dw.py b/youtube_dl/extractor/dw.py index 0f0f0b8d3..d740652f1 100644 --- a/youtube_dl/extractor/dw.py +++ b/youtube_dl/extractor/dw.py @@ -35,6 +35,7 @@ class DWIE(InfoExtractor): 'upload_date': '20160311', } }, { + # DW documentaries, only last for one or two weeks 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', 'md5': '56b6214ef463bfb9a3b71aeb886f3cf1', 'info_dict': { @@ -44,6 +45,7 @@ class DWIE(InfoExtractor): 'description': 'Welcome to the 90s - The Golden Decade of Hip Hop', 'upload_date': '20160521', }, + 'skip': 'Video removed', }] def _real_extract(self, url): From 836ab0c554f13751adff02d3987f6f3f79e2db09 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 10 Jun 2016 18:12:57 +0800 Subject: [PATCH 0751/3599] [compat] Import html5 entities correctly --- youtube_dl/compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0243949a4..67db1c7c6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -64,8 +64,8 @@ except ImportError: # Python 2 import htmlentitydefs as compat_html_entities try: # Python >= 3.3 - from compat_html_entities import html as compat_html_entities_html5 -except ImportError: + compat_html_entities_html5 = compat_html_entities.html5 +except AttributeError: # Copied from CPython 3.5.1 html/entities.py compat_html_entities_html5 = { 'Aacute': '\xc1', From bdf16f81403c036a0f40d10a136a46aa7d2f6f0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Jun 2016 22:40:18 +0700 Subject: [PATCH 0752/3599] [lynda] Add support for new authentication (Closes #9740) --- youtube_dl/extractor/lynda.py | 115 ++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/youtube_dl/extractor/lynda.py 
b/youtube_dl/extractor/lynda.py index 86d47266f..c2678652e 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -1,84 +1,89 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_HTTPError, + compat_str, + compat_urlparse, +) from ..utils import ( ExtractorError, - clean_html, int_or_none, - sanitized_Request, urlencode_postdata, ) class LyndaBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' + _SIGNIN_URL = 'https://www.lynda.com/signin' + _PASSWORD_URL = 'https://www.lynda.com/signin/password' + _USER_URL = 'https://www.lynda.com/signin/user' _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' _NETRC_MACHINE = 'lynda' def _real_initialize(self): self._login() + @staticmethod + def _check_error(json_string, key_or_keys): + keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys + for key in keys: + error = json_string.get(key) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + def _login_step(self, form_html, fallback_action_url, extra_form_data, note, referrer_url): + action_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_html, + 'post url', default=fallback_action_url, group='url') + + if not action_url.startswith('http'): + action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url) + + form_data = self._hidden_inputs(form_html) + form_data.update(extra_form_data) + + try: + response = self._download_json( + action_url, None, note, + data=urlencode_postdata(form_data), + headers={ + 'Referer': referrer_url, + 'X-Requested-With': 'XMLHttpRequest', + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: + response = self._parse_json(e.cause.read().decode('utf-8'), None) + self._check_error(response, 
('email', 'password')) + raise + + self._check_error(response, 'ErrorMessage') + + return response, action_url + def _login(self): username, password = self._get_login_info() if username is None: return - login_form = { - 'username': username, - 'password': password, - 'remember': 'false', - 'stayPut': 'false' - } - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) - login_page = self._download_webpage( - request, None, 'Logging in as %s' % username) + # Step 1: download signin page + signin_page = self._download_webpage( + self._SIGNIN_URL, None, 'Downloading signin page') - # Not (yet) logged in - m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page) - if m is not None: - response = m.group('json') - response_json = json.loads(response) - state = response_json['state'] + # Step 2: submit email + signin_form = self._search_regex( + r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)', + signin_page, 'signin form') + signin_page, signin_url = self._login_step( + signin_form, self._PASSWORD_URL, {'email': username}, + 'Submitting email', self._SIGNIN_URL) - if state == 'notlogged': - raise ExtractorError( - 'Unable to login, incorrect username and/or password', - expected=True) - - # This is when we get popup: - # > You're already logged in to lynda.com on two devices. - # > If you log in here, we'll log you out of another device. - # So, we need to confirm this. 
- if state == 'conflicted': - confirm_form = { - 'username': '', - 'password': '', - 'resolve': 'true', - 'remember': 'false', - 'stayPut': 'false', - } - request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(confirm_form)) - login_page = self._download_webpage( - request, None, - 'Confirming log in and log out from another device') - - if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): - if 'login error' in login_page: - mobj = re.search( - r'(?s)<h1[^>]+class="topmost">(?P<title>[^<]+)</h1>\s*<div>(?P<description>.+?)</div>', - login_page) - if mobj: - raise ExtractorError( - 'lynda returned error: %s - %s' - % (mobj.group('title'), clean_html(mobj.group('description'))), - expected=True) - raise ExtractorError('Unable to log in') + # Step 3: submit password + password_form = signin_page['body'] + self._login_step( + password_form, self._USER_URL, {'email': username, 'password': password}, + 'Submitting password', signin_url) def _logout(self): username, _ = self._get_login_info() From 3841256c2c5fd35229cd8f2c2c8a8e2401f7016b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Jun 2016 23:01:52 +0700 Subject: [PATCH 0753/3599] [lynda] Skip login if already logged in --- youtube_dl/extractor/lynda.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index c2678652e..7610985b4 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -71,6 +71,11 @@ class LyndaBaseIE(InfoExtractor): signin_page = self._download_webpage( self._SIGNIN_URL, None, 'Downloading signin page') + # Already logged in + if any(re.search(p, signin_page) for p in ( + 'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): + return + # Step 2: submit email signin_form = self._search_regex( r'(?s)(<form[^>]+data-form-name=["\']signin["\'][^>]*>.+?</form>)', @@ -85,15 
+90,6 @@ class LyndaBaseIE(InfoExtractor): password_form, self._USER_URL, {'email': username, 'password': password}, 'Submitting password', signin_url) - def _logout(self): - username, _ = self._get_login_info() - if username is None: - return - - self._download_webpage( - 'http://www.lynda.com/ajax/logout.aspx', None, - 'Logging out', 'Unable to log out', fatal=False) - class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' @@ -217,8 +213,6 @@ class LyndaCourseIE(LyndaBaseIE): 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, course_id, 'Downloading course JSON') - self._logout() - if course.get('Status') == 'NotFound': raise ExtractorError( 'Course %s does not exist' % course_id, expected=True) From 0434358823a9b7da7656f3e6d8de28d1b42036f5 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Fri, 10 Jun 2016 19:17:58 +0200 Subject: [PATCH 0754/3599] [Lynda] Extract course description --- youtube_dl/extractor/lynda.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 86d47266f..c1bca5678 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -246,5 +246,6 @@ class LyndaCourseIE(LyndaBaseIE): % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) course_title = course.get('Title') + course_description = course.get('Description') - return self.playlist_result(entries, course_id, course_title) + return self.playlist_result(entries, course_id, course_title, course_description) From d845622b2e09ebac28e21f76f6d5c2795aa9bb50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 02:41:48 +0700 Subject: [PATCH 0755/3599] release 2016.06.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- README.md | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md 
b/.github/ISSUE_TEMPLATE.md index e593ee78a..16ef23066 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.03 +[debug] youtube-dl version 2016.06.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/README.md b/README.md index 205c485d0..2ea8acb30 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. 
--write-info-json Write video metadata to a .info.json file --write-annotations Write video annotations to a .annotations.xml file - --load-info FILE JSON file containing the video information + --load-info-json FILE JSON file containing the video information (created with the "--write-info-json" option) --cookies FILE File to read cookies from and dump cookie diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 619bd0825..f89c2d1f2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **AdobeTVVideo** - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network + - **AfreecaTV**: afreecatv.com - **Aftonbladet** - **AirMozilla** - **AlJazeera** @@ -43,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -253,6 +254,7 @@ - **Globo** - **GloboArticle** - **GodTube** + - **GodTV** - **GoldenMoustache** - **Golem** - **GoogleDrive** @@ -738,6 +740,7 @@ - **VideoPremium** - **VideoTt**: video.tt - Your True Tube (Currently broken) - **videoweed**: VideoWeed + - **Vidio** - **vidme** - **vidme:user** - **vidme:user:likes** @@ -773,7 +776,6 @@ - **VRT** - **vube**: Vube.com - **VuClip** - - **vulture.com** - **Walla** - **washingtonpost** - **washingtonpost:article** @@ -781,10 +783,8 @@ - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** - - **WDRMaus**: Sendung mit der Maus - **WebOfStories** - **WebOfStoriesPlaylist** - - **Weibo** - **WeiqiTV**: WQTV - **wholecloud**: WholeCloud - **Wimp** @@ -820,6 +820,7 @@ - **Ynet** - **YouJizz** - **youku**: 优酷 + - **youku:show** - **YouPorn** - **YourUpload** - **youtube**: YouTube.com diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d24d06f4a..dafb6513a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 
+1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.03' +__version__ = '2016.06.11' From 6626c214e1e0fa422d68b875cbb69dfb5aad8745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 03:00:08 +0700 Subject: [PATCH 0756/3599] release 2016.06.11.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 16ef23066..564cffae7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11 +[debug] youtube-dl version 2016.06.11.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dafb6513a..5bcb6a7b3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11' +__version__ = '2016.06.11.1' From 9ddc289f88542f4b0bf7ad5e9c725caf8889f71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 04:59:47 +0700 Subject: [PATCH 0757/3599] [README.md] Document missing playlist fields in output template --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 2ea8acb30..3ff33c156 100644 --- a/README.md +++ b/README.md @@ -511,6 +511,9 @@ The basic usage is not to set any template arguments when downloading a single f - `autonumber`: Five-digit number that will be increased with each download, starting at zero - `playlist`: Name or id of the playlist that contains the video - `playlist_index`: Index of the video in the playlist padded with leading zeros according to the total length of the playlist + - `playlist_id`: Playlist identifier + - `playlist_title`: Playlist title + Available for the 
video that belongs to some logical chapter or section: - `chapter`: Name or title of the chapter the video belongs to From 62666af99fb55e3ba535ce630e8ce0aed1b5b0e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:13:05 +0700 Subject: [PATCH 0758/3599] [indavideo] Fix formats' height (Closes #9744) --- youtube_dl/extractor/indavideo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 9622f198a..c6f080484 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -60,7 +60,8 @@ class IndavideoEmbedIE(InfoExtractor): formats = [{ 'url': video_url, - 'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None), + 'height': int_or_none(self._search_regex( + r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)), } for video_url in video_urls] self._sort_formats(formats) From 4cad2929cd7e90be174ae6b0ad0c7d9f47795374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:30:44 +0700 Subject: [PATCH 0759/3599] [limelight] Fix _VALID_URLs --- youtube_dl/extractor/limelight.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 2599d45c3..8dbc940a7 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bmediaId=)(?P<id>[a-z0-9]{32})' _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 
'info_dict': { @@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelId=)(?P<id>[a-z0-9]{32})' _TEST = { 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { From 79027c0ea02d4f296aefe6ca6e5af393c2a4a209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:40:02 +0700 Subject: [PATCH 0760/3599] [limelight] Improve _VALID_URLs --- youtube_dl/extractor/limelight.py | 56 +++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 8dbc940a7..da5d198b9 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -123,7 +123,18 @@ class LimelightBaseIE(InfoExtractor): class LimelightMediaIE(LimelightBaseIE): IE_NAME = 'limelight' - _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bmediaId=)(?P<id>[a-z0-9]{32})' + _VALID_URL = r'''(?x) + (?: + limelight:media:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bmediaId= + ) + (?P<id>[a-z0-9]{32}) + ''' _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'info_dict': { @@ -158,6 +169,9 @@ class LimelightMediaIE(LimelightBaseIE): # rtmp download 'skip_download': True, }, + }, { + 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452', + 'only_matching': True, }] _PLAYLIST_SERVICE_PATH = 'media' _API_PATH = 'media' @@ -176,15 +190,29 @@ class 
LimelightMediaIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE): IE_NAME = 'limelight:channel' - _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelId=)(?P<id>[a-z0-9]{32})' - _TEST = { + _VALID_URL = r'''(?x) + (?: + limelight:channel:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bchannelId= + ) + (?P<id>[a-z0-9]{32}) + ''' + _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'info_dict': { 'id': 'ab6a524c379342f9b23642917020c082', 'title': 'Javascript Sample Code', }, 'playlist_mincount': 3, - } + }, { + 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082', + 'only_matching': True, + }] _PLAYLIST_SERVICE_PATH = 'channel' _API_PATH = 'channels' @@ -207,15 +235,29 @@ class LimelightChannelIE(LimelightBaseIE): class LimelightChannelListIE(LimelightBaseIE): IE_NAME = 'limelight:channel_list' - _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' - _TEST = { + _VALID_URL = r'''(?x) + (?: + limelight:channel_list:| + https?:// + (?: + link\.videoplatform\.limelight\.com/media/| + assets\.delvenetworks\.com/player/loader\.swf + ) + \?.*?\bchannelListId= + ) + (?P<id>[a-z0-9]{32}) + ''' + _TESTS = [{ 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', 'info_dict': { 'id': '301b117890c4465c8179ede21fd92e2b', 'title': 'Website - Hero Player', }, 'playlist_mincount': 2, - } + }, { + 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b', + 'only_matching': True, + }] _PLAYLIST_SERVICE_PATH = 'channel_list' def _real_extract(self, url): From 21ac1a8ac3f2a3c301ad8c08730166a8fd82c287 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:52:50 +0700 Subject: [PATCH 0761/3599] [limelight] Fix typo --- youtube_dl/extractor/limelight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index da5d198b9..a25fb8e2c 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -98,7 +98,7 @@ class LimelightBaseIE(InfoExtractor): } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')] subtitles = {} - for caption in properties.get('captions', {}): + for caption in properties.get('captions', []): lang = caption.get('language_code') subtitles_url = caption.get('url') if lang and subtitles_url: From fe458b65965e5a847a24d00138b723ce67b274e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 05:57:27 +0700 Subject: [PATCH 0762/3599] [limelight] Extract ttml subtitles (Closes #9739) --- youtube_dl/extractor/limelight.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index a25fb8e2c..5d2c3e256 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -102,9 +102,15 @@ class LimelightBaseIE(InfoExtractor): lang = caption.get('language_code') subtitles_url = caption.get('url') if lang and subtitles_url: - subtitles[lang] = [{ + subtitles.setdefault(lang, []).append({ 'url': subtitles_url, - }] + }) + closed_captions_url = properties.get('closed_captions_url') + if closed_captions_url: + subtitles.setdefault('en', []).append({ + 'url': closed_captions_url, + 'ext': 'ttml', + }) return { 'id': video_id, From 698f127c1a9dd460c8dede59df6a0e2ce69f913a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 06:14:22 +0700 Subject: [PATCH 0763/3599] [setup.py] Add python 3.5 
classifier --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 9444d403d..c1e923f71 100644 --- a/setup.py +++ b/setup.py @@ -122,6 +122,7 @@ setup( "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From 33751818d3e31270304db519849d85bec43e9c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 08:28:51 +0700 Subject: [PATCH 0764/3599] release 2016.06.11.2 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 564cffae7..8fa97ee87 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.1 +[debug] youtube-dl version 2016.06.11.2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5bcb6a7b3..f6cc8b79e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.1' +__version__ = '2016.06.11.2' From 4a420119a6e0b7363f9d31e37d3e7af818bedfd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Jun 2016 08:34:30 +0700 Subject: [PATCH 0765/3599] release 2016.06.11.3 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8fa97ee87..a46b75fd8 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.2** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.3*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.3** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.2 +[debug] youtube-dl version 2016.06.11.3 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f6cc8b79e..9932b1e62 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.2' +__version__ = '2016.06.11.3' From 47787efa2b6bd5dc1b6f6cb7027586bac2de4c6c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 13:13:16 +0800 Subject: [PATCH 0766/3599] [leeco] Recognize Le Sports URLs (fixes #9750) --- youtube_dl/extractor/leeco.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 375fdaed1..63f581cd9 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ 
-28,7 +28,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' - _VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -69,6 +69,9 @@ class LeIE(InfoExtractor): 'hls_prefer_native': True, }, 'skip': 'Only available in China', + }, { + 'url': 'http://sports.le.com/video/25737697.html', + 'only_matching': True, }] @staticmethod @@ -196,7 +199,7 @@ class LeIE(InfoExtractor): class LePlaylistIE(InfoExtractor): - _VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)' + _VALID_URL = r'https?://[a-z]+\.le\.com/(?!video)[a-z]+/(?P<id>[a-z0-9_]+)' _TESTS = [{ 'url': 'http://www.le.com/tv/46177.html', From 7aab3696dd02ca45feba523b4194d6430939dd1c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 15:37:04 +0800 Subject: [PATCH 0767/3599] [kuwo] Update _TESTS --- youtube_dl/extractor/kuwo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 11b31a699..0221fb919 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -148,8 +148,8 @@ class KuwoAlbumIE(InfoExtractor): 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { 'id': '502294', - 'title': 'M', - 'description': 'md5:6a7235a84cc6400ec3b38a7bdaf1d60c', + 'title': 'Made\xa0Series\xa0《M》', + 'description': 'md5:d463f0d8a0ff3c3ea3d6ed7452a9483f', }, 'playlist_count': 2, } @@ -209,7 +209,7 @@ class KuwoSingerIE(InfoExtractor): 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { 'id': 'bruno+mars', - 'title': 'Bruno Mars', + 'title': 'Bruno\xa0Mars', }, 'playlist_mincount': 329, }, { @@ -306,7 +306,7 @@ class KuwoMvIE(KuwoBaseIE): 'id': '6480076', 'ext': 'mp4', 'title': 'My HouseMV', - 'creator': '2PM', + 'creator': 'PM02:00', }, # In this video, music URLs (anti.s) are 
blocked outside China and # USA, while the MV URL (mvurl) is available globally, so force the MV From 15d106787e8c21e4d4df95957062bd07c873d203 Mon Sep 17 00:00:00 2001 From: Paul Henning <vxbinaca@users.noreply.github.com> Date: Sat, 11 Jun 2016 05:36:31 -0400 Subject: [PATCH 0768/3599] [utils] Change Firefox 44 to 47 See commit title. --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f77ab8650..0acbd67de 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -76,7 +76,7 @@ def register_socks_protocols(): compiled_regex_type = type(re.compile('')) std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', From 856150d05647904a5cf6c519c6e276ce3536bd20 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 18:22:26 +0800 Subject: [PATCH 0769/3599] [telewebion] Add new extractor (closes #5135) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/telewebion.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/telewebion.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 38708294a..36ddc1f73 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -777,6 +777,7 @@ from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .teletask import TeleTaskIE +from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE from .theintercept import TheInterceptIE diff --git a/youtube_dl/extractor/telewebion.py 
b/youtube_dl/extractor/telewebion.py new file mode 100644 index 000000000..77916c601 --- /dev/null +++ b/youtube_dl/extractor/telewebion.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TelewebionIE(InfoExtractor): + _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.telewebion.com/#!/episode/1263668/', + 'info_dict': { + 'id': '1263668', + 'ext': 'mp4', + 'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا', + 'thumbnail': 're:^https?://.*\.jpg', + 'view_count': int, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + secure_token = self._download_webpage( + 'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id) + episode_details = self._download_json( + 'http://m.s2.telewebion.com/op/op', video_id, + query={'action': 'getEpisodeDetails', 'episode_id': video_id}) + + m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % ( + video_id, episode_details['file_path'], secure_token) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', m3u8_id='hls') + + picture_paths = [ + episode_details.get('picture_path'), + episode_details.get('large_picture_path'), + ] + + thumbnails = [{ + 'url': picture_path, + 'preference': idx, + } for idx, picture_path in enumerate(picture_paths) if picture_path is not None] + + return { + 'id': video_id, + 'title': episode_details['title'], + 'formats': formats, + 'thumbnails': thumbnails, + 'view_count': episode_details.get('view_count'), + } From c5edd147d1d2cf0502f5ef48652c88a75ef62529 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 18:33:37 +0800 Subject: [PATCH 0770/3599] [generic] Remove an invalid test Now handled by telewebion.py --- youtube_dl/extractor/generic.py | 14 -------------- 1 file changed, 14 deletions(-) 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ef18ce3dc..4aa24061c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1073,20 +1073,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, - # Contains a SMIL manifest - { - 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html', - 'info_dict': { - 'id': 'file', - 'ext': 'flv', - 'title': '+ Football: Lottery Champions League Europe', - 'uploader': 'www.telewebion.com', - }, - 'params': { - # rtmpe downloads - 'skip_download': True, - } - }, # Brightcove URL in single quotes { 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/', From 531a74968c24416cb2e4a79c9bfbcc9d02368e44 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 21:35:08 +0800 Subject: [PATCH 0771/3599] [vimeo] Fix extraction for VimeoReview videos --- youtube_dl/extractor/vimeo.py | 147 +++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 64 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 59f9cb1ae..0fd2c18a0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -66,6 +66,69 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _set_vimeo_cookie(self, name, value): self._set_cookie('vimeo.com', name, value) + def _vimeo_sort_formats(self, formats): + # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps + # at the same time without actual units specified. This lead to wrong sorting. 
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) + + def _parse_config(self, config, video_id): + # Extract title + video_title = config['video']['title'] + + # Extract uploader, uploader_url and uploader_id + video_uploader = config['video'].get('owner', {}).get('name') + video_uploader_url = config['video'].get('owner', {}).get('url') + video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None + + # Extract video thumbnail + video_thumbnail = config['video'].get('thumbnail') + if video_thumbnail is None: + video_thumbs = config['video'].get('thumbs') + if video_thumbs and isinstance(video_thumbs, dict): + _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] + + # Extract video duration + video_duration = int_or_none(config['video'].get('duration')) + + formats = [] + config_files = config['video'].get('files') or config['request'].get('files', {}) + for f in config_files.get('progressive', []): + video_url = f.get('url') + if not video_url: + continue + formats.append({ + 'url': video_url, + 'format_id': 'http-%s' % f.get('quality'), + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'fps': int_or_none(f.get('fps')), + 'tbr': int_or_none(f.get('bitrate')), + }) + m3u8_url = config_files.get('hls', {}).get('url') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + subtitles = {} + text_tracks = config['request'].get('text_tracks') + if text_tracks: + for tt in text_tracks: + subtitles[tt['lang']] = [{ + 'ext': 'vtt', + 'url': 'https://vimeo.com' + tt['url'], + }] + + return { + 'title': video_title, + 'uploader': video_uploader, + 'uploader_id': video_uploader_id, + 'uploader_url': video_uploader_url, + 'thumbnail': video_thumbnail, + 'duration': video_duration, + 'formats': formats, + 'subtitles': 
subtitles, + } + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -153,7 +216,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, - 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026', + 'description': 'This is "youtube-dl password protected test video" by on Vimeo, the home for high quality videos and the people who love them.', }, 'params': { 'videopassword': 'youtube-dl', @@ -389,21 +452,6 @@ class VimeoIE(VimeoBaseInfoExtractor): 'https://player.vimeo.com/player/%s' % feature_id, {'force_feature_id': True}), 'Vimeo') - # Extract title - video_title = config['video']['title'] - - # Extract uploader, uploader_url and uploader_id - video_uploader = config['video'].get('owner', {}).get('name') - video_uploader_url = config['video'].get('owner', {}).get('url') - video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None - - # Extract video thumbnail - video_thumbnail = config['video'].get('thumbnail') - if video_thumbnail is None: - video_thumbs = config['video'].get('thumbs') - if video_thumbs and isinstance(video_thumbs, dict): - _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] - # Extract video description video_description = self._html_search_regex( @@ -423,9 +471,6 @@ class VimeoIE(VimeoBaseInfoExtractor): if not video_description and not mobj.group('player'): self._downloader.report_warning('Cannot find video description') - # Extract video duration - video_duration = int_or_none(config['video'].get('duration')) - # Extract upload date video_upload_date = None mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage) @@ -463,53 +508,22 @@ class VimeoIE(VimeoBaseInfoExtractor): 'format_id': source_name, 'preference': 1, }) - config_files = 
config['video'].get('files') or config['request'].get('files', {}) - for f in config_files.get('progressive', []): - video_url = f.get('url') - if not video_url: - continue - formats.append({ - 'url': video_url, - 'format_id': 'http-%s' % f.get('quality'), - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - 'fps': int_or_none(f.get('fps')), - 'tbr': int_or_none(f.get('bitrate')), - }) - m3u8_url = config_files.get('hls', {}).get('url') - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps - # at the same time without actual units specified. This lead to wrong sorting. - self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) - subtitles = {} - text_tracks = config['request'].get('text_tracks') - if text_tracks: - for tt in text_tracks: - subtitles[tt['lang']] = [{ - 'ext': 'vtt', - 'url': 'https://vimeo.com' + tt['url'], - }] - - return { + info_dict = self._parse_config(config, video_id) + formats.extend(info_dict['formats']) + self._vimeo_sort_formats(formats) + info_dict.update({ 'id': video_id, - 'uploader': video_uploader, - 'uploader_url': video_uploader_url, - 'uploader_id': video_uploader_id, - 'upload_date': video_upload_date, - 'title': video_title, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'duration': video_duration, 'formats': formats, + 'upload_date': video_upload_date, + 'description': video_description, 'webpage_url': url, 'view_count': view_count, 'like_count': like_count, 'comment_count': comment_count, - 'subtitles': subtitles, - } + }) + + return info_dict class VimeoOndemandIE(VimeoBaseInfoExtractor): @@ -692,7 +706,7 @@ class VimeoGroupsIE(VimeoAlbumIE): return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name) -class VimeoReviewIE(InfoExtractor): +class 
VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)' @@ -704,6 +718,7 @@ class VimeoReviewIE(InfoExtractor): 'ext': 'mp4', 'title': "DICK HARDWICK 'Comedian'", 'uploader': 'Richard Hardwick', + 'uploader_id': 'user21297594', } }, { 'note': 'video player needs Referer', @@ -716,14 +731,18 @@ class VimeoReviewIE(InfoExtractor): 'uploader': 'DevWeek Events', 'duration': 2773, 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader_id': 'user22258446', } }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - player_url = 'https://player.vimeo.com/player/' + video_id - return self.url_result(player_url, 'Vimeo', video_id) + video_id = self._match_id(url) + config = self._download_json( + 'https://player.vimeo.com/video/%s/config' % video_id, video_id) + info_dict = self._parse_config(config, video_id) + self._vimeo_sort_formats(info_dict['formats']) + info_dict['id'] = video_id + return info_dict class VimeoWatchLaterIE(VimeoChannelIE): From 94e5d6aedb5b509601d29dd8ea352afa925d3b22 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 21:49:01 +0800 Subject: [PATCH 0772/3599] [viki] Skip a geo-restricted test --- youtube_dl/extractor/viki.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index e04b814c8..0c0cd622a 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -156,7 +156,8 @@ class VikiIE(VikiBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, + 'skip': 'Blocked in the US', }, { # episode 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', From c83b35d4aa4cec98ac171cca94ec515500076926 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 22:39:13 +0800 Subject: [PATCH 0773/3599] [viki] Update _TESTS --- 
youtube_dl/extractor/viki.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 0c0cd622a..70ce5de0e 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -127,7 +127,7 @@ class VikiIE(VikiBaseIE): }, { # clip 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', - 'md5': '86c0b5dbd4d83a6611a79987cc7a1989', + 'md5': 'feea2b1d7b3957f70886e6dfd8b8be84', 'info_dict': { 'id': '1067139v', 'ext': 'mp4', @@ -161,13 +161,13 @@ class VikiIE(VikiBaseIE): }, { # episode 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', - 'md5': '190f3ef426005ba3a080a63325955bc3', + 'md5': '1f54697dabc8f13f31bf06bb2e4de6db', 'info_dict': { 'id': '44699v', 'ext': 'mp4', 'title': 'Boys Over Flowers - Episode 1', - 'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2', - 'duration': 4155, + 'description': 'md5:b89cf50038b480b88b5b3c93589a9076', + 'duration': 4204, 'timestamp': 1270496524, 'upload_date': '20100405', 'uploader': 'group8', @@ -197,7 +197,7 @@ class VikiIE(VikiBaseIE): }, { # non-English description 'url': 'http://www.viki.com/videos/158036v-love-in-magic', - 'md5': '1713ae35df5a521b31f6dc40730e7c9c', + 'md5': '013dc282714e22acf9447cad14ff1208', 'info_dict': { 'id': '158036v', 'ext': 'mp4', @@ -303,7 +303,7 @@ class VikiChannelIE(VikiBaseIE): 'title': 'Boys Over Flowers', 'description': 'md5:ecd3cff47967fe193cff37c0bec52790', }, - 'playlist_count': 70, + 'playlist_mincount': 71, }, { 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete', 'info_dict': { From 6d28c408cfb0ce42f591cc6e2bb67522c0812c72 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 11 Jun 2016 23:00:44 +0800 Subject: [PATCH 0774/3599] [viki] Do not use a fallback language for title in the first try In test_Viki_3, 'titles' gives a Hebrew title. 
--- youtube_dl/extractor/viki.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 70ce5de0e..efa15e0b6 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -101,10 +101,13 @@ class VikiBaseIE(InfoExtractor): self.report_warning('Unable to get session token, login has probably failed') @staticmethod - def dict_selection(dict_obj, preferred_key): + def dict_selection(dict_obj, preferred_key, allow_fallback=True): if preferred_key in dict_obj: return dict_obj.get(preferred_key) + if not allow_fallback: + return + filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()])) return filtered_dict[0] if filtered_dict else None @@ -218,7 +221,7 @@ class VikiIE(VikiBaseIE): self._check_errors(video) - title = self.dict_selection(video.get('titles', {}), 'en') + title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False) if not title: title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id container_titles = video.get('container', {}).get('titles', {}) From 80ae228b344ce36a07fb91c7e968fc5249c03161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 01:57:23 +0700 Subject: [PATCH 0775/3599] [matchtv] Modernize --- youtube_dl/extractor/matchtv.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dl/extractor/matchtv.py index 80a0d7013..33b0b539f 100644 --- a/youtube_dl/extractor/matchtv.py +++ b/youtube_dl/extractor/matchtv.py @@ -4,16 +4,12 @@ from __future__ import unicode_literals import random from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode -from ..utils import ( - sanitized_Request, - xpath_text, -) +from ..utils import xpath_text class MatchTVIE(InfoExtractor): - _VALID_URL = 
r'https?://matchtv\.ru/?#live-player' - _TEST = { + _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)' + _TESTS = [{ 'url': 'http://matchtv.ru/#live-player', 'info_dict': { 'id': 'matchtv-live', @@ -24,12 +20,16 @@ class MatchTVIE(InfoExtractor): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'http://matchtv.ru/on-air/', + 'only_matching': True, + }] def _real_extract(self, url): video_id = 'matchtv-live' - request = sanitized_Request( - 'http://player.matchtv.ntvplus.tv/player/smil?%s' % compat_urllib_parse_urlencode({ + video_url = self._download_json( + 'http://player.matchtv.ntvplus.tv/player/smil', video_id, + query={ 'ts': '', 'quality': 'SD', 'contentId': '561d2c0df7159b37178b4567', @@ -40,11 +40,10 @@ class MatchTVIE(InfoExtractor): 'contentType': 'channel', 'timeShift': '0', 'platform': 'portal', - }), + }, headers={ 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf', - }) - video_url = self._download_json(request, video_id)['data']['videoUrl'] + })['data']['videoUrl'] f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') formats = self._extract_f4m_formats(f4m_url, video_id) self._sort_formats(formats) From 2c3322e36ef23eb0566b820dd8e8711de20ed963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 04:49:37 +0700 Subject: [PATCH 0776/3599] [youporn] Fix metadata extraction --- youtube_dl/extractor/youporn.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 1124fe6c2..0df2d76ee 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -17,7 +17,7 @@ class YouPornIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', - 'md5': '71ec5fcfddacf80f495efa8b6a8d9a89', + 
'md5': '3744d24c50438cf5b6f6d59feb5055c2', 'info_dict': { 'id': '505835', 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily', @@ -121,21 +121,21 @@ class YouPornIE(InfoExtractor): webpage, 'thumbnail', fatal=False, group='thumbnail') uploader = self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfoBy(?:\s+[^"\']+)?["\'][^>]*>\s*By:\s*</div>(.+?)</(?:a|div)>', + r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( - r'(?s)<div[^>]+class=["\']videoInfoTime["\'][^>]*>(.+?)</div>', + r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>', webpage, 'upload date', fatal=False)) age_limit = self._rta_search(webpage) average_rating = int_or_none(self._search_regex( - r'<div[^>]+class=["\']videoInfoRating["\'][^>]*>\s*<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>', + r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>', webpage, 'average rating', fatal=False)) view_count = str_to_int(self._search_regex( - r'(?s)<div[^>]+class=["\']videoInfoViews["\'][^>]*>.*?([\d,.]+)\s*</div>', - webpage, 'view count', fatal=False)) + r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<', + webpage, 'view count', fatal=False, group='count')) comment_count = str_to_int(self._search_regex( r'>All [Cc]omments? 
\(([\d,.]+)\)', webpage, 'comment count', fatal=False)) From 329ca3bef695bff011ed9b2d5f03e1331bf5bf0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:05:34 +0700 Subject: [PATCH 0777/3599] [utils] Add try_get To reduce boilerplate when accessing JSON --- youtube_dl/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0acbd67de..c8308ba3a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1901,6 +1901,16 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True): return d.get(key_or_keys, default) +def try_get(src, getter, expected_type=None): + try: + v = getter(src) + except (AttributeError, KeyError, TypeError, IndexError): + pass + else: + if expected_type is None or isinstance(v, expected_type): + return v + + def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): return string if isinstance(string, compat_str) else compat_str(string, encoding, errors) From 98960c911c9bacc0c366dd11b194963a82606850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:06:04 +0700 Subject: [PATCH 0778/3599] [instagram] Extract metadata from JSON --- youtube_dl/extractor/instagram.py | 72 ++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 3cbe77ad8..fc0197ae1 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -8,6 +8,7 @@ from ..utils import ( int_or_none, limit_length, lowercase_escape, + try_get, ) @@ -19,10 +20,16 @@ class InstagramIE(InfoExtractor): 'info_dict': { 'id': 'aye83DjauH', 'ext': 'mp4', - 'uploader_id': 'naomipq', 'title': 'Video by naomipq', 'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8', - } + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1371748545, + 'upload_date': 
'20130620', + 'uploader_id': 'naomipq', + 'uploader': 'Naomi Leonor Phan-Quang', + 'like_count': int, + 'comment_count': int, + }, }, { # missing description 'url': 'https://www.instagram.com/p/BA-pQFBG8HZ/?taken-by=britneyspears', @@ -31,6 +38,13 @@ class InstagramIE(InfoExtractor): 'ext': 'mp4', 'uploader_id': 'britneyspears', 'title': 'Video by britneyspears', + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1453760977, + 'upload_date': '20160125', + 'uploader_id': 'britneyspears', + 'uploader': 'Britney Spears', + 'like_count': int, + 'comment_count': int, }, 'params': { 'skip_download': True, @@ -67,21 +81,57 @@ class InstagramIE(InfoExtractor): url = mobj.group('url') webpage = self._download_webpage(url, video_id) - uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', - webpage, 'uploader id', fatal=False) - desc = self._search_regex( - r'"caption":"(.+?)"', webpage, 'description', default=None) - if desc is not None: - desc = lowercase_escape(desc) + + (video_url, description, thumbnail, timestamp, uploader, + uploader_id, like_count, comment_count) = [None] * 8 + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + webpage, 'shared data', default='{}'), + video_id, fatal=False) + if shared_data: + media = try_get( + shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) + if media: + video_url = media.get('video_url') + description = media.get('caption') + thumbnail = media.get('display_src') + timestamp = int_or_none(media.get('date')) + uploader = media.get('owner', {}).get('full_name') + uploader_id = media.get('owner', {}).get('username') + like_count = int_or_none(media.get('likes', {}).get('count')) + comment_count = int_or_none(media.get('comments', {}).get('count')) + + if not video_url: + video_url = self._og_search_video_url(webpage, secure=False) + + if not uploader_id: + uploader_id = self._search_regex( + r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', + webpage, 
'uploader id', fatal=False) + + if not description: + description = self._search_regex( + r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) + if description is not None: + description = lowercase_escape(description) + + if not thumbnail: + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, - 'url': self._og_search_video_url(webpage, secure=False), + 'url': video_url, 'ext': 'mp4', 'title': 'Video by %s' % uploader_id, - 'thumbnail': self._og_search_thumbnail(webpage), + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, 'uploader_id': uploader_id, - 'description': desc, + 'uploader': uploader, + 'like_count': like_count, + 'comment_count': comment_count, } From a936ac321c5c0cee8e9769334945e744cdc60ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:39:31 +0700 Subject: [PATCH 0779/3599] [README.md] Document using output template in batch files (Closes #9717) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 3ff33c156..43e5114ea 100644 --- a/README.md +++ b/README.md @@ -553,6 +553,10 @@ The current default template is `%(title)s-%(id)s.%(ext)s`. In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: +#### Output template and Windows batch files + +If you are using output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`. 
+ #### Output template examples Note on Windows you may need to use double quotes instead of single. From 4e790117292d060a3e449c9edfffe14d231aee96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 06:57:04 +0700 Subject: [PATCH 0780/3599] [nrktv] Fix tests --- youtube_dl/extractor/nrk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 486e086bb..4a790da7b 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -163,7 +163,7 @@ class NRKTVIE(NRKBaseIE): 'ext': 'mp4', 'title': '20 spørsmål 23.05.2014', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', - 'duration': 1741.52, + 'duration': 1741, }, }, { 'url': 'https://tv.nrk.no/program/mdfp15000514', @@ -173,7 +173,7 @@ class NRKTVIE(NRKBaseIE): 'ext': 'mp4', 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', - 'duration': 4605.08, + 'duration': 4605, }, }, { # single playlist video From 971e3b7520563936f6e6946f5c08d64f65ab6f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 07:20:37 +0700 Subject: [PATCH 0781/3599] [nrk:skole] Fix extraction --- youtube_dl/extractor/nrk.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 4a790da7b..6ded5bd45 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -260,30 +260,34 @@ class NRKPlaylistIE(InfoExtractor): class NRKSkoleIE(InfoExtractor): IE_DESC = 'NRK Skole' - _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/klippdetalj?.*\btopic=(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)' _TESTS = [{ - 'url': 'http://nrk.no/skole/klippdetalj?topic=nrk:klipp/616532', - 'md5': '04cd85877cc1913bce73c5d28a47e00f', + 
'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099', + 'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6', 'info_dict': { 'id': '6021', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Genetikk og eneggede tvillinger', 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d', 'duration': 399, }, }, { - 'url': 'http://www.nrk.no/skole/klippdetalj?topic=nrk%3Aklipp%2F616532#embed', - 'only_matching': True, - }, { - 'url': 'http://www.nrk.no/skole/klippdetalj?topic=urn:x-mediadb:21379', + 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355', 'only_matching': True, }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(self._match_id(url)) + video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id, + video_id) + + nrk_id = self._parse_json( + self._search_regex( + r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>', + webpage, 'application json'), + video_id)['activeMedia']['psId'] - nrk_id = self._search_regex(r'data-nrk-id=["\'](\d+)', webpage, 'nrk id') return self.url_result('nrk:%s' % nrk_id) From 84dcd1c4e47f2a5a84a4658f42c66f7546588001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 11:08:39 +0700 Subject: [PATCH 0782/3599] [streamcloud] Detect removed videos (Closes #3768) --- youtube_dl/extractor/streamcloud.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 712359885..58560ec64 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, sanitized_Request, urlencode_postdata, ) @@ -14,7 +15,7 @@ class StreamcloudIE(InfoExtractor): IE_NAME = 'streamcloud.eu' _VALID_URL = 
r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?' - _TEST = { + _TESTS = [{ 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', 'md5': '6bea4c7fa5daaacc2a946b7146286686', 'info_dict': { @@ -23,7 +24,10 @@ class StreamcloudIE(InfoExtractor): 'title': 'youtube-dl test video \'/\\ ä ↭', }, 'skip': 'Only available from the EU' - } + }, { + 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -31,6 +35,10 @@ class StreamcloudIE(InfoExtractor): orig_webpage = self._download_webpage(url, video_id) + if '>File Not Found<' in orig_webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + fields = re.findall(r'''(?x)<input\s+ type="(?:hidden|submit)"\s+ name="([^"]+)"\s+ From 77a9a9c295c753c4de4c96def6a9a15de1025f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 12:06:48 +0700 Subject: [PATCH 0783/3599] release 2016.06.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a46b75fd8..243f2de5d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.11.3*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.11.3** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.12*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.11.3 +[debug] youtube-dl version 2016.06.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index f89c2d1f2..e8c0a5d24 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -647,6 +647,7 @@ - **Telegraaf** - **TeleMB** - **TeleTask** + - **Telewebion** - **TF1** - **TheIntercept** - **ThePlatform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9932b1e62..5e9c14398 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.11.3' +__version__ = '2016.06.12' From e69f9f5d68aed32cc27ca188b0f51925d949c365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Jun 2016 16:45:07 +0700 Subject: [PATCH 0784/3599] 
[downloader/external] Decode error string before writing to stderr --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 3ff1f9ed4..fae245024 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -85,7 +85,7 @@ class ExternalFD(FileDownloader): cmd, stderr=subprocess.PIPE) _, stderr = p.communicate() if p.returncode != 0: - self.to_stderr(stderr) + self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode From bccdac68749e7a39a47dd0e1ad0ec9c177657de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:11:04 +0700 Subject: [PATCH 0785/3599] [xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335) --- youtube_dl/extractor/xfileshare.py | 31 ++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index ee4d04c20..fe0ab6300 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -5,8 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( + decode_packed_codes, ExtractorError, int_or_none, + NO_DEFAULT, sanitized_Request, urlencode_postdata, ) @@ -23,6 +25,7 @@ class XFileShareIE(InfoExtractor): ('thevideobee.to', 'TheVideoBee'), ('vidto.me', 'Vidto'), ('streamin.to', 'Streamin.To'), + ('xvidstage.com', 'XVIDSTAGE'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) @@ -78,6 +81,13 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Big Buck Bunny trailer', }, + }, { + 'url': 'http://xvidstage.com/e0qcnl03co6z', + 'info_dict': { + 'id': 'e0qcnl03co6z', + 'ext': 'mp4', + 'title': 'Chucky Prank 2015.mp4', + }, }] def _real_extract(self, url): @@ -113,10 +123,23 @@ class XFileShareIE(InfoExtractor): r'>Watch (.+) ', r'<h2 
class="video-page-head">([^<]+)</h2>'], webpage, 'title', default=None) or self._og_search_title(webpage)).strip() - video_url = self._search_regex( - [r'file\s*:\s*["\'](http[^"\']+)["\'],', - r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'], - webpage, 'file url') + + def extract_video_url(default=NO_DEFAULT): + return self._search_regex( + (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,', + r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1', + r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)', + r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'), + webpage, 'file url', default=default, group='url') + + video_url = extract_video_url(default=None) + + if not video_url: + webpage = decode_packed_codes(self._search_regex( + r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", + webpage, 'packed code')) + video_url = extract_video_url() + thumbnail = self._search_regex( r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) From cf2bf840bac1742cb422549a5491a30f70d1abb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:11:14 +0700 Subject: [PATCH 0786/3599] [xfileshare] Fix test --- youtube_dl/extractor/xfileshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index fe0ab6300..0f8ccf430 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -39,7 +39,7 @@ class XFileShareIE(InfoExtractor): 'md5': '5ae4a3580620380619678ee4875893ba', 'info_dict': { 'id': '06y9juieqpmi', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', 'thumbnail': 're:http://.*\.jpg', }, From 33b72ce64e8705a71f8ab0e6a322e5f9f3b99276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 01:19:54 +0700 Subject: [PATCH 0787/3599] [xfileshare] Improve removed 
videos detection --- youtube_dl/extractor/xfileshare.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 0f8ccf430..995aada0d 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -32,7 +32,10 @@ class XFileShareIE(InfoExtractor): _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0])) - _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<' + _FILE_NOT_FOUND_REGEXES = ( + r'>(?:404 - )?File Not Found<', + r'>The file was removed by administrator<', + ) _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -88,6 +91,10 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Chucky Prank 2015.mp4', }, + }, { + # removed by administrator + 'url': 'http://xvidstage.com/amfy7atlkx25', + 'only_matching': True, }] def _real_extract(self, url): @@ -97,7 +104,7 @@ class XFileShareIE(InfoExtractor): url = 'http://%s/%s' % (mobj.group('host'), video_id) webpage = self._download_webpage(url, video_id) - if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: + if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): raise ExtractorError('Video %s does not exist' % video_id, expected=True) fields = self._hidden_inputs(webpage) From b50e02c1e4c9ea70e88ab115b17cfa109b0c9617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 07:05:32 +0700 Subject: [PATCH 0788/3599] [README.md] Update links to options available for YoutubeDL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 43e5114ea..b5cbaced7 100644 --- a/README.md +++ b/README.md @@ -964,7 +964,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. 
For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From b4663f12b1c872f4e731f1940831ec017bc86959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 07:16:35 +0700 Subject: [PATCH 0789/3599] [README.md] Update links to info dict metafields --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5cbaced7..5a9768161 100644 --- a/README.md +++ b/README.md @@ -935,8 +935,8 @@ After you have ensured this site is distributing it's content legally, you can f ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. -8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +8. 
Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: From 79cd8b3d8acee7845260d5bd60698155a0d81d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Jun 2016 10:04:04 +0700 Subject: [PATCH 0790/3599] [README.md] Suggest checking extractor code under all Python versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a9768161..f1e59542d 100644 --- a/README.md +++ b/README.md @@ -937,7 +937,7 @@ After you have ensured this site is distributing it's content legally, you can f 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. 
This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. -9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). +9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 
Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py From 778f96944785f814a97964be1d6fb3bb78bc13f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 00:06:31 +0700 Subject: [PATCH 0791/3599] [twitch:clips] Add extractor (Closes #9767) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/twitch.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 36ddc1f73..d2db4d803 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -862,6 +862,7 @@ from .twitch import ( TwitchProfileIE, TwitchPastBroadcastsIE, TwitchStreamIE, + TwitchClipsIE, ) from .twitter import ( TwitterCardIE, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index d898f14c3..20919774d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -16,6 +16,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, parse_duration, parse_iso8601, @@ -454,3 +455,45 @@ class TwitchStreamIE(TwitchBaseIE): 'formats': formats, 'is_live': True, } + + +class TwitchClipsIE(InfoExtractor): + IE_NAME = 'twitch:clips' + _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' + + _TEST = { + 'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound', + 'md5': '761769e1eafce0ffebfb4089cb3847cd', + 'info_dict': { + 'id': 'AggressiveCobraPoooound', + 'ext': 'mp4', + 'title': 'EA Play 2016 Live from the Novo Theatre', + 'thumbnail': 're:^https?://.*\.jpg', + 'creator': 
'EA', + 'uploader': 'stereotype_', + 'uploader_id': 'stereotype_', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + clip = self._parse_json( + self._search_regex( + r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'), + video_id, transform_source=js_to_json) + + video_url = clip['clip_video_url'] + title = clip['channel_title'] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'), + 'uploader': clip.get('curator_login'), + 'uploader_id': clip.get('curator_display_name'), + } From 14d0f4e0f3e1b6a467b6302eb60644535aff4292 Mon Sep 17 00:00:00 2001 From: Dracony <draconyster@gmail.com> Date: Thu, 9 Jun 2016 13:31:22 +0200 Subject: [PATCH 0792/3599] Added extractor for rockstargames.com --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rockstargames.py | 54 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/rockstargames.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d2db4d803..8a6c54b97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -649,6 +649,7 @@ from .revision3 import ( from .rice import RICEIE from .ringtv import RingTVIE from .ro220 import Ro220IE +from .rockstargames import RockstarGamesIE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py new file mode 100644 index 000000000..427ab153a --- /dev/null +++ b/youtube_dl/extractor/rockstargames.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + qualities, + parse_iso8601 +) + + +class 
RockstarGamesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://www.rockstargames.com/videos/video/11544/', + 'md5': '03b5caa6e357a4bd50e3143fc03e5733', + 'info_dict': { + 'id': '11544', + 'ext': 'mp4', + 'title': 'Further Adventures in Finance and Felony Trailer', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'upload_date': '20160602', + 'timestamp': 1464876000 + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json?id=%s&locale=en_us' % video_id, + video_id + )['video'] + + formats = [] + + for video in json_data['files_processed']['video/mp4']: + if not video.get('src'): + continue + height = video.get('resolution', '').replace('p', '') + + formats.append({ + 'url': self._proto_relative_url(video['src']), + 'height': int(height) if height.isdigit() else -1, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': json_data['title'], + 'description': json_data.get('description'), + 'formats': formats, + 'thumbnail': self._proto_relative_url(json_data.get('screencap')), + 'timestamp': parse_iso8601(json_data.get('created')) + } From 16b6bd01d238c2c58e3ac7ba91c706261d5810e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 01:11:24 +0700 Subject: [PATCH 0793/3599] [rockstargames] Improve and add Youtube fallback (Closes #9737) --- youtube_dl/extractor/rockstargames.py | 55 +++++++++++++++++---------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py index 427ab153a..48128e219 100644 --- a/youtube_dl/extractor/rockstargames.py +++ b/youtube_dl/extractor/rockstargames.py @@ -3,52 +3,67 @@ from __future__ import unicode_literals from .common import 
InfoExtractor from ..utils import ( - qualities, - parse_iso8601 + int_or_none, + parse_iso8601, ) class RockstarGamesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos/video/(?P<id>[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)' + _TESTS = [{ 'url': 'https://www.rockstargames.com/videos/video/11544/', 'md5': '03b5caa6e357a4bd50e3143fc03e5733', 'info_dict': { 'id': '11544', 'ext': 'mp4', 'title': 'Further Adventures in Finance and Felony Trailer', - 'thumbnail': 're:^https?://.*\.jpg$', 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1464876000, 'upload_date': '20160602', - 'timestamp': 1464876000 } - } + }, { + 'url': 'http://www.rockstargames.com/videos#/?video=48', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json( - 'https://www.rockstargames.com/videoplayer/videos/get-video.json?id=%s&locale=en_us' % video_id, - video_id - )['video'] + + video = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json', + video_id, query={ + 'id': video_id, + 'locale': 'en_us', + })['video'] + + title = video['title'] formats = [] - - for video in json_data['files_processed']['video/mp4']: + for video in video['files_processed']['video/mp4']: if not video.get('src'): continue - height = video.get('resolution', '').replace('p', '') - + resolution = video.get('resolution') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', resolution or '', 'height', default=None)) formats.append({ 'url': self._proto_relative_url(video['src']), - 'height': int(height) if height.isdigit() else -1, + 'format_id': resolution, + 'height': height, }) + + if not formats: + youtube_id = video.get('youtube_id') + if youtube_id: + return self.url_result(youtube_id, 'Youtube') + self._sort_formats(formats) return { 'id': 
video_id, - 'title': json_data['title'], - 'description': json_data.get('description'), + 'title': title, + 'description': video.get('description'), + 'thumbnail': self._proto_relative_url(video.get('screencap')), + 'timestamp': parse_iso8601(video.get('created')), 'formats': formats, - 'thumbnail': self._proto_relative_url(json_data.get('screencap')), - 'timestamp': parse_iso8601(json_data.get('created')) } From fea55ef4a95d226668bd63742c4731832de93a79 Mon Sep 17 00:00:00 2001 From: venth <artur.krysiak.warszawa@gmail.com> Date: Sun, 8 May 2016 22:26:08 +0200 Subject: [PATCH 0794/3599] [wrzuta.pl:playlist] Added playlist extraction from wrzuta.pl --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wrzuta.py | 74 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8a6c54b97..5a93fec6a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -981,6 +981,7 @@ from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .wrzuta import WrzutaIE +from .wrzuta import WrzutaPlaylistIE from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index c42764921..830649214 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -80,3 +80,77 @@ class WrzutaIE(InfoExtractor): 'description': self._og_search_description(webpage), 'age_limit': embedpage.get('minimalAge', 0), } + + +_ENTRY_PATTERN = r'<a href="(?P<playlist_entry_url>[^"]+)" target="_blank" class="playlist\-file\-page">' +_PLAYLIST_SIZE_PATTERN = r'<div class="playlist-counter">[0-9]+/([0-9]+)</div>' + + +class WrzutaPlaylistIE(InfoExtractor): + """ + this class covers extraction of wrzuta playlist entries + the extraction process bases on following steps: + * collect information of playlist 
size + * download all entries provided on + the playlist webpage (the playlist is split + on two pages: first directly reached from webpage + second: downloaded on demand by ajax call and rendered + using the ajax call response) + * in case size of extracted entries not reached total number of entries + use the ajax call to collect the remaining entries + """ + + IE_NAME = 'wrzuta.pl:playlist' + + _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \ + '(?P<id>[0-9a-zA-Z]+)/.*' + + _TESTS = [{ + 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', + 'playlist_mincount': 14, + 'info_dict': { + 'id': '7XfO4vE84iR', + 'title': 'Moja muza', + }, + }, { + 'url': 'http://heroesf70.wrzuta.pl/playlista/6Nj3wQHx756/lipiec_-_lato_2015_muzyka_swiata', + 'playlist_mincount': 144, + 'info_dict': { + 'id': '6Nj3wQHx756', + 'title': 'Lipiec - Lato 2015 Muzyka Świata', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + uploader = mobj.group('uploader') + + entries = [] + + webpage = self._download_webpage(url, playlist_id) + + playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') + playlist_size = int(playlist_size) if playlist_size else 0 + + playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) + + if playlist_size: + entries = list(map( + lambda entry_url: self.url_result(entry_url), + re.findall(_ENTRY_PATTERN, webpage) + )) + + if playlist_size > len(entries): + playlist_content = self._download_json( + 'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( + uploader_id=uploader, + playlist_id=playlist_id, + ), + playlist_id, + 'Downloading playlist content as JSON metadata', + 'Unable to download playlist content as JSON metadata', + ) + entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] + + return self.playlist_result(entries, playlist_id, 
playlist_title) From 1759672eede27be0a3d473c4b2925a0b10dce547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:13:54 +0700 Subject: [PATCH 0795/3599] [wrzuta:playlist] Improve and simplify (Closes #9341) --- youtube_dl/extractor/extractors.py | 6 ++-- youtube_dl/extractor/wrzuta.py | 49 ++++++++++++++---------------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5a93fec6a..5fce9f47a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -980,8 +980,10 @@ from .weiqitv import WeiqiTVIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE -from .wrzuta import WrzutaIE -from .wrzuta import WrzutaPlaylistIE +from .wrzuta import ( + WrzutaIE, + WrzutaPlaylistIE, +) from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index 830649214..b811f57fb 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, qualities, + remove_start, ) @@ -82,10 +83,6 @@ class WrzutaIE(InfoExtractor): } -_ENTRY_PATTERN = r'<a href="(?P<playlist_entry_url>[^"]+)" target="_blank" class="playlist\-file\-page">' -_PLAYLIST_SIZE_PATTERN = r'<div class="playlist-counter">[0-9]+/([0-9]+)</div>' - - class WrzutaPlaylistIE(InfoExtractor): """ this class covers extraction of wrzuta playlist entries @@ -101,10 +98,7 @@ class WrzutaPlaylistIE(InfoExtractor): """ IE_NAME = 'wrzuta.pl:playlist' - - _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \ - '(?P<id>[0-9a-zA-Z]+)/.*' - + _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P<id>[0-9a-zA-Z]+)' _TESTS = [{ 'url': 
'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', 'playlist_mincount': 14, @@ -119,6 +113,9 @@ class WrzutaPlaylistIE(InfoExtractor): 'id': '6Nj3wQHx756', 'title': 'Lipiec - Lato 2015 Muzyka Świata', }, + }, { + 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR', + 'only_matching': True, }] def _real_extract(self, url): @@ -126,31 +123,31 @@ class WrzutaPlaylistIE(InfoExtractor): playlist_id = mobj.group('id') uploader = mobj.group('uploader') - entries = [] - webpage = self._download_webpage(url, playlist_id) - playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') - playlist_size = int(playlist_size) if playlist_size else 0 + playlist_size = int_or_none(self._html_search_regex( + (r'<div[^>]+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)', + r'<div[^>]+class=["\']all-counter["\'][^>]*>(.+?)</div>'), + webpage, 'playlist size', default=None)) - playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) + playlist_title = remove_start( + self._og_search_title(webpage), 'Playlista: ') + entries = [] if playlist_size: - entries = list(map( - lambda entry_url: self.url_result(entry_url), - re.findall(_ENTRY_PATTERN, webpage) - )) - + entries = [ + self.url_result(entry_url) + for _, entry_url in re.findall( + r'<a[^>]+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page', + webpage)] if playlist_size > len(entries): playlist_content = self._download_json( - 'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( - uploader_id=uploader, - playlist_id=playlist_id, - ), + 'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id), playlist_id, - 'Downloading playlist content as JSON metadata', - 'Unable to download playlist content as JSON metadata', - ) - entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] + 'Downloading playlist JSON', + 'Unable to download playlist JSON') + entries.extend([ + 
self.url_result(entry['filelink']) + for entry in playlist_content.get('files', []) if entry.get('filelink')]) return self.playlist_result(entries, playlist_id, playlist_title) From bc2a871f3eb5f2fce7fc1097787e829106d11f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:15:09 +0700 Subject: [PATCH 0796/3599] Credit @dracony for rockstargames (#9737) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3272fc6ea..4f77de3c7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -173,3 +173,4 @@ Kevin Deldycke inondle Tomáš Čech Déstin Reed +Roman Tsiupa From a4ea28eee6c89756ac5bddfd0c6ef11dd490a191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:15:47 +0700 Subject: [PATCH 0797/3599] Credit @venth for wrzuta:playlist (#9341) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4f77de3c7..cdf655c39 100644 --- a/AUTHORS +++ b/AUTHORS @@ -174,3 +174,4 @@ inondle Tomáš Čech Déstin Reed Roman Tsiupa +Artur Krysiak From d01fb21d4c58650a3ccd2a6fe2877cc9a53dd942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:19:42 +0700 Subject: [PATCH 0798/3599] release 2016.06.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 6 +++--- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 243f2de5d..4c52c5933 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.12 +[debug] youtube-dl version 2016.06.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c83b8655a..a59fac9b2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -142,9 +142,9 @@ After you have ensured this site is distributing it's content legally, you can f ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. 
The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. -8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. -9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). +7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. +9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 10. 
When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e8c0a5d24..152552dee 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -535,6 +535,7 @@ - **revision3:embed** - **RICE** - **RingTV** + - **RockstarGames** - **RottenTomatoes** - **Roxwel** - **RTBF** @@ -699,6 +700,7 @@ - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** + - **twitch:clips** - **twitch:past_broadcasts** - **twitch:profile** - **twitch:stream** @@ -793,10 +795,11 @@ - **WNL** - **WorldStarHipHop** - **wrzuta.pl** + - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5e9c14398..e441a5dc4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.12' +__version__ = '2016.06.14' From ff4af6ec5903dd922c7bb6ec632b11830c44d04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 02:49:33 +0700 Subject: [PATCH 0799/3599] [lynda] Remove superfluous _NETRC_MACHINE --- youtube_dl/extractor/lynda.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 5b458d9bc..2d5040032 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -95,7 +95,6 @@ 
class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' - _NETRC_MACHINE = 'lynda' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' From 4cef70db6c3c3dfd4f45fdc5a85f98bef3ec67a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 03:16:56 +0700 Subject: [PATCH 0800/3599] [devscripts/release.sh] Add flag for gpg-sign commits --- devscripts/release.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 87e8eda50..f8d466ba8 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -15,6 +15,7 @@ set -e skip_tests=true +gpg_sign_commits="" buildserver='localhost:8142' while true @@ -24,6 +25,10 @@ case "$1" in skip_tests=false shift ;; + --gpg-sign-commits|-S) + gpg_sign_commits="-S" + shift + ;; --buildserver) buildserver="$2" shift 2 @@ -69,7 +74,7 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py -git commit -m "release $version" +git commit $gpg_sign_commits -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." git tag -s -m "Release $version" "$version" @@ -116,7 +121,7 @@ git clone --branch gh-pages --single-branch . 
build/gh-pages "$ROOT/devscripts/gh-pages/update-copyright.py" "$ROOT/devscripts/gh-pages/update-sites.py" git add *.html *.html.in update - git commit -m "release $version" + git commit $gpg_sign_commits -m "release $version" git push "$ROOT" gh-pages git push "$ORIGIN_URL" gh-pages ) From 6c3760292c9d20f891395111bea97f401270d86b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Jun 2016 04:57:59 +0700 Subject: [PATCH 0801/3599] [pornhub] Improve title extraction (Closes #9777) --- youtube_dl/extractor/pornhub.py | 39 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 407ea08d4..6d57e1d35 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -39,7 +40,25 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, - } + }, + }, { + # non-ASCII title + 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', + 'info_dict': { + 'id': '1331683002', + 'ext': 'mp4', + 'title': '重庆婷婷女王足交', + 'uploader': 'cj397186295', + 'duration': 1753, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, @@ -76,19 +95,25 @@ class PornHubIE(InfoExtractor): 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) + # video_title from flashvars contains whitespace instead of non-ASCII (see + # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying + # on that anymore. 
+ title = self._html_search_meta( + 'twitter:title', webpage, default=None) or self._search_regex( + (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', + r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', + r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), + webpage, 'title', group='title') + flashvars = self._parse_json( self._search_regex( r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), video_id) if flashvars: - video_title = flashvars.get('video_title') thumbnail = flashvars.get('image_url') duration = int_or_none(flashvars.get('video_duration')) else: - video_title, thumbnail, duration = [None] * 3 - - if not video_title: - video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') + title, thumbnail, duration = [None] * 3 video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', @@ -137,7 +162,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, 'uploader': video_uploader, - 'title': video_title, + 'title': title, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, From 8bc4dbb1af1573f26685b0e609af319dcf34a48e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 14 Jun 2016 11:14:59 +0800 Subject: [PATCH 0802/3599] [wrzuta.pl] Detect error and update _TESTS --- youtube_dl/extractor/wrzuta.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index b811f57fb..bdd7097ba 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, qualities, remove_start, @@ -27,16 +28,17 @@ class WrzutaIE(InfoExtractor): 'uploader_id': 'laboratoriumdextera', 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', }, + 'skip': 'Redirected to wrzuta.pl', }, { - 'url': 
'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty', - 'md5': 'bc78077859bea7bcfe4295d7d7fc9025', + 'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus', + 'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d', 'info_dict': { - 'id': '063jOPX5ue2', - 'ext': 'ogg', - 'title': 'Liber & Natalia Szroeder - Teraz Ty', - 'duration': 203, - 'uploader_id': 'jolka85', - 'description': 'md5:2d2b6340f9188c8c4cd891580e481096', + 'id': '01xBFabGXu6', + 'ext': 'mp3', + 'title': 'James Horner - Into The Na\'vi World [Bonus]', + 'description': 'md5:30a70718b2cd9df3120fce4445b0263b', + 'duration': 95, + 'uploader_id': 'vexling', }, }] @@ -46,7 +48,10 @@ class WrzutaIE(InfoExtractor): typ = mobj.group('typ') uploader = mobj.group('uploader') - webpage = self._download_webpage(url, video_id) + webpage, urlh = self._download_webpage_handle(url, video_id) + + if urlh.geturl() == 'http://www.wrzuta.pl/': + raise ExtractorError('Video removed', expected=True) quality = qualities(['SD', 'MQ', 'HQ', 'HD']) From 4c93ee8d14dc081d413304d2d2eb694cb62cc71a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Jun 2016 22:34:55 +0700 Subject: [PATCH 0803/3599] [imdb] Improve _VALID_URL (Closes #9788) --- youtube_dl/extractor/imdb.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 3a2b7cec5..0acce9f4c 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -12,7 +12,7 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', @@ -25,6 +25,12 @@ class ImdbIE(InfoExtractor): }, { 'url': 
'http://www.imdb.com/video/_/vi2524815897', 'only_matching': True, + }, { + 'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897', + 'only_matching': True, + }, { + 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', + 'only_matching': True, }] def _real_extract(self, url): From 47212f7bcbd59af40f91796562a6b72ba0439ac4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Jun 2016 11:00:54 +0800 Subject: [PATCH 0804/3599] [utils] Don't transform numbers not starting with a zero Fix test_Viidea and maybe others --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 0e25de6b7..b7ef51f8d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -640,6 +640,9 @@ class TestUtil(unittest.TestCase): "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} }''') + inp = '''{"foo":101}''' + self.assertEqual(js_to_json(inp), '''{"foo":101}''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c8308ba3a..82f67f6cd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1970,7 +1970,7 @@ def js_to_json(code): '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| /\*.*?\*/|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| - (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| + \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) ''', fix_kv, code) From 369ff750812ff874a0f4b4ceebb15a024e2f0a9d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Jun 2016 12:26:45 +0800 Subject: [PATCH 0805/3599] [jwplatform] Improved JWPlayer support --- youtube_dl/extractor/jwplatform.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 
fa6f335e1..e44e31104 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -12,9 +12,35 @@ from ..utils import ( class JWPlatformBaseIE(InfoExtractor): + @staticmethod + def _find_jwplayer_data(webpage): + # TODO: Merge this with JWPlayer-related codes in generic.py + + mobj = re.search( + 'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)', + webpage) + if mobj: + return mobj.group('options') + + def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): + jwplayer_data = self._parse_json( + self._find_jwplayer_data(webpage), video_id) + return self._parse_jwplayer_data( + jwplayer_data, video_id, *args, **kwargs) + def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): + # JWPlayer backward compatibility: flattened playlists + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if 'playlist' not in jwplayer_data: + jwplayer_data = {'playlist': [jwplayer_data]} + video_data = jwplayer_data['playlist'][0] + # JWPlayer backward compatibility: flattened sources + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 + if 'sources' not in video_data: + video_data['sources'] = [video_data] + formats = [] for source in video_data['sources']: source_url = self._proto_relative_url(source['file']) From efbd1eb51a9e940d01a2c02fd02c3778dd88b14b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Jun 2016 12:27:21 +0800 Subject: [PATCH 0806/3599] [wimp] Fix extraction and update _TESTS --- youtube_dl/extractor/wimp.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 828c03dc3..54eb51427 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -1,29 +1,33 @@ from __future__ import unicode_literals -from .common import 
InfoExtractor from .youtube import YoutubeIE +from .jwplatform import JWPlatformBaseIE -class WimpIE(InfoExtractor): +class WimpIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' _TESTS = [{ - 'url': 'http://www.wimp.com/maruexhausted/', + 'url': 'http://www.wimp.com/maru-is-exhausted/', 'md5': 'ee21217ffd66d058e8b16be340b74883', 'info_dict': { - 'id': 'maruexhausted', + 'id': 'maru-is-exhausted', 'ext': 'mp4', 'title': 'Maru is exhausted.', 'description': 'md5:57e099e857c0a4ea312542b684a869b8', } }, { 'url': 'http://www.wimp.com/clowncar/', - 'md5': '4e2986c793694b55b37cf92521d12bb4', + 'md5': '5c31ad862a90dc5b1f023956faec13fe', 'info_dict': { - 'id': 'clowncar', + 'id': 'cG4CEr2aiSg', 'ext': 'webm', - 'title': 'It\'s like a clown car.', - 'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2', + 'title': 'Basset hound clown car...incredible!', + 'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom', + 'upload_date': '20140303', + 'uploader': 'Gretchen Hoey', + 'uploader_id': 'gretchenandjeff1', }, + 'add_ie': ['Youtube'], }] def _real_extract(self, url): @@ -41,14 +45,13 @@ class WimpIE(InfoExtractor): 'ie_key': YoutubeIE.ie_key(), } - video_url = self._search_regex( - r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1', - webpage, 'video URL', group='url') + info_dict = self._extract_jwplayer_data( + webpage, video_id, require_title=False) - return { + info_dict.update({ 'id': video_id, - 'url': video_url, 'title': self._og_search_title(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), - } + }) + + return info_dict From 27e5fa8198a187c62cbe7275ebb474d3821bfaa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Jun 2016 22:33:12 +0700 Subject: [PATCH 0807/3599] [cda] Fix extraction (Closes #9803) --- 
youtube_dl/extractor/cda.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 498d2c0d8..8af318703 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -58,7 +58,8 @@ class CDAIE(InfoExtractor): def extract_format(page, version): unpacked = decode_packed_codes(page) format_url = self._search_regex( - r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False) + r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked, + '%s url' % version, fatal=False, group='url') if not format_url: return f = { @@ -75,7 +76,8 @@ class CDAIE(InfoExtractor): info_dict['formats'].append(f) if not info_dict['duration']: info_dict['duration'] = parse_duration(self._search_regex( - r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False)) + r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1", + unpacked, 'duration', fatal=False, group='duration')) extract_format(webpage, 'default') From d2161cade56353e7bee2a0f3eeb2db9f095c2129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Jun 2016 22:40:55 +0700 Subject: [PATCH 0808/3599] release 2016.06.16 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4c52c5933..eeac09d5d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.16** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.14 +[debug] youtube-dl version 2016.06.16 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 152552dee..13315f4f4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e441a5dc4..52619cae8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.14' +__version__ = '2016.06.16' From ded7511a700cb6962f8a5922b1e3b4ef480a4c81 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Jun 2016 23:42:52 +0700 Subject: [PATCH 0809/3599] [bbccouk] Add support for playlists (Closes #9812) --- youtube_dl/extractor/bbc.py | 75 +++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 2 + 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 74c4510f9..de236fbde 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -31,7 +31,7 @@ class BBCCoUkIE(InfoExtractor): music/clips[/#]| radio/player/ ) - (?P<id>%s) + (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) ''' % _ID_REGEX _MEDIASELECTOR_URLS = [ @@ -698,7 +698,9 @@ class BBCIE(BBCCoUkIE): @classmethod def suitable(cls, url): - return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url) + EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE) + return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) + else super(BBCIE, cls).suitable(url)) def _extract_from_media_meta(self, media_meta, video_id): # Direct links to media in media metadata (e.g. 
@@ -975,3 +977,72 @@ class BBCCoUkArticleIE(InfoExtractor): r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)] return self.playlist_result(entries, playlist_id, title, description) + + +class BBCCoUkPlaylistBaseIE(InfoExtractor): + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) + for video_id in re.findall( + self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)] + + title, description = self._extract_title_and_description(webpage) + + return self.playlist_result(entries, playlist_id, title, description) + + +class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): + IE_NAME = 'bbc.co.uk:iplayer:playlist' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX + _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' + _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' + _TEST = { + 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', + 'info_dict': { + 'id': 'b05rcz9v', + 'title': 'The Disappearance', + 'description': 'French thriller serial about a missing teenager.', + }, + 'playlist_mincount': 6, + } + + def _extract_title_and_description(self, webpage): + title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) + description = self._search_regex( + r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>', + webpage, 'description', fatal=False, group='value') + return title, description + + +class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): + IE_NAME = 'bbc.co.uk:playlist' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX + _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' + _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' + _TESTS = [{ + 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips', + 'info_dict': { + 'id': 'b05rcz9v', + 'title': 'The Disappearance - 
Clips - BBC Four', + 'description': 'French thriller serial about a missing teenager.', + }, + 'playlist_mincount': 7, + }, { + 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player', + 'only_matching': True, + }] + + def _extract_title_and_description(self, webpage): + title = self._og_search_title(webpage, fatal=False) + description = self._og_search_description(webpage) + return title, description diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5fce9f47a..ed51dfdaa 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -71,6 +71,8 @@ from .bandcamp import BandcampIE, BandcampAlbumIE from .bbc import ( BBCCoUkIE, BBCCoUkArticleIE, + BBCCoUkIPlayerPlaylistIE, + BBCCoUkPlaylistIE, BBCIE, ) from .beeg import BeegIE From eb451890da79e686a218e42c9761df2a6f5b6ee3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Jun 2016 03:04:14 +0700 Subject: [PATCH 0810/3599] [carambatv] Add extractor (Closes #9815) --- youtube_dl/extractor/carambatv.py | 88 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 4 ++ 2 files changed, 92 insertions(+) create mode 100644 youtube_dl/extractor/carambatv.py diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py new file mode 100644 index 000000000..5797fb951 --- /dev/null +++ b/youtube_dl/extractor/carambatv.py @@ -0,0 +1,88 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + float_or_none, + int_or_none, + try_get, +) + + +class CarambaTVIE(InfoExtractor): + _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)' + _TESTS = [{ + 'url': 
'http://video1.carambatv.ru/v/191910501', + 'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a', + 'info_dict': { + 'id': '191910501', + 'ext': 'mp4', + 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 2678.31, + }, + }, { + 'url': 'carambatv:191910501', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id, + video_id) + + title = video['title'] + + base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id + + formats = [{ + 'url': base_url + f['fn'], + 'height': int_or_none(f.get('height')), + 'format_id': '%sp' % f['height'] if f.get('height') else None, + } for f in video['qualities'] if f.get('fn')] + self._sort_formats(formats) + + thumbnail = video.get('splash') + duration = float_or_none(try_get( + video, lambda x: x['annotations'][0]['end_time'], compat_str)) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } + + +class CarambaTVPageIE(InfoExtractor): + _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/', + 'md5': '', + 'info_dict': { + 'id': '191910501', + 'ext': 'mp4', + 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 2678.31, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_url = self._og_search_property('video:iframe', webpage, default=None) + + if not video_url: + video_id = self._search_regex( + r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)', + webpage, 'video id') + video_url = 'carambatv:%s' % video_id + + return self.url_result(video_url, CarambaTVIE.ie_key()) diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index ed51dfdaa..23320229b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -110,6 +110,10 @@ from .camwithher import CamWithHerIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .canvas import CanvasIE +from .carambatv import ( + CarambaTVIE, + CarambaTVPageIE, +) from .cbc import ( CBCIE, CBCPlayerIE, From 6929569403aeade9aced5c4103db652e9c16bdca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Jun 2016 04:06:19 +0700 Subject: [PATCH 0811/3599] [mitele] Extract series metadata and make title more robust (Closes #9758) --- youtube_dl/extractor/mitele.py | 49 +++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 3589c223d..5a00cd397 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -1,5 +1,8 @@ +# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import ( compat_urllib_parse_urlencode, @@ -8,6 +11,7 @@ from ..compat import ( from ..utils import ( get_element_by_attribute, int_or_none, + remove_start, ) @@ -15,7 +19,7 @@ class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', # MD5 is unstable 'info_dict': { @@ -24,10 +28,31 @@ class MiTeleIE(InfoExtractor): 'ext': 'flv', 'title': 'Tor, la web invisible', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', + 'series': 'Diario de', + 'season': 'La redacción', + 'episode': 'Programa 144', 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, - } + }, { + # no explicit title + 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', + 'info_dict': { 
+ 'id': 'eLZSwoEd1S3pVyUm8lc6F', + 'display_id': 'programa-226', + 'ext': 'flv', + 'title': 'Cuarto Milenio - Temporada 6 - Programa 226', + 'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', + 'series': 'Cuarto Milenio', + 'season': 'Temporada 6', + 'episode': 'Programa 226', + 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'duration': 7312, + }, + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -70,7 +95,22 @@ class MiTeleIE(InfoExtractor): self._sort_formats(formats) title = self._search_regex( - r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') + r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', + webpage, 'title', default=None) + + mobj = re.search(r'''(?sx) + class="Destacado-text"[^>]*>.*?<h1>\s* + <span>(?P<series>[^<]+)</span>\s* + <span>(?P<season>[^<]+)</span>\s* + <span>(?P<episode>[^<]+)</span>''', webpage) + series, season, episode = mobj.groups() if mobj else [None] * 3 + + if not title: + if mobj: + title = '%s - %s - %s' % (series, season, episode) + else: + title = remove_start(self._search_regex( + r'<title>([^<]+)', webpage, 'title'), 'Ver online ') video_id = self._search_regex( r'data-media-id\s*=\s*"([^"]+)"', webpage, @@ -83,6 +123,9 @@ class MiTeleIE(InfoExtractor): 'display_id': display_id, 'title': title, 'description': get_element_by_attribute('class', 'text', webpage), + 'series': series, + 'season': season, + 'episode': episode, 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, From f011876076a9fc4ee3fcb8b17f8bc2bcf5c8b8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 04:40:48 +0700 Subject: [PATCH 0812/3599] [nickde] Add extractor (Closes #9778) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/nick.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index 23320229b..efa58f70f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -518,7 +518,10 @@ from .nhl import ( NHLVideocenterCategoryIE, NHLIE, ) -from .nick import NickIE +from .nick import ( + NickIE, + NickDeIE, +) from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninegag import NineGagIE from .noco import NocoIE diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index ce065f2b0..e96013791 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor from ..compat import compat_urllib_parse_urlencode +from ..utils import update_url_query class NickIE(MTVServicesInfoExtractor): @@ -61,3 +62,26 @@ class NickIE(MTVServicesInfoExtractor): def _extract_mgid(self, webpage): return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') + + +class NickDeIE(MTVServicesInfoExtractor): + IE_NAME = 'nick.de' + _VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', + 'only_matching': True, + }, { + 'url': 'http://www.nick.de/shows/342-icarly', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mrss_url = update_url_query(self._search_regex( + r'data-mrss=(["\'])(?Phttp.+?)\1', webpage, 'mrss url', group='url'), + {'siteKey': 'nick.de'}) + + return self._get_videos_info_from_url(mrss_url, video_id) From 20a6a154fe8e6a5d246ad1326c4082ddd6091718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 04:46:26 +0700 Subject: [PATCH 0813/3599] [mtv] Use compat_xpath and fix FutureWarning --- youtube_dl/extractor/mtv.py | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 640ee3d93..8a638a47c 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..compat import ( compat_urllib_parse_urlencode, compat_str, + compat_xpath, ) from ..utils import ( ExtractorError, @@ -139,9 +140,9 @@ class MTVServicesInfoExtractor(InfoExtractor): itemdoc, './/{http://search.yahoo.com/mrss/}category', 'scheme', 'urn:mtvn:video_title') if title_el is None: - title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') + title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title')) if title_el is None: - title_el = itemdoc.find('.//title') or itemdoc.find('./title') + title_el = itemdoc.find(compat_xpath('.//title')) if title_el.text is None: title_el = None From d0d93f76ea0dd1dae15bdba6059815d9cc467b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 05:30:46 +0700 Subject: [PATCH 0814/3599] [pornhd] Fix metadata extraction --- youtube_dl/extractor/pornhd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 39b53ecf6..33faf5e58 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -39,9 +39,10 @@ class PornHdIE(InfoExtractor): [r']+class=["\']video-name["\'][^>]*>([^<]+)', r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') description = self._html_search_regex( - r'
([^<]+)
', webpage, 'description', fatal=False) + r'<(div|p)[^>]+class="description"[^>]*>(?P[^<]+)', webpage, 'view count', fatal=False)) + r'(\d+) views\s*<', webpage, 'view count', fatal=False)) thumbnail = self._search_regex( r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) From e6fe993c318738fee5a4a2ce7a86c4512e42653a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 05:37:53 +0700 Subject: [PATCH 0815/3599] [pornhd] Improve formats extraction --- youtube_dl/extractor/pornhd.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 33faf5e58..e7721b013 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -7,7 +7,6 @@ from .common import InfoExtractor from ..utils import ( int_or_none, js_to_json, - qualities, ) @@ -46,18 +45,19 @@ class PornHdIE(InfoExtractor): thumbnail = self._search_regex( r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) - quality = qualities(['sd', 'hd']) sources = json.loads(js_to_json(self._search_regex( r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", webpage, 'sources'))) formats = [] - for qname, video_url in sources.items(): + for format_id, video_url in sources.items(): if not video_url: continue + height = int_or_none(self._search_regex( + r'^(\d+)[pP]', format_id, 'height', default=None)) formats.append({ 'url': video_url, - 'format_id': qname, - 'quality': quality(qname), + 'format_id': format_id, + 'height': height, }) self._sort_formats(formats) From 667d96480b4f9c78ceace063415c3424d4d562ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 05:42:20 +0700 Subject: [PATCH 0816/3599] [pornhd] Detect removed videos and modernize --- youtube_dl/extractor/pornhd.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py 
index e7721b013..7a5f00fe0 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, js_to_json, ) @@ -37,17 +37,17 @@ class PornHdIE(InfoExtractor): title = self._html_search_regex( [r']+class=["\']video-name["\'][^>]*>([^<]+)', r'(.+?) - .*?[Pp]ornHD.*?'], webpage, 'title') - description = self._html_search_regex( - r'<(div|p)[^>]+class="description"[^>]*>(?P[^<]+)]+class="no-video"[^>]*>(?P.+?)]+class="description"[^>]*>(?P[^<]+) Date: Sat, 18 Jun 2016 05:50:17 +0700 Subject: [PATCH 0817/3599] [pornhd] Add working test --- youtube_dl/extractor/pornhd.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index 7a5f00fe0..8df12eec0 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -12,7 +12,21 @@ from ..utils import ( class PornHdIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P\d+)(?:/(?P.+))?' 
- _TEST = { + _TESTS = [{ + 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', + 'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5', + 'info_dict': { + 'id': '9864', + 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', + 'ext': 'mp4', + 'title': 'Restroom selfie masturbation', + 'description': 'md5:3748420395e03e31ac96857a8f125b2b', + 'thumbnail': 're:^https?://.*\.jpg', + 'view_count': int, + 'age_limit': 18, + } + }, { + # removed video 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'md5': '956b8ca569f7f4d8ec563e2c41598441', 'info_dict': { @@ -24,8 +38,9 @@ class PornHdIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg', 'view_count': int, 'age_limit': 18, - } - } + }, + 'skip': 'Not available anymore', + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 5fc2757682483b4b0277df2e2454dab882237882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 06:00:05 +0700 Subject: [PATCH 0818/3599] release 2016.06.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index eeac09d5d..4b281e649 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.16** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.18*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.16 +[debug] youtube-dl version 2016.06.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 13315f4f4..6a7a4bf2d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -74,6 +74,8 @@ - **bbc**: BBC - **bbc.co.uk**: BBC iPlayer - **bbc.co.uk:article**: BBC articles + - **bbc.co.uk:iplayer:playlist** + - **bbc.co.uk:playlist** - **BeatportPro** - **Beeg** - **BehindKink** @@ -104,6 +106,8 @@ - **canalc2.tv** - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv - **Canvas** + - **CarambaTV** + - **CarambaTVPage** - **CBC** - **CBCPlayer** - **CBS** @@ -432,6 +436,7 @@ - **nhl.com:videocenter** - **nhl.com:videocenter:category**: NHL videocenter category - **nick.com** + - **nick.de** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **njoy**: N-JOY diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 52619cae8..92b7badc9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.16' +__version__ = '2016.06.18' 
From 0f47cc2e925014afef4339a8213d52797a710eb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 06:20:34 +0700 Subject: [PATCH 0819/3599] release 2016.06.18.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4b281e649..2736bb23b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.18.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.18.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.18 +[debug] youtube-dl version 2016.06.18.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6a7a4bf2d..c79798d86 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 92b7badc9..f71bc8c2a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.18' +__version__ = '2016.06.18.1' From c878e635de1563ded54b2504104e94cd59c70b55 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:17:24 +0800 Subject: [PATCH 0820/3599] [bet] Moved to MTVServices --- youtube_dl/extractor/bet.py | 96 +++++++++++++------------------------ 1 file changed, 34 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 986245bf0..bd3ee2e2e 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -1,31 +1,27 @@ from __future__ import 
unicode_literals -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote -from ..utils import ( - xpath_text, - xpath_with_ns, - int_or_none, - parse_iso8601, -) +from .mtv import MTVServicesInfoExtractor +from ..utils import unified_strdate +from ..compat import compat_urllib_parse_urlencode -class BetIE(InfoExtractor): +class BetIE(MTVServicesInfoExtractor): _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P.+?)\.html' _TESTS = [ { 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', 'info_dict': { - 'id': 'news/national/2014/a-conversation-with-president-obama', + 'id': '07e96bd3-8850-3051-b856-271b457f0ab8', 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', 'ext': 'flv', 'title': 'A Conversation With President Obama', - 'description': 'md5:699d0652a350cf3e491cd15cc745b5da', + 'description': 'President Obama urges persistence in confronting racism and bias.', 'duration': 1534, - 'timestamp': 1418075340, 'upload_date': '20141208', - 'uploader': 'admin', 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'subtitles': { + 'en': 'mincount:2', + } }, 'params': { # rtmp download @@ -35,16 +31,17 @@ class BetIE(InfoExtractor): { 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', 'info_dict': { - 'id': 'news/national/2014/justice-for-ferguson-a-community-reacts', + 'id': '9f516bf1-7543-39c4-8076-dd441b459ba9', 'display_id': 'justice-for-ferguson-a-community-reacts', 'ext': 'flv', 'title': 'Justice for Ferguson: A Community Reacts', 'description': 'A BET News special.', 'duration': 1696, - 'timestamp': 1416942360, 'upload_date': '20141125', - 'uploader': 'admin', 'thumbnail': 're:(?i)^https?://.*\.jpg$', + 'subtitles': { + 'en': 'mincount:2', + } }, 'params': { # rtmp download @@ -53,57 +50,32 @@ class BetIE(InfoExtractor): } ] + _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" + + def _get_feed_query(self, uri): + 
return compat_urllib_parse_urlencode({ + 'uuid': uri, + }) + + def _extract_mgid(self, webpage): + return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') + def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + mgid = self._extract_mgid(webpage) + videos_info = self._get_videos_info(mgid) - media_url = compat_urllib_parse_unquote(self._search_regex( - [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], - webpage, 'media URL')) + info_dict = videos_info['entries'][0] - video_id = self._search_regex( - r'/video/(.*)/_jcr_content/', media_url, 'video id') + upload_date = unified_strdate(self._html_search_meta('date', webpage)) + description = self._html_search_meta('description', webpage) - mrss = self._download_xml(media_url, display_id) - - item = mrss.find('./channel/item') - - NS_MAP = { - 'dc': 'http://purl.org/dc/elements/1.1/', - 'media': 'http://search.yahoo.com/mrss/', - 'ka': 'http://kickapps.com/karss', - } - - title = xpath_text(item, './title', 'title') - description = xpath_text( - item, './description', 'description', fatal=False) - - timestamp = parse_iso8601(xpath_text( - item, xpath_with_ns('./dc:date', NS_MAP), - 'upload date', fatal=False)) - uploader = xpath_text( - item, xpath_with_ns('./dc:creator', NS_MAP), - 'uploader', fatal=False) - - media_content = item.find( - xpath_with_ns('./media:content', NS_MAP)) - duration = int_or_none(media_content.get('duration')) - smil_url = media_content.get('url') - - thumbnail = media_content.find( - xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') - - formats = self._extract_smil_formats(smil_url, display_id) - self._sort_formats(formats) - - return { - 'id': video_id, + info_dict.update({ 'display_id': display_id, - 'title': title, 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'uploader': uploader, - 'duration': duration, - 'formats': formats, - } + 'upload_date': upload_date, 
+ }) + + return info_dict From a72df5f36feddaffbcfa35e3415562509a9f67b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:19:06 +0800 Subject: [PATCH 0821/3599] [mtvservices] Fix ext for RTMP streams --- youtube_dl/extractor/mtv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 8a638a47c..dd0639589 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -85,9 +85,10 @@ class MTVServicesInfoExtractor(InfoExtractor): rtmp_video_url = rendition.find('./src').text if rtmp_video_url.endswith('siteunavail.png'): continue + new_url = self._transform_rtmp_url(rtmp_video_url) formats.append({ - 'ext': ext, - 'url': self._transform_rtmp_url(rtmp_video_url), + 'ext': 'flv' if new_url.startswith('rtmp') else ext, + 'url': new_url, 'format_id': rendition.get('bitrate'), 'width': int(rendition.get('width')), 'height': int(rendition.get('height')), From 26264cb0566309bcdc7f0f2a8ee376bf205fc675 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:21:40 +0800 Subject: [PATCH 0822/3599] [adobetv] Use embedded data in the webpage Sometimes the HTML webpage is returned even with '?format=json' --- youtube_dl/extractor/adobetv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 8753ee2cf..5ae16fa16 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -156,7 +156,10 @@ class AdobeTVVideoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json(url + '?format=json', video_id) + webpage = self._download_webpage(url, video_id) + + video_data = self._parse_json(self._search_regex( + r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) formats = [{ 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')), From 
1b6cf16be7e8a771ca79e7eb161db9ff59238dab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:27:39 +0800 Subject: [PATCH 0823/3599] [aftonbladet] Fix extraction --- youtube_dl/extractor/aftonbladet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index d548592fe..5766b4fe8 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -24,10 +24,10 @@ class AftonbladetIE(InfoExtractor): webpage = self._download_webpage(url, video_id) # find internal video meta data - meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' + meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json' player_config = self._parse_json(self._html_search_regex( r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) - internal_meta_id = player_config['videoId'] + internal_meta_id = player_config['aptomaVideoId'] internal_meta_url = meta_url % internal_meta_id internal_meta_json = self._download_json( internal_meta_url, video_id, 'Downloading video meta data') From 09e3f91e408eb357929abad3710c799376004138 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:34:58 +0800 Subject: [PATCH 0824/3599] [arte] Update _TESTS and fix for pages with multiple YouTube videos Some tests are from #6895 and #6613 --- youtube_dl/extractor/arte.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index f40532929..9e39faf47 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -240,10 +240,10 @@ class ArteTVPlus7IE(ArteTVBaseIE): return self._extract_from_json_url(json_url, video_id, lang, title=title) # Different kind of embed URL (e.g. 
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - embed_url = self._search_regex( - r']+src=(["\'])(?P.+?)\1', - webpage, 'embed url', group='url') - return self.url_result(embed_url) + entries = [ + self.url_result(url) + for _, url in re.findall(r']+src=(["\'])(?P.+?)\1', webpage)] + return self.playlist_result(entries) # It also uses the arte_vp_url url from the webpage to extract the information @@ -252,22 +252,17 @@ class ArteTVCreativeIE(ArteTVPlus7IE): _VALID_URL = r'https?://creative\.arte\.tv/(?Pfr|de|en|es)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', + 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', 'info_dict': { - 'id': '72176', + 'id': '057405-001-A', 'ext': 'mp4', - 'title': 'Folge 2 - Corporate Design', - 'upload_date': '20131004', + 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', + 'upload_date': '20150716', }, }, { 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', - 'info_dict': { - 'id': '160676', - 'ext': 'mp4', - 'title': 'Monty Python live (mostly)', - 'description': 'Événement ! 
Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', - 'upload_date': '20140805', - } + 'playlist_count': 11, + 'add_ie': ['Youtube'], }, { 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', 'only_matching': True, @@ -349,14 +344,13 @@ class ArteTVCinemaIE(ArteTVPlus7IE): _VALID_URL = r'https?://cinema\.arte\.tv/(?Pfr|de|en|es)/(?P.+)' _TESTS = [{ - 'url': 'http://cinema.arte.tv/de/node/38291', - 'md5': '6b275511a5107c60bacbeeda368c3aa1', + 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', + 'md5': 'a5b9dd5575a11d93daf0e3f404f45438', 'info_dict': { - 'id': '055876-000_PWA12025-D', + 'id': '062494-000-A', 'ext': 'mp4', - 'title': 'Tod auf dem Nil', - 'upload_date': '20160122', - 'description': 'md5:7f749bbb77d800ef2be11d54529b96bc', + 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', + 'upload_date': '20150807', }, }] From 573c35272f7a1973e44109614c8639e0d3e21fdd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:35:55 +0800 Subject: [PATCH 0825/3599] [bbc] Skip a geo-restricted test case --- youtube_dl/extractor/bbc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index de236fbde..4b3cd8c65 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -192,6 +192,7 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, }, + 'skip': 'Now it\'s really geo-restricted', }, { # compact player (https://github.com/rg3/youtube-dl/issues/8147) 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', From 1f3574575851eb34b6c6a983e276fa77a0dc3da1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:39:08 +0800 Subject: [PATCH 0826/3599] [azubu] Don't fail on optional fields --- youtube_dl/extractor/azubu.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git 
a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index efa624de1..a813eb429 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -46,6 +46,7 @@ class AzubuIE(InfoExtractor): 'uploader_id': 272749, 'view_count': int, }, + 'skip': 'Channel offline', }, ] @@ -56,22 +57,26 @@ class AzubuIE(InfoExtractor): 'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data'] title = data['title'].strip() - description = data['description'] - thumbnail = data['thumbnail'] - view_count = data['view_count'] - uploader = data['user']['username'] - uploader_id = data['user']['id'] + description = data.get('description') + thumbnail = data.get('thumbnail') + view_count = data.get('view_count') + user = data.get('user', {}) + uploader = user.get('username') + uploader_id = user.get('id') stream_params = json.loads(data['stream_params']) - timestamp = float_or_none(stream_params['creationDate'], 1000) - duration = float_or_none(stream_params['length'], 1000) + timestamp = float_or_none(stream_params.get('creationDate'), 1000) + duration = float_or_none(stream_params.get('length'), 1000) renditions = stream_params.get('renditions') or [] video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength') if video: renditions.append(video) + if not renditions and not user.get('channel', {}).get('is_live', True): + raise ExtractorError('%s said: channel is offline.' 
% self.IE_NAME, expected=True) + formats = [{ 'url': fmt['url'], 'width': fmt['frameWidth'], From 0278aa443f7b4cc0886f40d6704925dc2488921d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 12:53:48 +0800 Subject: [PATCH 0827/3599] [br] Skip invalid tests --- youtube_dl/extractor/br.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 11cf49851..ff0aa11b1 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -29,7 +29,8 @@ class BRIE(InfoExtractor): 'duration': 180, 'uploader': 'Reinhard Weber', 'upload_date': '20150422', - } + }, + 'skip': '404 not found', }, { 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', @@ -40,7 +41,8 @@ class BRIE(InfoExtractor): 'title': 'Manfred Schreiber ist tot', 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97', 'duration': 26, - } + }, + 'skip': '404 not found', }, { 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html', @@ -51,7 +53,8 @@ class BRIE(InfoExtractor): 'title': 'Kurzweilig und sehr bewegend', 'description': 'md5:0351996e3283d64adeb38ede91fac54e', 'duration': 296, - } + }, + 'skip': '404 not found', }, { 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', From 6d0d4fc26d45c55ef6e99b31892047b0bdfed0e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 13:40:55 +0800 Subject: [PATCH 0828/3599] [wdr] Add WDRBaseIE, for Sportschau (#9799) --- youtube_dl/extractor/wdr.py | 179 +++++++++++++++++++----------------- 1 file changed, 95 insertions(+), 84 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 6b83a2a04..390f9e830 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -15,7 +15,87 @@ from ..utils import ( ) -class WDRIE(InfoExtractor): +class WDRBaseIE(InfoExtractor): + 
def _extract_wdr_video(self, webpage, display_id): + # for wdr.de the data-extension is in a tag with the class "mediaLink" + # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" + # for wdrmaus its in a link to the page in a multiline "videoLink"-tag + json_metadata = self._html_search_regex( + r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + webpage, 'media link', default=None, flags=re.MULTILINE) + + if not json_metadata: + return + + media_link_obj = self._parse_json(json_metadata, display_id, + transform_source=js_to_json) + jsonp_url = media_link_obj['mediaObj']['url'] + + metadata = self._download_json( + jsonp_url, 'metadata', transform_source=strip_jsonp) + + metadata_tracker_data = metadata['trackerData'] + metadata_media_resource = metadata['mediaResource'] + + formats = [] + + # check if the metadata contains a direct URL to a file + for kind, media_resource in metadata_media_resource.items(): + if kind not in ('dflt', 'alt'): + continue + + for tag_name, medium_url in media_resource.items(): + if tag_name not in ('videoURL', 'audioURL'): + continue + + ext = determine_ext(medium_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + medium_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls')) + elif ext == 'f4m': + manifest_url = update_url_query( + medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) + formats.extend(self._extract_f4m_formats( + manifest_url, display_id, f4m_id='hds', fatal=False)) + elif ext == 'smil': + formats.extend(self._extract_smil_formats( + medium_url, 'stream', fatal=False)) + else: + a_format = { + 'url': medium_url + } + if ext == 'unknown_video': + urlh = self._request_webpage( + medium_url, display_id, note='Determining extension') + ext = urlhandle_detect_ext(urlh) + a_format['ext'] = ext + formats.append(a_format) + + self._sort_formats(formats) + + subtitles = {} + caption_url = 
metadata_media_resource.get('captionURL') + if caption_url: + subtitles['de'] = [{ + 'url': caption_url, + 'ext': 'ttml', + }] + + title = metadata_tracker_data['trackerClipTitle'] + + return { + 'id': metadata_tracker_data.get('trackerClipId', display_id), + 'display_id': display_id, + 'title': title, + 'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), + 'formats': formats, + 'subtitles': subtitles, + 'upload_date': unified_strdate(metadata_tracker_data.get('trackerClipAirTime')), + } + + +class WDRIE(WDRBaseIE): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P[^/]+)/(?P.+)\.html' _VALID_URL = r'(?Phttps?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL @@ -91,10 +171,10 @@ class WDRIE(InfoExtractor): }, { 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', - # HDS download, MD5 is unstable + 'md5': '803138901f6368ee497b4d195bb164f2', 'info_dict': { 'id': 'mdb-186083', - 'ext': 'flv', + 'ext': 'mp4', 'upload_date': '20130919', 'title': 'Sachgeschichte - Achterbahn ', 'description': '- Die Sendung mit der Maus -', @@ -120,14 +200,9 @@ class WDRIE(InfoExtractor): display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) - # for wdr.de the data-extension is in a tag with the class "mediaLink" - # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" - # for wdrmaus its in a link to the page in a multiline "videoLink"-tag - json_metadata = self._html_search_regex( - r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', - webpage, 'media link', default=None, flags=re.MULTILINE) + info_dict = self._extract_wdr_video(webpage, display_id) - if not json_metadata: + if not info_dict: entries = [ self.url_result(page_url + href[0], 'WDR') for href in re.findall( @@ -140,86 +215,22 @@ class WDRIE(InfoExtractor): raise 
ExtractorError('No downloadable streams found', expected=True) - media_link_obj = self._parse_json(json_metadata, display_id, - transform_source=js_to_json) - jsonp_url = media_link_obj['mediaObj']['url'] - - metadata = self._download_json( - jsonp_url, 'metadata', transform_source=strip_jsonp) - - metadata_tracker_data = metadata['trackerData'] - metadata_media_resource = metadata['mediaResource'] - - formats = [] - - # check if the metadata contains a direct URL to a file - for kind, media_resource in metadata_media_resource.items(): - if kind not in ('dflt', 'alt'): - continue - - for tag_name, medium_url in media_resource.items(): - if tag_name not in ('videoURL', 'audioURL'): - continue - - ext = determine_ext(medium_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - medium_url, display_id, 'mp4', 'm3u8_native', - m3u8_id='hls')) - elif ext == 'f4m': - manifest_url = update_url_query( - medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) - formats.extend(self._extract_f4m_formats( - manifest_url, display_id, f4m_id='hds', fatal=False)) - elif ext == 'smil': - formats.extend(self._extract_smil_formats( - medium_url, 'stream', fatal=False)) - else: - a_format = { - 'url': medium_url - } - if ext == 'unknown_video': - urlh = self._request_webpage( - medium_url, display_id, note='Determining extension') - ext = urlhandle_detect_ext(urlh) - a_format['ext'] = ext - formats.append(a_format) - - self._sort_formats(formats) - - subtitles = {} - caption_url = metadata_media_resource.get('captionURL') - if caption_url: - subtitles['de'] = [{ - 'url': caption_url, - 'ext': 'ttml', - }] - - title = metadata_tracker_data.get('trackerClipTitle') is_live = url_type == 'live' if is_live: - title = self._live_title(title) - upload_date = None - elif 'trackerClipAirTime' in metadata_tracker_data: - upload_date = metadata_tracker_data['trackerClipAirTime'] - else: - upload_date = self._html_search_meta('DC.Date', webpage, 'upload date') + 
info_dict.update({ + 'title': self._live_title(info_dict['title']), + 'upload_date': None, + }) + elif 'upload_date' not in info_dict: + info_dict['upload_date'] = unified_strdate(self._html_search_meta('DC.Date', webpage, 'upload date')) - if upload_date: - upload_date = unified_strdate(upload_date) - - return { - 'id': metadata_tracker_data.get('trackerClipId', display_id), - 'display_id': display_id, - 'title': title, - 'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), - 'formats': formats, - 'upload_date': upload_date, + info_dict.update({ 'description': self._html_search_meta('Description', webpage), 'is_live': is_live, - 'subtitles': subtitles, - } + }) + + return info_dict class WDRMobileIE(InfoExtractor): From b5aad37f6bdc72acaca198202dc9f7eaa3185e51 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 13:41:50 +0800 Subject: [PATCH 0829/3599] [ard] Remove SportschauIE, which is now based on WDR (#9799) --- youtube_dl/extractor/ard.py | 39 ------------------------------------- 1 file changed, 39 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 26446c2fe..fd45b3e42 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -8,7 +8,6 @@ from .generic import GenericIE from ..utils import ( determine_ext, ExtractorError, - get_element_by_attribute, qualities, int_or_none, parse_duration, @@ -274,41 +273,3 @@ class ARDIE(InfoExtractor): 'upload_date': upload_date, 'thumbnail': thumbnail, } - - -class SportschauIE(ARDMediathekIE): - IE_NAME = 'Sportschau' - _VALID_URL = r'(?Phttps?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P[^/#?]+))\.html' - _TESTS = [{ - 'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html', - 'info_dict': { - 'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100', - 'ext': 'mp4', - 'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"', - 'thumbnail': 're:^https?://.*\.jpg$', - 
'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - base_url = mobj.group('baseurl') - - webpage = self._download_webpage(url, video_id) - title = get_element_by_attribute('class', 'headline', webpage) - description = self._html_search_meta('description', webpage, 'description') - - info = self._extract_media_info( - base_url + '-mc_defaultQuality-h.json', webpage, video_id) - - info.update({ - 'title': title, - 'description': description, - }) - - return info From e8f13f2637fd33b20ac2682dbbdaef63b6288bf4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 13:42:58 +0800 Subject: [PATCH 0830/3599] [sportschau.de] Fix extraction and moved to its own file (closes #9799) --- youtube_dl/extractor/sportschau.py | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 youtube_dl/extractor/sportschau.py diff --git a/youtube_dl/extractor/sportschau.py b/youtube_dl/extractor/sportschau.py new file mode 100644 index 000000000..0d7925a08 --- /dev/null +++ b/youtube_dl/extractor/sportschau.py @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .wdr import WDRBaseIE +from ..utils import get_element_by_attribute + + +class SportschauIE(WDRBaseIE): + IE_NAME = 'Sportschau' + _VALID_URL = r'https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video-?(?P[^/#?]+)\.html' + _TEST = { + 'url': 'http://www.sportschau.de/uefaeuro2016/videos/video-dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100.html', + 'info_dict': { + 'id': 'mdb-1140188', + 'display_id': 'dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100', + 'ext': 'mp4', + 'title': 'DFB-Team geht gut gelaunt ins Spiel gegen Polen', + 'description': 'Vor dem zweiten 
Gruppenspiel gegen Polen herrscht gute Stimmung im deutschen Team. Insbesondere Bastian Schweinsteiger strotzt vor Optimismus nach seinem Tor gegen die Ukraine.', + 'upload_date': '20160615', + }, + 'skip': 'Geo-restricted to Germany', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = get_element_by_attribute('class', 'headline', webpage) + description = self._html_search_meta('description', webpage, 'description') + + info = self._extract_wdr_video(webpage, video_id) + + info.update({ + 'title': title, + 'description': description, + }) + + return info From b0b128049a9180e58698e74bdc8079d80086dbef Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 18 Jun 2016 13:43:47 +0800 Subject: [PATCH 0831/3599] [extractors] Update references to sportschau (#9799) --- youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index efa58f70f..6dc5904b3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -44,7 +44,6 @@ from .archiveorg import ArchiveOrgIE from .ard import ( ARDIE, ARDMediathekIE, - SportschauIE, ) from .arte import ( ArteTvIE, @@ -747,6 +746,7 @@ from .sportbox import ( SportBoxEmbedIE, ) from .sportdeutschland import SportDeutschlandIE +from .sportschau import SportschauIE from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, From d7c6c656c5c2fa64a1f8a4365a5fe62861b1dceb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 21:42:17 +0700 Subject: [PATCH 0832/3599] [arte:+7] Expand _VALID_URL (Closes #9820) --- youtube_dl/extractor/arte.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 9e39faf47..e602d3673 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -180,11 +180,14 @@ class 
ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?Pfr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P[^/]+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?Pfr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P[^/]+)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', 'only_matching': True, + }. { + 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', + 'only_matching': True, }] @classmethod From c1823c8ad9ca49f61ef15bbc126b0f95e4825d14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 22:08:48 +0700 Subject: [PATCH 0833/3599] [README.md] Remove 'small' from description (#9814) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f1e59542d..c6feef116 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Or with [MacPorts](https://www.macports.org/): Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). # DESCRIPTION -**youtube-dl** is a small command-line program to download videos from +**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on Mac OS X. 
It is released to the public domain, From 90b6288cce3e5a433a521bc862d98d31be9624c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 18 Jun 2016 22:23:48 +0700 Subject: [PATCH 0834/3599] [arte:+7] Simplify _VALID_URL --- youtube_dl/extractor/arte.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index e602d3673..049f1fa9e 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -180,12 +180,12 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?Pfr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P[^/]+)/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?Pfr|de|en|es)/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', 'only_matching': True, - }. { + }, { 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', 'only_matching': True, }] From 41c1023300596f62dff93d9275f5e4d7a6762e66 Mon Sep 17 00:00:00 2001 From: Steven Gosseling Date: Fri, 26 Feb 2016 13:31:52 +0100 Subject: [PATCH 0835/3599] [closertotruth] Add extractor Removed print statement from code. Replaced two regex searches with the corret ones. Removed some unnecessary semicolumns fixed title extraction refactored everything to search_regex processed comments on commit 5650b0d, fixed feedback from flake8 Improved regexes and returns info dict now. 
Added support for closertotruth interview URL Added support for episodes page --- youtube_dl/extractor/closertotruth.py | 69 +++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/closertotruth.py diff --git a/youtube_dl/extractor/closertotruth.py b/youtube_dl/extractor/closertotruth.py new file mode 100644 index 000000000..d04ff5e4f --- /dev/null +++ b/youtube_dl/extractor/closertotruth.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class CloserToTruthIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(episodes/|(series|interviews)/(?:[^#]+#video-)?(?P\d+))' + _TESTS = [ + { + 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', + 'md5': '5c548bde260a9247ddfdc07c7458ed29', + 'info_dict': { + 'id': '0_zof1ktre', + 'ext': 'mov', + 'title': 'Solutions to the Mind-Body Problem?', + 'upload_date': '20140221', + 'timestamp': 1392956007, + 'uploader_id': 'CTTXML' + } + }, + { + 'url': 'http://closertotruth.com/interviews/1725', + 'md5': 'b00598fd6a38372edb976408f72c5792', + 'info_dict': { + 'id': '0_19qv5rn1', + 'ext': 'mov', + 'title': 'AyaFr-002 - Francisco J. 
Ayala', + 'upload_date': '20140307', + 'timestamp': 1394236431, + 'uploader_id': 'CTTXML' + } + }, + { + 'url': 'http://closertotruth.com/episodes/how-do-brains-work', + 'md5': '4dd96aa0a5c296afa5c0bd24895c2f16', + 'info_dict': { + 'id': '0_iuxai6g6', + 'ext': 'mov', + 'title': 'How do Brains Work?', + 'upload_date': '20140221', + 'timestamp': 1392956024, + 'uploader_id': 'CTTXML' + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_title = self._search_regex(r'(.+) \|.+', webpage, 'video title') + + entry_id = self._search_regex(r']+id="(?:video-%s|embed-kaltura)"[^>]+data-kaltura="([^"]+)' % video_id, webpage, "video entry_id") + + interviewee_name = self._search_regex(r'
(.*).+', webpage, "video interviewee_name", False) + + if interviewee_name: + video_title = video_title + ' - ' + interviewee_name + + p_id = self._search_regex(r']+src=["\'].+?partner_id/(\d+)', webpage, "kaltura partner_id") + + return { + '_type': 'url_transparent', + 'id': entry_id, + 'url': 'kaltura:%s:%s' % (p_id, entry_id), + 'ie_key': 'Kaltura', + 'title': video_title + } From cb23192bc4c56d80229a7a5f70cb61d0879db6c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 19 Jun 2016 00:35:29 +0700 Subject: [PATCH 0836/3599] [closertotruth] Update and improve (Closes #8680) --- youtube_dl/extractor/closertotruth.py | 117 +++++++++++++++----------- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 71 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/closertotruth.py b/youtube_dl/extractor/closertotruth.py index d04ff5e4f..26243d52d 100644 --- a/youtube_dl/extractor/closertotruth.py +++ b/youtube_dl/extractor/closertotruth.py @@ -1,69 +1,92 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class CloserToTruthIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(episodes/|(series|interviews)/(?:[^#]+#video-)?(?P\d+))' - _TESTS = [ - { - 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', - 'md5': '5c548bde260a9247ddfdc07c7458ed29', - 'info_dict': { - 'id': '0_zof1ktre', - 'ext': 'mov', - 'title': 'Solutions to the Mind-Body Problem?', - 'upload_date': '20140221', - 'timestamp': 1392956007, - 'uploader_id': 'CTTXML' - } + _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', + 'info_dict': { + 'id': '0_zof1ktre', + 'display_id': 'solutions-the-mind-body-problem', + 'ext': 'mov', + 'title': 'Solutions to the Mind-Body Problem?', + 'upload_date': '20140221', + 'timestamp': 1392956007, + 
'uploader_id': 'CTTXML' }, - { - 'url': 'http://closertotruth.com/interviews/1725', - 'md5': 'b00598fd6a38372edb976408f72c5792', - 'info_dict': { - 'id': '0_19qv5rn1', - 'ext': 'mov', - 'title': 'AyaFr-002 - Francisco J. Ayala', - 'upload_date': '20140307', - 'timestamp': 1394236431, - 'uploader_id': 'CTTXML' - } + 'params': { + 'skip_download': True, }, - { - 'url': 'http://closertotruth.com/episodes/how-do-brains-work', - 'md5': '4dd96aa0a5c296afa5c0bd24895c2f16', - 'info_dict': { - 'id': '0_iuxai6g6', - 'ext': 'mov', - 'title': 'How do Brains Work?', - 'upload_date': '20140221', - 'timestamp': 1392956024, - 'uploader_id': 'CTTXML' - } + }, { + 'url': 'http://closertotruth.com/episodes/how-do-brains-work', + 'info_dict': { + 'id': '0_iuxai6g6', + 'display_id': 'how-do-brains-work', + 'ext': 'mov', + 'title': 'How do Brains Work?', + 'upload_date': '20140221', + 'timestamp': 1392956024, + 'uploader_id': 'CTTXML' }, - ] + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://closertotruth.com/interviews/1725', + 'info_dict': { + 'id': '1725', + 'title': 'AyaFr-002', + }, + 'playlist_mincount': 2, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + display_id = self._match_id(url) - video_title = self._search_regex(r'(.+) \|.+', webpage, 'video title') + webpage = self._download_webpage(url, display_id) - entry_id = self._search_regex(r']+id="(?:video-%s|embed-kaltura)"[^>]+data-kaltura="([^"]+)' % video_id, webpage, "video entry_id") + partner_id = self._search_regex( + r']+src=["\'].*?\b(?:partner_id|p)/(\d+)', + webpage, 'kaltura partner_id') - interviewee_name = self._search_regex(r'
(.*).+', webpage, "video interviewee_name", False) + title = self._search_regex( + r'(.+?)\s*\|\s*.+?', webpage, 'video title') - if interviewee_name: - video_title = video_title + ' - ' + interviewee_name + select = self._search_regex( + r'(?s)]+id="select-version"[^>]*>(.+?)', + webpage, 'select version', default=None) + if select: + entry_ids = set() + entries = [] + for mobj in re.finditer( + r']+value=(["\'])(?P[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P[^<]+)', + webpage): + entry_id = mobj.group('id') + if entry_id in entry_ids: + continue + entry_ids.add(entry_id) + entries.append({ + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'ie_key': 'Kaltura', + 'title': mobj.group('title'), + }) + if entries: + return self.playlist_result(entries, display_id, title) - p_id = self._search_regex(r'<script[^>]+src=["\'].+?partner_id/(\d+)', webpage, "kaltura partner_id") + entry_id = self._search_regex( + r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2', + webpage, 'kaltura entry_id', group='id') return { '_type': 'url_transparent', - 'id': entry_id, - 'url': 'kaltura:%s:%s' % (p_id, entry_id), + 'display_id': display_id, + 'url': 'kaltura:%s:%s' % (partner_id, entry_id), 'ie_key': 'Kaltura', - 'title': video_title + 'title': title } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6dc5904b3..2ff867651 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -140,6 +140,7 @@ from .cliprs import ClipRsIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE +from .closertotruth import CloserToTruthIE from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE From 7577d849a62ecdcc52ede6dcf73edf2a717fc646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Jun 2016 02:25:34 +0700 Subject: [PATCH 0837/3599] [r7] Fix 
extraction and add support for articles (Closes #9826) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/r7.py | 95 +++++++++++++++++++----------- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2ff867651..b1b04f2fc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -631,7 +631,10 @@ from .qqmusic import ( QQMusicToplistIE, QQMusicPlaylistIE, ) -from .r7 import R7IE +from .r7 import ( + R7IE, + R7ArticleIE, +) from .radiocanada import ( RadioCanadaIE, RadioCanadaAudioVideoIE, diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py index 976c8feec..069dbfaed 100644 --- a/youtube_dl/extractor/r7.py +++ b/youtube_dl/extractor/r7.py @@ -2,22 +2,19 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - js_to_json, - unescapeHTML, - int_or_none, -) +from ..utils import int_or_none class R7IE(InfoExtractor): - _VALID_URL = r'''(?x)https?:// + _VALID_URL = r'''(?x) + https?:// (?: (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| noticias\.r7\.com(?:/[^/]+)+/[^/]+-| player\.r7\.com/video/i/ ) (?P<id>[\da-f]{24}) - ''' + ''' _TESTS = [{ 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', 'md5': '403c4e393617e8e8ddc748978ee8efde', @@ -25,6 +22,7 @@ class R7IE(InfoExtractor): 'id': '54e7050b0cf2ff57e0279389', 'ext': 'mp4', 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', + 'description': 'md5:01812008664be76a6479aa58ec865b72', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 98, 'like_count': int, @@ -44,45 +42,72 @@ class R7IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://player.r7.com/video/i/%s' % video_id, video_id) + video = self._download_json( + 
'http://player-api.r7.com/video/i/%s' % video_id, video_id) - item = self._parse_json(js_to_json(self._search_regex( - r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) - - title = unescapeHTML(item['title']) - thumbnail = item.get('init', {}).get('thumbUri') - duration = None - - statistics = item.get('statistics', {}) - like_count = int_or_none(statistics.get('likes')) - view_count = int_or_none(statistics.get('views')) + title = video['title'] formats = [] - for format_key, format_dict in item['playlist'][0].items(): - src = format_dict.get('src') - if not src: - continue - format_id = format_dict.get('format') or format_key - if duration is None: - duration = format_dict.get('duration') - if '.f4m' in src: - formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) - elif src.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) - else: - formats.append({ - 'url': src, - 'format_id': format_id, - }) + media_url_hls = video.get('media_url_hls') + if media_url_hls: + formats.extend(self._extract_m3u8_formats( + media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + media_url = video.get('media_url') + if media_url: + f = { + 'url': media_url, + 'format_id': 'http', + } + # m3u8 format always matches the http format, let's copy metadata from + # one to another + m3u8_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + formats)) + if len(m3u8_formats) == 1: + f_copy = m3u8_formats[0].copy() + f_copy.update(f) + f_copy['protocol'] = 'http' + f = f_copy + formats.append(f) self._sort_formats(formats) + description = video.get('description') + thumbnail = video.get('thumb') + duration = int_or_none(video.get('media_duration')) + like_count = int_or_none(video.get('likes')) + view_count = int_or_none(video.get('views')) + return { 'id': video_id, 'title': title, + 'description': description, 'thumbnail': 
thumbnail, 'duration': duration, 'like_count': like_count, 'view_count': view_count, 'formats': formats, } + + +class R7ArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)' + _TEST = { + 'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015', + 'only_matching': True, + } + + @classmethod + def suitable(cls, url): + return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})', + webpage, 'video id') + + return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key()) From 589568789f500b7a515355a07efec4bcec0f3243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Jun 2016 02:30:29 +0700 Subject: [PATCH 0838/3599] release 2016.06.19 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 4 +++- youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2736bb23b..52e04aa74 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.18.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.18.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.19*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.19** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.18.1 +[debug] youtube-dl version 2016.06.19 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c79798d86..7c90940c7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -128,6 +128,7 @@ - **cliphunter** - **ClipRs** - **Clipsyndicate** + - **CloserToTruth** - **cloudtime**: CloudTime - **Cloudy** - **Clubic** @@ -521,6 +522,7 @@ - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 - **R7** + - **R7Article** - **radio.de** - **radiobremen** - **radiocanada** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index f71bc8c2a..417e86ed6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.18.1' +__version__ = 
'2016.06.19' From 7c05097633138459e9bdf7e10738e021b04689a7 Mon Sep 17 00:00:00 2001 From: Lucas Moura <lucas.moura128@gmail.com> Date: Sat, 18 Jun 2016 17:01:47 -0300 Subject: [PATCH 0839/3599] [jsinterp] Avoid double key lookup for setting new key In order to add a new key to both __objects and __functions dicts on jsinterp.py, it is necessary to first verify if a key was present and if not, create the key and assign it to a value. However, this can be done with a single step using dict setdefault method. --- youtube_dl/jsinterp.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index a7440c582..4a5a0dbc3 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -131,9 +131,8 @@ class JSInterpreter(object): if variable in local_vars: obj = local_vars[variable] else: - if variable not in self._objects: - self._objects[variable] = self.extract_object(variable) - obj = self._objects[variable] + obj = self._objects.setdefault( + variable, self.extract_object(variable)) if arg_str is None: # Member access @@ -204,8 +203,7 @@ class JSInterpreter(object): argvals = tuple([ int(v) if v.isdigit() else local_vars[v] for v in m.group('args').split(',')]) - if fname not in self._functions: - self._functions[fname] = self.extract_function(fname) + self._functions.setdefault(fname, self.extract_function(fname)) return self._functions[fname](argvals) raise ExtractorError('Unsupported JS expression %r' % expr) From 6a55bb66ee4367a8445c8df2d803090e68c42fe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Jun 2016 03:56:01 +0700 Subject: [PATCH 0840/3599] [vimeo] Fix rented videos (Closes #9830) --- youtube_dl/extractor/vimeo.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 0fd2c18a0..c52986af6 100644 --- a/youtube_dl/extractor/vimeo.py +++ 
b/youtube_dl/extractor/vimeo.py @@ -8,6 +8,7 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_HTTPError, + compat_str, compat_urlparse, ) from ..utils import ( @@ -24,6 +25,7 @@ from ..utils import ( urlencode_postdata, unescapeHTML, parse_filesize, + try_get, ) @@ -445,7 +447,18 @@ class VimeoIE(VimeoBaseInfoExtractor): if config.get('view') == 4: config = self._verify_player_video_password(url, video_id) - if '>You rented this title.<' in webpage: + def is_rented(): + if '>You rented this title.<' in webpage: + return True + if config.get('user', {}).get('purchased'): + return True + label = try_get( + config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str) + if label and label.startswith('You rented this'): + return True + return False + + if is_rented(): feature_id = config.get('video', {}).get('vod', {}).get('feature_id') if feature_id and not data.get('force_feature_id', False): return self.url_result(smuggle_url( From a50fd6e0263b6e5d97a13a0f781a2325c0ab7efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Jun 2016 03:57:14 +0700 Subject: [PATCH 0841/3599] release 2016.06.19.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 52e04aa74..1b25628bf 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.19** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.19.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.19.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.19 +[debug] youtube-dl version 2016.06.19.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 417e86ed6..a3e2c3079 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.19' +__version__ = '2016.06.19.1' From e154c65128305fcaf6c6b2d5fd41ecd83e4ec154 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 10 Jan 2016 20:09:53 +0100 Subject: [PATCH 0842/3599] [downloader/hls] Add support for AES-128 encrypted segments in hlsnative downloader --- youtube_dl/downloader/hls.py | 78 ++++++++++++++++++++++++---------- youtube_dl/extractor/common.py | 17 ++------ youtube_dl/utils.py | 9 ++++ 3 files changed, 68 insertions(+), 36 deletions(-) diff --git a/youtube_dl/downloader/hls.py 
b/youtube_dl/downloader/hls.py index 54f2108e9..1d5f178a0 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -2,14 +2,24 @@ from __future__ import unicode_literals import os.path import re +import binascii +try: + from Crypto.Cipher import AES + can_decrypt_frag = True +except ImportError: + can_decrypt_frag = False from .fragment import FragmentFD from .external import FFmpegFD -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_struct_pack, +) from ..utils import ( encodeFilename, sanitize_open, + parse_m3u8_attributes, ) @@ -21,7 +31,7 @@ class HlsFD(FragmentFD): @staticmethod def can_download(manifest): UNSUPPORTED_FEATURES = ( - r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. geo restricted to Germany @@ -39,7 +49,9 @@ class HlsFD(FragmentFD): # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 # 4. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 ) - return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) + check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] + check_results.append(not (re.search(r'#EXT-X-KEY:METHOD=AES-128', manifest) and not can_decrypt_frag)) + return all(check_results) def real_download(self, filename, info_dict): man_url = info_dict['url'] @@ -57,36 +69,58 @@ class HlsFD(FragmentFD): fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) - fragment_urls = [] + total_frags = 0 for line in s.splitlines(): line = line.strip() if line and not line.startswith('#'): - segment_url = ( - line - if re.match(r'^https?://', line) - else compat_urlparse.urljoin(man_url, line)) - fragment_urls.append(segment_url) - # We only download the first fragment during the test - if self.params.get('test', False): - break + total_frags += 1 ctx = { 'filename': filename, - 'total_frags': len(fragment_urls), + 'total_frags': total_frags, } self._prepare_and_start_frag_download(ctx) + i = 0 + media_sequence = 0 + decrypt_info = {'METHOD': 'NONE'} frags_filenames = [] - for i, frag_url in enumerate(fragment_urls): - frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) - success = ctx['dl'].download(frag_filename, {'url': frag_url}) - if not success: - return False - down, frag_sanitized = sanitize_open(frag_filename, 'rb') - ctx['dest_stream'].write(down.read()) - down.close() - frags_filenames.append(frag_sanitized) + for line in s.splitlines(): + line = line.strip() + if line: + if not line.startswith('#'): + frag_url = ( + line + if re.match(r'^https?://', line) + else compat_urlparse.urljoin(man_url, line)) + frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + success = ctx['dl'].download(frag_filename, {'url': frag_url}) + if not success: + return False + down, frag_sanitized = sanitize_open(frag_filename, 'rb') + frag_content = down.read() + 
down.close() + if decrypt_info['METHOD'] == 'AES-128': + iv = decrypt_info.get('IV') or compat_struct_pack(">8xq", media_sequence) + frag_content = AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) + ctx['dest_stream'].write(frag_content) + frags_filenames.append(frag_sanitized) + # We only download the first fragment during the test + if self.params.get('test', False): + break + i += 1 + media_sequence += 1 + elif line.startswith('#EXT-X-KEY'): + decrypt_info = parse_m3u8_attributes(line[11:]) + if decrypt_info['METHOD'] == 'AES-128': + if 'IV' in decrypt_info: + decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) + if not re.match(r'^https?://', decrypt_info['URI']): + decrypt_info['URI'] = compat_urlparse.urljoin(man_url, decrypt_info['URI']) + decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() + elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): + media_sequence = int(line[22:]) self._finish_frag_download(ctx) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index bfd432160..5a2603b50 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -53,6 +53,7 @@ from ..utils import ( mimetype2ext, update_Request, update_url_query, + parse_m3u8_attributes, ) @@ -1150,23 +1151,11 @@ class InfoExtractor(object): }] last_info = None last_media = None - kv_rex = re.compile( - r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): - last_info = {} - for m in kv_rex.finditer(line): - v = m.group('val') - if v.startswith('"'): - v = v[1:-1] - last_info[m.group('key')] = v + last_info = parse_m3u8_attributes(line) elif line.startswith('#EXT-X-MEDIA:'): - last_media = {} - for m in kv_rex.finditer(line): - v = m.group('val') - if v.startswith('"'): - v = v[1:-1] - last_media[m.group('key')] = v + last_media = parse_m3u8_attributes(line) elif line.startswith('#') or not line.strip(): continue else: diff 
--git a/youtube_dl/utils.py b/youtube_dl/utils.py index 82f67f6cd..562031fe1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2852,3 +2852,12 @@ def decode_packed_codes(code): return re.sub( r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], obfucasted_code) + + +def parse_m3u8_attributes(attrib): + info = {} + for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib): + if val.startswith('"'): + val = val[1:-1] + info[key] = val + return info From 6cd64b6806e92b7246aebd89448189180d88db82 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 19 Jun 2016 05:45:48 +0100 Subject: [PATCH 0843/3599] [foxsports] extract http formats --- youtube_dl/extractor/foxsports.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py index df7665176..a3bb98377 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import smuggle_url +from ..utils import ( + smuggle_url, + update_url_query, +) class FoxSportsIE(InfoExtractor): @@ -9,11 +12,15 @@ class FoxSportsIE(InfoExtractor): _TEST = { 'url': 'http://www.foxsports.com/video?vid=432609859715', + 'md5': 'b49050e955bebe32c301972e4012ac17', 'info_dict': { - 'id': 'gA0bHB3Ladz3', - 'ext': 'flv', + 'id': 'i0qKWsk3qJaM', + 'ext': 'mp4', 'title': 'Courtney Lee on going up 2-0 in series vs. 
Blazers', 'description': 'Courtney Lee talks about Memphis being focused.', + 'upload_date': '20150423', + 'timestamp': 1429761109, + 'uploader': 'NEWA-FNG-FOXSPORTS', }, 'add_ie': ['ThePlatform'], } @@ -28,5 +35,8 @@ class FoxSportsIE(InfoExtractor): r"data-player-config='([^']+)'", webpage, 'data player config'), video_id) - return self.url_result(smuggle_url( - config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True})) + return self.url_result(smuggle_url(update_url_query( + config['releaseURL'], { + 'mbr': 'true', + 'switch': 'http', + }), {'force_smil_url': True})) From 6c83e583b334226965bdf45583c09dbe8bfe9dab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 19 Jun 2016 13:32:08 +0800 Subject: [PATCH 0844/3599] [radiojavan] PEP8 E275 is added in pycodestyle 2.6 See https://github.com/PyCQA/pycodestyle/pull/491 --- youtube_dl/extractor/radiojavan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index 884c28420..ec4fa6e60 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import( +from ..utils import ( unified_strdate, str_to_int, ) From 5839d556e4d491ce940324965eaeecfb843306cc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 19 Jun 2016 23:37:05 +0100 Subject: [PATCH 0845/3599] [theplatform] reduce requests for theplatform feed info extraction --- youtube_dl/extractor/theplatform.py | 54 +++++++++++++++++++---------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 5793ec6ef..07d222ae3 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -277,9 +277,9 @@ class ThePlatformIE(ThePlatformBaseIE): class 
ThePlatformFeedIE(ThePlatformBaseIE): - _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s' - _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)' - _TEST = { + _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s' + _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))' + _TESTS = [{ # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207 'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207', 'md5': '6e32495b5073ab414471b615c5ded394', @@ -295,32 +295,38 @@ class ThePlatformFeedIE(ThePlatformBaseIE): 'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'], 'uploader': 'NBCU-NEWS', }, - } + }] - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') - provider_id = mobj.group('provider_id') - feed_id = mobj.group('feed_id') - - real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id) - feed = self._download_json(real_url, video_id) - entry = feed['entries'][0] + def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}): + real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query) + entry = self._download_json(real_url, video_id)['entries'][0] formats = [] subtitles = {} first_video_id = None duration = None + asset_types = [] for item in entry['media$content']: - smil_url = item['plfile$url'] + '&mbr=true' + smil_url = item['plfile$url'] cur_video_id = ThePlatformIE._match_id(smil_url) if first_video_id is None: first_video_id = cur_video_id duration = float_or_none(item.get('plfile$duration')) - cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, 
video_id, 'Downloading SMIL data for %s' % cur_video_id) - formats.extend(cur_formats) - subtitles = self._merge_subtitles(subtitles, cur_subtitles) + for asset_type in item['plfile$assetTypes']: + if asset_type in asset_types: + continue + asset_types.append(asset_type) + query = { + 'mbr': 'true', + 'formats': item['plfile$format'], + 'assetTypes': asset_type, + } + if asset_type in asset_types_query: + query.update(asset_types_query[asset_type]) + cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query( + smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type) + formats.extend(cur_formats) + subtitles = self._merge_subtitles(subtitles, cur_subtitles) self._sort_formats(formats) @@ -344,5 +350,17 @@ class ThePlatformFeedIE(ThePlatformBaseIE): 'timestamp': timestamp, 'categories': categories, }) + if custom_fields: + ret.update(custom_fields(entry)) return ret + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + provider_id = mobj.group('provider_id') + feed_id = mobj.group('feed_id') + filter_query = mobj.group('filter') + + return self._extract_feed_info(provider_id, feed_id, filter_query, video_id) From 43518503a66d670330a8406829fc30b431420c9c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 19 Jun 2016 23:40:00 +0100 Subject: [PATCH 0846/3599] [cbs,cbsnews,cbssports] reduce requests while extracting all formats --- youtube_dl/extractor/cbs.py | 84 +++++++------------------------ youtube_dl/extractor/cbsnews.py | 31 +++--------- youtube_dl/extractor/cbssports.py | 40 +++++++-------- 3 files changed, 44 insertions(+), 111 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index ac2c7dced..030eeaa65 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -2,16 +2,14 @@ from __future__ import unicode_literals import re -from .theplatform import ThePlatformIE +from .theplatform import 
ThePlatformFeedIE from ..utils import ( - xpath_text, - xpath_element, int_or_none, find_xpath_attr, ) -class CBSBaseIE(ThePlatformIE): +class CBSBaseIE(ThePlatformFeedIE): def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') return { @@ -21,9 +19,22 @@ class CBSBaseIE(ThePlatformIE): }] } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] + def _extract_video_info(self, filter_query, video_id): + return self._extract_feed_info( + 'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { + 'series': entry.get('cbs$SeriesTitle'), + 'season_number': int_or_none(entry.get('cbs$SeasonNumber')), + 'episode': entry.get('cbs$EpisodeTitle'), + 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), + }, { + 'StreamPack': { + 'manifest': 'm3u', + } + }) + class CBSIE(CBSBaseIE): - _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))' + _VALID_URL = r'(?:cbs|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', @@ -38,25 +49,7 @@ class CBSIE(CBSBaseIE): 'upload_date': '20131127', 'uploader': 'CBSI-NEW', }, - 'params': { - # rtmp download - 'skip_download': True, - }, - '_skip': 'Blocked outside the US', - }, { - 'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', - 'info_dict': { - 'id': 'WWF_5KqY3PK1', - 'display_id': 'st-vincent', - 'ext': 'flv', - 'title': 'Live on Letterman - St. Vincent', - 'description': 'Live On Letterman: St. 
Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', - 'duration': 3221, - }, - 'params': { - # rtmp download - 'skip_download': True, - }, + 'expected_warnings': ['Failed to download m3u8 information'], '_skip': 'Blocked outside the US', }, { 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', @@ -68,44 +61,5 @@ class CBSIE(CBSBaseIE): TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' def _real_extract(self, url): - content_id, display_id = re.match(self._VALID_URL, url).groups() - if not content_id: - webpage = self._download_webpage(url, display_id) - content_id = self._search_regex( - [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], - webpage, 'content id') - items_data = self._download_xml( - 'http://can.cbs.com/thunder/player/videoPlayerService.php', - content_id, query={'partner': 'cbs', 'contentId': content_id}) - video_data = xpath_element(items_data, './/item') - title = xpath_text(video_data, 'videoTitle', 'title', True) - - subtitles = {} - formats = [] - for item in items_data.findall('.//item'): - pid = xpath_text(item, 'pid') - if not pid: - continue - tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid - if '.m3u8' in xpath_text(item, 'contentUrl', default=''): - tp_release_url += '&manifest=m3u' - tp_formats, tp_subtitles = self._extract_theplatform_smil( - tp_release_url, content_id, 'Downloading %s SMIL data' % pid) - formats.extend(tp_formats) - subtitles = self._merge_subtitles(subtitles, tp_subtitles) - self._sort_formats(formats) - - info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id) - info.update({ - 'id': content_id, - 'display_id': display_id, - 'title': title, - 'series': xpath_text(video_data, 'seriesTitle'), - 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), - 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), - 'duration': 
int_or_none(xpath_text(video_data, 'videoLength'), 1000), - 'thumbnail': xpath_text(video_data, 'previewImageURL'), - 'formats': formats, - 'subtitles': subtitles, - }) - return info + content_id = self._match_id(url) + return self._extract_video_info('byGuid=%s' % content_id, content_id) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 79ddc20a0..387537e76 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -30,9 +30,12 @@ class CBSNewsIE(CBSBaseIE): { 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', 'info_dict': { - 'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack', + 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', 'ext': 'mp4', 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', + 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', + 'upload_date': '19700101', + 'uploader': 'CBSI-NEW', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 205, 'subtitles': { @@ -58,30 +61,8 @@ class CBSNewsIE(CBSBaseIE): webpage, 'video JSON info'), video_id) item = video_info['item'] if 'item' in video_info else video_info - title = item.get('articleTitle') or item.get('hed') - duration = item.get('duration') - thumbnail = item.get('mediaImage') or item.get('thumbnail') - - subtitles = {} - formats = [] - for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']: - pid = item.get('media' + format_id) - if not pid: - continue - release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid - tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid) - formats.extend(tp_formats) - subtitles = self._merge_subtitles(subtitles, tp_subtitles) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } + guid = 
item['mpxRefId'] + return self._extract_video_info('byGuid=%s' % guid, guid) class CBSNewsLiveVideoIE(InfoExtractor): diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index 549ae32f3..78ca44b02 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -1,30 +1,28 @@ from __future__ import unicode_literals -import re - -from .common import InfoExtractor +from .cbs import CBSBaseIE -class CBSSportsIE(InfoExtractor): - _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' +class CBSSportsIE(CBSBaseIE): + _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', + _TESTS = [{ + 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', 'info_dict': { - 'id': '_d5_GbO8p1sT', - 'ext': 'flv', - 'title': 'US Open flashbacks: 1990s', - 'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', + 'id': '708337219968', + 'ext': 'mp4', + 'title': 'Ben Simmons the next LeBron? 
Not so fast', + 'description': 'md5:854294f627921baba1f4b9a990d87197', + 'timestamp': 1466293740, + 'upload_date': '20160618', + 'uploader': 'CBSI-NEW', }, - } + 'params': { + # m3u8 download + 'skip_download': True, + } + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - section = mobj.group('section') - video_id = mobj.group('id') - all_videos = self._download_json( - 'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, - video_id) - # The json file contains the info of all the videos in the section - video_info = next(v for v in all_videos if v['pcid'] == video_id) - return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') + video_id = self._match_id(url) + return self._extract_video_info('byId=%s' % video_id, video_id) From 819707920a63946ea1e4f0ae2bf842425d22c2e9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 19 Jun 2016 23:55:19 +0100 Subject: [PATCH 0847/3599] [cbs] fix _VALID_URL --- youtube_dl/extractor/cbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 030eeaa65..21720f084 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -34,7 +34,7 @@ class CBSBaseIE(ThePlatformFeedIE): class CBSIE(CBSBaseIE): - _VALID_URL = r'(?:cbs|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' + _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', From 1f749b6658439049b952fdb979acb6c4422a358a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 20 Jun 2016 13:29:13 +0200 Subject: [PATCH 0848/3599] Revert "[jsinterp] Avoid double key lookup for setting new key" This reverts commit 
7c05097633138459e9bdf7e10738e021b04689a7. --- youtube_dl/jsinterp.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 4a5a0dbc3..a7440c582 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -131,8 +131,9 @@ class JSInterpreter(object): if variable in local_vars: obj = local_vars[variable] else: - obj = self._objects.setdefault( - variable, self.extract_object(variable)) + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] if arg_str is None: # Member access @@ -203,7 +204,8 @@ class JSInterpreter(object): argvals = tuple([ int(v) if v.isdigit() else local_vars[v] for v in m.group('args').split(',')]) - self._functions.setdefault(fname, self.extract_function(fname)) + if fname not in self._functions: + self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) raise ExtractorError('Unsupported JS expression %r' % expr) From 8369a4fe768b1838f640ad984fbc923037b06c3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Jun 2016 21:55:17 +0700 Subject: [PATCH 0849/3599] [downloader/hls] Simplify and carry long lines --- youtube_dl/downloader/hls.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 1d5f178a0..3b7bb3508 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -50,7 +50,7 @@ class HlsFD(FragmentFD): # 4. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] - check_results.append(not (re.search(r'#EXT-X-KEY:METHOD=AES-128', manifest) and not can_decrypt_frag)) + check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) return all(check_results) def real_download(self, filename, info_dict): @@ -102,8 +102,9 @@ class HlsFD(FragmentFD): frag_content = down.read() down.close() if decrypt_info['METHOD'] == 'AES-128': - iv = decrypt_info.get('IV') or compat_struct_pack(">8xq", media_sequence) - frag_content = AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) + iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) + frag_content = AES.new( + decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) ctx['dest_stream'].write(frag_content) frags_filenames.append(frag_sanitized) # We only download the first fragment during the test @@ -117,7 +118,8 @@ class HlsFD(FragmentFD): if 'IV' in decrypt_info: decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) if not re.match(r'^https?://', decrypt_info['URI']): - decrypt_info['URI'] = compat_urlparse.urljoin(man_url, decrypt_info['URI']) + decrypt_info['URI'] = compat_urlparse.urljoin( + man_url, decrypt_info['URI']) decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) From 19e2d1cdeaf36805d72206a6309a6f7421f3c9ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Jun 2016 20:50:01 +0700 Subject: [PATCH 0850/3599] release 2016.06.20 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1b25628bf..64ddb891e 100644 --- 
a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.19.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.19.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.20** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.19.1 +[debug] youtube-dl version 2016.06.20 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7c90940c7..5be8238c0 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek** - **ARD:mediathek**: Saarländischer Rundfunk + - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** diff --git 
a/youtube_dl/version.py b/youtube_dl/version.py index a3e2c3079..4a9f162c1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.19.1' +__version__ = '2016.06.20' From feef925f49c80fc125ff24f61a144af902a648d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Jun 2016 22:40:22 +0700 Subject: [PATCH 0851/3599] [streamcloud] Capture error message (#9840) --- youtube_dl/extractor/streamcloud.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py index 58560ec64..6a6bb90c4 100644 --- a/youtube_dl/extractor/streamcloud.py +++ b/youtube_dl/extractor/streamcloud.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, - sanitized_Request, urlencode_postdata, ) @@ -45,20 +44,26 @@ class StreamcloudIE(InfoExtractor): (?:id="[^"]+"\s+)? 
value="([^"]*)" ''', orig_webpage) - post = urlencode_postdata(fields) self._sleep(12, video_id) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - } - req = sanitized_Request(url, post, headers) webpage = self._download_webpage( - req, video_id, note='Downloading video page ...') - title = self._html_search_regex( - r'<h1[^>]*>([^<]+)<', webpage, 'title') - video_url = self._search_regex( - r'file:\s*"([^"]+)"', webpage, 'video URL') + url, video_id, data=urlencode_postdata(fields), headers={ + b'Content-Type': b'application/x-www-form-urlencoded', + }) + + try: + title = self._html_search_regex( + r'<h1[^>]*>([^<]+)<', webpage, 'title') + video_url = self._search_regex( + r'file:\s*"([^"]+)"', webpage, 'video URL') + except ExtractorError: + message = self._html_search_regex( + r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>', + webpage, 'message', default=None, group='message') + if message: + raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) + raise thumbnail = self._search_regex( r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) From cdfc187cd5bd163e7e67ca0c02108380cc06c180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Jun 2016 22:40:33 +0700 Subject: [PATCH 0852/3599] [cbs] Remove unused import --- youtube_dl/extractor/cbs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 21720f084..a23173d6f 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .theplatform import ThePlatformFeedIE from ..utils import ( int_or_none, From e4f90ea0a72711f6577d4cde1dd145f03ab34803 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 21 Jun 2016 17:55:53 +0800 Subject: [PATCH 0853/3599] [svt] Fix extraction for SVTPlay (closes #9809) --- 
youtube_dl/extractor/svt.py | 63 ++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 2ab30e45f..6526a6345 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -6,17 +6,14 @@ import re from .common import InfoExtractor from ..utils import ( determine_ext, + dict_get, ) class SVTBaseIE(InfoExtractor): - def _extract_video(self, url, video_id): - info = self._download_json(url, video_id) + def _extract_video(self, info, video_id): + video_info = self._get_video_info(info) - title = info['context']['title'] - thumbnail = info['context'].get('thumbnailImage') - - video_info = info['video'] formats = [] for vr in video_info['videoReferences']: player_type = vr.get('playerType') @@ -43,22 +40,25 @@ class SVTBaseIE(InfoExtractor): self._sort_formats(formats) subtitles = {} - subtitle_references = video_info.get('subtitleReferences') + subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences')) if isinstance(subtitle_references, list): for sr in subtitle_references: subtitle_url = sr.get('url') + subtitle_lang = sr.get('language', 'sv') if subtitle_url: - subtitles.setdefault('sv', []).append({'url': subtitle_url}) + if determine_ext(subtitle_url) == 'm3u8': + # TODO(yan12125): handle WebVTT in m3u8 manifests + continue + + subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url}) duration = video_info.get('materialLength') age_limit = 18 if video_info.get('inappropriateForChildren') else 0 return { 'id': video_id, - 'title': title, 'formats': formats, 'subtitles': subtitles, - 'thumbnail': thumbnail, 'duration': duration, 'age_limit': age_limit, } @@ -68,11 +68,11 @@ class SVTIE(SVTBaseIE): _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)' _TEST = { 'url': 
'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', - 'md5': '9648197555fc1b49e3dc22db4af51d46', + 'md5': '33e9a5d8f646523ce0868ecfb0eed77d', 'info_dict': { 'id': '2900353', - 'ext': 'flv', - 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', + 'ext': 'mp4', + 'title': 'Stjärnorna skojar till det - under SVT-intervjun', 'duration': 27, 'age_limit': 0, }, @@ -85,18 +85,26 @@ class SVTIE(SVTBaseIE): if mobj: return mobj.group('url') + def _get_video_info(self, info): + return info['video'] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) widget_id = mobj.group('widget_id') article_id = mobj.group('id') - return self._extract_video( + + info = self._download_json( 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), article_id) + info_dict = self._extract_video(info, article_id) + info_dict['title'] = info['context']['title'] + return info_dict + class SVTPlayIE(SVTBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' - _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'md5': '2b6704fe4a28801e1a098bbf3c5ac611', @@ -115,10 +123,23 @@ class SVTPlayIE(SVTBaseIE): }, } + def _get_video_info(self, info): + return info['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - host = mobj.group('host') - return self._extract_video( - 'http://www.%s.se/video/%s?output=json' % (host, video_id), - video_id) + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + data = self._parse_json(self._search_regex( + r'root\["__svtplay"\]\s*=\s*([^;]+);', 
webpage, 'embedded data'), video_id) + + thumbnail = self._og_search_thumbnail(webpage) + + info_dict = self._extract_video(data, video_id) + info_dict.update({ + 'title': data['context']['dispatcher']['stores']['MetaStore']['title'], + 'thumbnail': thumbnail, + }) + + return info_dict From 1ac5705f62aa3f6fdb6f2a97fbd24594010b7598 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 21 Jun 2016 13:36:56 +0100 Subject: [PATCH 0854/3599] [gamespot] extract all formats --- youtube_dl/extractor/gamespot.py | 91 ++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 4ffdd7515..621257c9f 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -1,19 +1,19 @@ from __future__ import unicode_literals import re -import json -from .common import InfoExtractor +from .once import OnceIE from ..compat import ( compat_urllib_parse_unquote, - compat_urlparse, ) from ..utils import ( unescapeHTML, + url_basename, + dict_get, ) -class GameSpotIE(InfoExtractor): +class GameSpotIE(OnceIE): _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', @@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor): webpage = self._download_webpage(url, page_id) data_video_json = self._search_regex( r'data-video=["\'](.*?)["\']', webpage, 'data video') - data_video = json.loads(unescapeHTML(data_video_json)) + data_video = self._parse_json(unescapeHTML(data_video_json), page_id) streams = data_video['videoStreams'] + manifest_url = None formats = [] f4m_url = streams.get('f4m_stream') - if f4m_url is not None: - # Transform the manifest url to a link to the mp4 files - # they are used in mobile devices. 
- f4m_path = compat_urlparse.urlparse(f4m_url).path - QUALITIES_RE = r'((,\d+)+,?)' - qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') - http_path = f4m_path[1:].split('/', 1)[1] - http_template = re.sub(QUALITIES_RE, r'%s', http_path) - http_template = http_template.replace('.csmil/manifest.f4m', '') - http_template = compat_urlparse.urljoin( - 'http://video.gamespotcdn.com/', http_template) - for q in qualities: - formats.append({ - 'url': http_template % q, - 'ext': 'mp4', - 'format_id': q, - }) - else: + if f4m_url: + manifest_url = f4m_url + formats.extend(self._extract_f4m_formats( + f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False)) + m3u8_url = streams.get('m3u8_stream') + if m3u8_url: + manifest_url = m3u8_url + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, page_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + formats.extend(m3u8_formats) + progressive_url = dict_get( + streams, ('progressive_hd', 'progressive_high', 'progressive_low')) + if progressive_url and manifest_url: + qualities_basename = self._search_regex( + '/([^/]+)\.csmil/', + manifest_url, 'qualities basename', default=None) + if qualities_basename: + QUALITIES_RE = r'((,\d+)+,?)' + qualities = self._search_regex( + QUALITIES_RE, qualities_basename, + 'qualities', default=None) + if qualities: + qualities = list(map(lambda q: int(q), qualities.strip(',').split(','))) + qualities.sort() + http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename) + http_url_basename = url_basename(progressive_url) + if m3u8_formats: + self._sort_formats(m3u8_formats) + m3u8_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + m3u8_formats)) + if len(qualities) == len(m3u8_formats): + for q, m3u8_format in zip(qualities, m3u8_formats): + f = m3u8_format.copy() + f.update({ + 'url': progressive_url.replace( + http_url_basename, http_template % q), + 'format_id': 
f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) + else: + for q in qualities: + formats.append({ + 'url': progressive_url.replace( + http_url_basename, http_template % q), + 'ext': 'mp4', + 'format_id': 'http-%d' % q, + 'tbr': q, + }) + + onceux_json = self._search_regex( + r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None) + if onceux_json: + onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') + if onceux_url: + formats.extend(self._extract_once_formats(re.sub( + r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) + + if not formats: for quality in ['sd', 'hd']: # It's actually a link to a flv file flv_url = streams.get('f4m_{0}'.format(quality)) @@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor): 'ext': 'flv', 'format_id': quality, }) + self._sort_formats(formats) return { 'id': data_video['guid'], From 7cfc1e2a104977c41f6008885b36b96bcb2b146e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Jun 2016 22:31:41 +0700 Subject: [PATCH 0855/3599] [gametrailers] Remove extractor gametrailers closed (see http://www.polygon.com/2016/2/8/10944452/gametrailers-shuts-down-after-13-year-run) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/gametrailers.py | 62 ---------------------------- 2 files changed, 63 deletions(-) delete mode 100644 youtube_dl/extractor/gametrailers.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1b04f2fc..4e2a2f2e9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -285,7 +285,6 @@ from .gameone import ( from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE -from .gametrailers import GametrailersIE from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .generic import GenericIE diff --git 
a/youtube_dl/extractor/gametrailers.py b/youtube_dl/extractor/gametrailers.py deleted file mode 100644 index 1e7948ab8..000000000 --- a/youtube_dl/extractor/gametrailers.py +++ /dev/null @@ -1,62 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_age_limit, - url_basename, -) - - -class GametrailersIE(InfoExtractor): - _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' - - _TEST = { - 'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review', - 'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a', - 'info_dict': { - 'id': '2983958', - 'ext': 'mp4', - 'display_id': '116437-Just-Cause-3-Review', - 'title': 'Just Cause 3 - Review', - 'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?', - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - title = self._html_search_regex( - r'<title>(.+?)\|', webpage, 'title').strip() - embed_url = self._proto_relative_url( - self._search_regex( - r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage, - 'embed url'), - scheme='http:') - video_id = url_basename(embed_url) - embed_page = self._download_webpage(embed_url, video_id) - embed_vars_json = self._search_regex( - r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page, - 'embed vars') - info = self._parse_json(embed_vars_json, video_id) - - formats = [] - for media in info['media']: - if media['mediaPurpose'] == 'play': - formats.append({ - 'url': media['uri'], - 'height': media['height'], - 'width:': media['width'], - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - 'thumbnail': info.get('thumbUri'), - 'description': self._og_search_description(webpage), - 'duration': 
int_or_none(info.get('videoLengthInSeconds')), - 'age_limit': parse_age_limit(info.get('audienceRating')), - } From ca74c90bf55cfb16f5eadc3a63e9389202ee80f5 Mon Sep 17 00:00:00 2001 From: Shai Coleman <shai.coleman@storyful.com> Date: Wed, 22 Jun 2016 12:52:15 +0100 Subject: [PATCH 0856/3599] Fix issue downloading facebook videos youtube-dl expects the format items to be returned as a list, but when there's only one item Facebook returns a dict instead, this wraps the dict in a list if necessary --- youtube_dl/extractor/facebook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f5bbd39d2..9b87b37ae 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -239,6 +239,8 @@ class FacebookIE(InfoExtractor): formats = [] for format_id, f in video_data.items(): + if f and isinstance(f, dict): + f = [f] if not f or not isinstance(f, list): continue for quality in ('sd', 'hd'): From 23bdae0955ae5e0adaf6212bb7aa6cec77ae4d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Jun 2016 23:36:07 +0700 Subject: [PATCH 0857/3599] [svt] Various improvements + [svt:play] Add fallback path looking for video id and fix extraction for oppetarkiv * [svt:base] Detect geo restriction * [svt:base] Extract series related metadata --- youtube_dl/extractor/svt.py | 82 +++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 6526a6345..67f56fab8 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -7,13 +7,13 @@ from .common import InfoExtractor from ..utils import ( determine_ext, dict_get, + int_or_none, + try_get, ) class SVTBaseIE(InfoExtractor): - def _extract_video(self, info, video_id): - video_info = self._get_video_info(info) - + def _extract_video(self, video_info, video_id): formats = [] for vr in 
video_info['videoReferences']: player_type = vr.get('playerType') @@ -37,6 +37,8 @@ class SVTBaseIE(InfoExtractor): 'format_id': player_type, 'url': vurl, }) + if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): + self.raise_geo_restricted('This video is only available in Sweden') self._sort_formats(formats) subtitles = {} @@ -52,15 +54,32 @@ class SVTBaseIE(InfoExtractor): subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url}) - duration = video_info.get('materialLength') - age_limit = 18 if video_info.get('inappropriateForChildren') else 0 + title = video_info.get('title') + + series = video_info.get('programTitle') + season_number = int_or_none(video_info.get('season')) + episode = video_info.get('episodeTitle') + episode_number = int_or_none(video_info.get('episodeNumber')) + + duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) + age_limit = None + adult = dict_get( + video_info, ('inappropriateForChildren', 'blockedForChildren'), + skip_false_values=False) + if adult is not None: + age_limit = 18 if adult else 0 return { 'id': video_id, + 'title': title, 'formats': formats, 'subtitles': subtitles, 'duration': duration, 'age_limit': age_limit, + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, } @@ -85,9 +104,6 @@ class SVTIE(SVTBaseIE): if mobj: return mobj.group('url') - def _get_video_info(self, info): - return info['video'] - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) widget_id = mobj.group('widget_id') @@ -97,7 +113,7 @@ class SVTIE(SVTBaseIE): 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), article_id) - info_dict = self._extract_video(info, article_id) + info_dict = self._extract_video(info['video'], article_id) info_dict['title'] = info['context']['title'] return info_dict @@ -105,7 +121,7 @@ class SVTIE(SVTBaseIE): class 
SVTPlayIE(SVTBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 'info_dict': { @@ -121,25 +137,47 @@ class SVTPlayIE(SVTBaseIE): }] }, }, - } - - def _get_video_info(self, info): - return info['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'] + }, { + # geo restricted to Sweden + 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - data = self._parse_json(self._search_regex( - r'root\["__svtplay"\]\s*=\s*([^;]+);', webpage, 'embedded data'), video_id) + data = self._parse_json( + self._search_regex( + r'root\["__svtplay"\]\s*=\s*([^;]+);', + webpage, 'embedded data', default='{}'), + video_id, fatal=False) thumbnail = self._og_search_thumbnail(webpage) - info_dict = self._extract_video(data, video_id) - info_dict.update({ - 'title': data['context']['dispatcher']['stores']['MetaStore']['title'], - 'thumbnail': thumbnail, - }) + if data: + video_info = try_get( + data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], + dict) + if video_info: + info_dict = self._extract_video(video_info, video_id) + info_dict.update({ + 'title': data['context']['dispatcher']['stores']['MetaStore']['title'], + 'thumbnail': thumbnail, + }) + return info_dict - return info_dict + video_id = self._search_regex( + r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', + webpage, 'video id', default=None) + + if video_id: + data = self._download_json( + 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) + info_dict = self._extract_video(data, video_id) + if not info_dict.get('title'): + info_dict['title'] = re.sub( + 
r'\s*\|\s*.+?$', '', + info_dict.get('episode') or self._og_search_title(webpage)) + return info_dict From cf40fdf5c1da33180b5a1b333784c529bc504b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Jun 2016 23:43:24 +0700 Subject: [PATCH 0858/3599] release 2016.06.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 1 - youtube_dl/version.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 64ddb891e..e17625f21 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.20*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.20** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.20 +[debug] youtube-dl version 2016.06.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5be8238c0..96cc407db 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -248,7 +248,6 @@ - **Gamersyde** - **GameSpot** - **GameStar** - - **Gametrailers** - **Gazeta** - **GDCVault** - **generic**: Generic downloader that works on some sites diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4a9f162c1..d2152b2f1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.20' +__version__ = '2016.06.22' From 6ae938b295eaca06944f66faa7c6d668c6c5866c Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Tue, 21 Jun 2016 11:36:54 +0200 Subject: [PATCH 0859/3599] [Vine] Extract view count --- youtube_dl/extractor/vine.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index a6a6cc479..5b801849c 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -24,6 +24,7 @@ class VineIE(InfoExtractor): 'upload_date': '20130519', 'uploader': 'Jack Dorsey', 
'uploader_id': '76', + 'view_count': int, 'like_count': int, 'comment_count': int, 'repost_count': int, @@ -39,6 +40,7 @@ class VineIE(InfoExtractor): 'upload_date': '20140815', 'uploader': 'Mars Ruiz', 'uploader_id': '1102363502380728320', + 'view_count': int, 'like_count': int, 'comment_count': int, 'repost_count': int, @@ -54,6 +56,7 @@ class VineIE(InfoExtractor): 'upload_date': '20130430', 'uploader': 'Z3k3', 'uploader_id': '936470460173008896', + 'view_count': int, 'like_count': int, 'comment_count': int, 'repost_count': int, @@ -71,6 +74,7 @@ class VineIE(InfoExtractor): 'upload_date': '20150705', 'uploader': 'Pimry_zaa', 'uploader_id': '1135760698325307392', + 'view_count': int, 'like_count': int, 'comment_count': int, 'repost_count': int, @@ -109,6 +113,7 @@ class VineIE(InfoExtractor): 'upload_date': unified_strdate(data.get('created')), 'uploader': username, 'uploader_id': data.get('userIdStr'), + 'view_count': int_or_none(data.get('loops', {}).get('count')), 'like_count': int_or_none(data.get('likes', {}).get('count')), 'comment_count': int_or_none(data.get('comments', {}).get('count')), 'repost_count': int_or_none(data.get('reposts', {}).get('count')), From 169d836feb9d796205a02713db33eafcbb49f1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 22 Jun 2016 19:13:46 +0200 Subject: [PATCH 0860/3599] lazy-extractors: Fix after commit 6e6b9f600f2f447604f6108fb6486b73cc25def1 The problem was in the following code: class ArteTVPlus7IE(ArteTVBaseIE): ... @classmethod def suitable(cls, url): return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) And its sublcasses like ArteTVCinemaIE. Since in the lazy_extractors.py file ArteTVCinemaIE was not a subclass of ArteTVPlus7IE, super(ArteTVPlus7IE, cls) failed. To fix it we have to make it a subclass. 
Since the order of _ALL_CLASSES is arbitrary we must sort them so that the base classes are defined first. We also must add base classes like YoutubeBaseInfoExtractor. --- devscripts/make_lazy_extractors.py | 47 ++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index b5a8b9190..9a79c2bc5 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename): os.remove(lazy_extractors_filename) from youtube_dl.extractor import _ALL_CLASSES -from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor with open('devscripts/lazy_load_template.py', 'rt') as f: module_template = f.read() -module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)] +module_contents = [ + module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', + 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n'] ie_template = ''' -class {name}(LazyLoadExtractor): +class {name}({bases}): _VALID_URL = {valid_url!r} _module = '{module}' ''' @@ -34,10 +36,20 @@ make_valid_template = ''' ''' +def get_base_name(base): + if base is InfoExtractor: + return 'LazyLoadExtractor' + elif base is SearchInfoExtractor: + return 'LazyLoadSearchExtractor' + else: + return base.__name__ + + def build_lazy_ie(ie, name): valid_url = getattr(ie, '_VALID_URL', None) s = ie_template.format( name=name, + bases=', '.join(map(get_base_name, ie.__bases__)), valid_url=valid_url, module=ie.__module__) if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: @@ -47,12 +59,35 @@ def build_lazy_ie(ie, name): s += make_valid_template.format(valid_url=ie._make_valid_url()) return s +# find the correct sorting and add the required base classes so that sublcasses +# can be correctly created +classes = _ALL_CLASSES[:-1] +ordered_cls = 
[] +while classes: + for c in classes[:]: + bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor)) + stop = False + for b in bases: + if b not in classes and b not in ordered_cls: + if b.__name__ == 'GenericIE': + exit() + classes.insert(0, b) + stop = True + if stop: + break + if all(b in ordered_cls for b in bases): + ordered_cls.append(c) + classes.remove(c) + break +ordered_cls.append(_ALL_CLASSES[-1]) + names = [] -for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]: - name = ie.ie_key() + 'IE' +for ie in ordered_cls: + name = ie.__name__ src = build_lazy_ie(ie, name) module_contents.append(src) - names.append(name) + if ie in _ALL_CLASSES: + names.append(name) module_contents.append( '_ALL_CLASSES = [{0}]'.format(', '.join(names))) From c143ddce5d1e24697f891292ea865e6ed499f162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 00:51:36 +0700 Subject: [PATCH 0861/3599] [vimeo] Override original URL only when necessary --- youtube_dl/extractor/vimeo.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index c52986af6..1f163d6a4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -227,8 +227,6 @@ class VimeoIE(VimeoBaseInfoExtractor): { 'url': 'http://vimeo.com/channels/keypeele/75629013', 'md5': '2f86a05afe9d7abc0b9126d229bbe15d', - 'note': 'Video is freely available via original URL ' - 'and protected with password when accessed via http://vimeo.com/75629013', 'info_dict': { 'id': '75629013', 'ext': 'mp4', @@ -272,7 +270,7 @@ class VimeoIE(VimeoBaseInfoExtractor): { # contains original format 'url': 'https://vimeo.com/33951933', - 'md5': '53c688fa95a55bf4b7293d37a89c5c53', + 'md5': '2d9f5475e0537f013d0073e812ab89e6', 'info_dict': { 'id': '33951933', 'ext': 'mp4', @@ -284,6 +282,29 @@ class 
VimeoIE(VimeoBaseInfoExtractor): 'description': 'md5:ae23671e82d05415868f7ad1aec21147', }, }, + { + # only available via https://vimeo.com/channels/tributes/6213729 and + # not via https://vimeo.com/6213729 + 'url': 'https://vimeo.com/channels/tributes/6213729', + 'info_dict': { + 'id': '6213729', + 'ext': 'mp4', + 'title': 'Vimeo Tribute: The Shining', + 'uploader': 'Casey Donahue', + 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue', + 'uploader_id': 'caseydonahue', + 'upload_date': '20090821', + 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download JSON metadata'], + }, + { + 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', + 'only_matching': True, + }, { 'url': 'https://vimeo.com/109815029', 'note': 'Video not completely processed, "failed" seed status', @@ -369,7 +390,7 @@ class VimeoIE(VimeoBaseInfoExtractor): orig_url = url if mobj.group('pro') or mobj.group('player'): url = 'https://player.vimeo.com/video/' + video_id - else: + elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information From dfc8f46e1c0e47a3b080d2e38d7d6da279f18fd2 Mon Sep 17 00:00:00 2001 From: Purdea Andrei <andrei@purdea.ro> Date: Wed, 22 Jun 2016 00:34:57 +0300 Subject: [PATCH 0862/3599] [vimeo:channel] Add video id to url_result This will allow us to decide much faster that we don't want an already archived video, and will allow having to download webpages for each video that has already been downloaded, thus significantly speeding up the archival of channels that have no new content. 
--- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 1f163d6a4..32490a8ed 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -652,7 +652,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): yield self._extract_list_title(webpage) for video_id in re.findall(r'id="clip_(\d+?)"', webpage): - yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') + yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo', video_id=video_id) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break From c8e3e0974b4ffb6792694336664f90eff38fc762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 01:28:36 +0700 Subject: [PATCH 0863/3599] [vimeo:channel] Improve playlist extraction --- youtube_dl/extractor/vimeo.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 32490a8ed..26a3d9931 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -146,7 +146,7 @@ class VimeoIE(VimeoBaseInfoExtractor): \. )? vimeo(?P<pro>pro)?\.com/ - (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/) + (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) (?:.*?/)? 
(?: (?: @@ -314,6 +314,10 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/groups/travelhd/videos/22439234', 'only_matching': True, }, + { + 'url': 'https://vimeo.com/album/2632481/video/79010983', + 'only_matching': True, + }, { # source file returns 403: Forbidden 'url': 'https://vimeo.com/7809605', @@ -651,8 +655,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): webpage = self._login_list_password(page_url, list_id, webpage) yield self._extract_list_title(webpage) - for video_id in re.findall(r'id="clip_(\d+?)"', webpage): - yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo', video_id=video_id) + # Try extracting href first since not all videos are available via + # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) + clips = re.findall( + r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage) + if clips: + for video_id, video_url in clips: + yield self.url_result( + compat_urlparse.urljoin(base_url, video_url), + VimeoIE.ie_key(), video_id=video_id) + # More relaxed fallback + else: + for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): + yield self.url_result( + 'https://vimeo.com/%s' % video_id, + VimeoIE.ie_key(), video_id=video_id) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break @@ -689,7 +706,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)' + _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)/?(?:$|[?#])' _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' _TESTS = [{ 'url': 'https://vimeo.com/album/2632481', From b5eab86c2424ec04d17fac5de9d15574320ea8f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 01:56:58 +0700 Subject: [PATCH 0864/3599] [vimeo:album] Impove _VALID_URL --- youtube_dl/extractor/vimeo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 26a3d9931..8ba3f55f4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -706,7 +706,7 @@ class VimeoUserIE(VimeoChannelIE): class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)/?(?:$|[?#])' + _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))' _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' _TESTS = [{ 'url': 'https://vimeo.com/album/2632481', @@ -726,6 +726,9 @@ class VimeoAlbumIE(VimeoChannelIE): 'params': { 'videopassword': 'youtube-dl', } + }, { + 'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail', + 'only_matching': True, }] def _page_url(self, base_url, pagenum): From 089657ed1f6edcdb10a958a8cd7d91b4888e41eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 02:00:03 +0700 Subject: [PATCH 0865/3599] [vimeo:album] Add paged example URL --- youtube_dl/extractor/vimeo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8ba3f55f4..4bdeb1187 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -729,6 +729,10 @@ class VimeoAlbumIE(VimeoChannelIE): }, { 'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail', 'only_matching': True, + }, { + # TODO: respect page number + 'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail', + 'only_matching': True, }] def _page_url(self, base_url, pagenum): From 75ca6bcee2466cb9ca3dc4d1ca35b56a59f6cc4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 04:17:13 +0700 Subject: [PATCH 0866/3599] [vk] Workaround buggy new.vk.com Set-Cookie headers --- youtube_dl/extractor/vk.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 79c819bc3..4e8ec0f86 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re import json +import sys from .common import InfoExtractor from ..compat import compat_str @@ -190,7 +191,7 @@ class VKIE(InfoExtractor): if username is None: return - login_page = self._download_webpage( + login_page, url_handle = self._download_webpage_handle( 'https://vk.com', None, 'Downloading login page') login_form = self._hidden_inputs(login_page) @@ -200,6 +201,22 @@ class VKIE(InfoExtractor): 'pass': password.encode('cp1251'), }) + # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header + # and expects the first one to be set rather than second (see + # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). + # As of RFC6265 the newer one cookie should be set into cookie store + # what actually happens. + # We will workaround this VK issue by resetting the remixlhk cookie to + # the first one manually. 
+ cookies = url_handle.headers.get('Set-Cookie') + if sys.version_info[0] >= 3: + cookies = cookies.encode('iso-8859-1') + cookies = cookies.decode('utf-8') + remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) + if remixlhk: + value, domain = remixlhk.groups() + self._set_cookie(domain, 'remixlhk', value) + request = sanitized_Request( 'https://login.vk.com/?act=login', urlencode_postdata(login_form)) From f2bb8c036a0b1feab726321bd877544cb973d7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 04:18:43 +0700 Subject: [PATCH 0867/3599] [vk] Modernize --- youtube_dl/extractor/vk.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 4e8ec0f86..f8d07beaf 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -217,11 +217,10 @@ class VKIE(InfoExtractor): value, domain = remixlhk.groups() self._set_cookie(domain, 'remixlhk', value) - request = sanitized_Request( - 'https://login.vk.com/?act=login', - urlencode_postdata(login_form)) login_page = self._download_webpage( - request, None, note='Logging in as %s' % username) + 'https://login.vk.com/?act=login', None, + note='Logging in as %s' % username, + data=urlencode_postdata(login_form)) if re.search(r'onLoginFailed', login_page): raise ExtractorError( From 73843ae8acb378e986ab4e7bb3a525b2f6b53cf2 Mon Sep 17 00:00:00 2001 From: rr- <rr-@sakuya.pl> Date: Wed, 22 Jun 2016 17:24:35 +0200 Subject: [PATCH 0868/3599] [xnxx] fix url regex The pattern has changed from "video123412" to "video-o8xa19". The changes maintain backwards compatibility with old-style URLs. 
--- youtube_dl/extractor/xnxx.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 5a41f8ffa..f0d4cb9bc 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -6,17 +6,20 @@ from ..compat import compat_urllib_parse_unquote class XNXXIE(InfoExtractor): - _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)' - _TEST = { - 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', - 'md5': '0831677e2b4761795f68d417e0b7b445', + _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/(.*)' + _TESTS = [{ + 'url': 'http://www.xnxx.com/video-6gqggeb/hd_star-581_sam', + 'md5': '6a2a6aff3f10467d94e572edb7b7deb6', 'info_dict': { - 'id': '1135332', + 'id': '6gqggeb', 'ext': 'flv', - 'title': 'lida » Naked Funny Actress (5)', + 'title': 'HD STAR-581 sam', 'age_limit': 18, - } - } + }, + }, { + 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 97674f041916860343d804b8b07b73017e1a517f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 04:24:00 +0700 Subject: [PATCH 0869/3599] [xnxx] Replace test --- youtube_dl/extractor/xnxx.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index f0d4cb9bc..1e677a63b 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -8,12 +8,12 @@ from ..compat import compat_urllib_parse_unquote class XNXXIE(InfoExtractor): _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/(.*)' _TESTS = [{ - 'url': 'http://www.xnxx.com/video-6gqggeb/hd_star-581_sam', - 'md5': '6a2a6aff3f10467d94e572edb7b7deb6', + 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', + 'md5': 
'ef7ecee5af78f8b03dca2cf31341d3a0', 'info_dict': { - 'id': '6gqggeb', + 'id': '55awb78', 'ext': 'flv', - 'title': 'HD STAR-581 sam', + 'title': 'Skyrim Test Video', 'age_limit': 18, }, }, { From adf1921dc157af23e8b317d6095b88c87a149e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 04:26:49 +0700 Subject: [PATCH 0870/3599] [xnxx] Improve _VALID_URL (Closes #9858) --- youtube_dl/extractor/xnxx.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 1e677a63b..bcb140305 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -6,7 +6,7 @@ from ..compat import compat_urllib_parse_unquote class XNXXIE(InfoExtractor): - _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/(.*)' + _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/' _TESTS = [{ 'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', 'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0', @@ -19,6 +19,9 @@ class XNXXIE(InfoExtractor): }, { 'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', 'only_matching': True, + }, { + 'url': 'http://www.xnxx.com/video-55awb78/', + 'only_matching': True, }] def _real_extract(self, url): From 3331a4644d141ba9163ecf08015ccb9e0a5b87f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 04:27:10 +0700 Subject: [PATCH 0871/3599] [vk] Remove unused import --- youtube_dl/extractor/vk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f8d07beaf..cfc5ffd8b 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -11,7 +11,6 @@ from ..utils import ( ExtractorError, int_or_none, orderedSet, - sanitized_Request, str_to_int, unescapeHTML, unified_strdate, From 96f88e91b7bac15b3a6f1eafb6a66964d2d11a7c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 04:29:34 +0700 Subject: [PATCH 0872/3599] release 2016.06.23 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e17625f21..f7d1020d3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.22 +[debug] youtube-dl version 2016.06.23 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d2152b2f1..bff747906 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.22' +__version__ = '2016.06.23' From 22b7ac13ef4e34654bcb3fb3dbb40d2fac9b4278 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Jun 2016 00:13:52 +0100 Subject: [PATCH 0873/3599] [tf1] fix wat id extraction(closes #9862) --- youtube_dl/extractor/tf1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 6c848dc6f..e595c4a69 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) wat_id = self._html_search_regex( - r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1', + r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1', webpage, 'wat id', group='id') return self.url_result('wat:%s' % wat_id, 'Wat') From 
0437307a41dc97becc807abc9a86070ac9c847d8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Jun 2016 01:36:19 +0100 Subject: [PATCH 0874/3599] [nbc:nbcnews] improve extraction and add msnbc to the extractor --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/nbc.py | 157 +++++++++-------------------- 2 files changed, 50 insertions(+), 108 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4e2a2f2e9..6fc5a18f5 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -480,7 +480,6 @@ from .nbc import ( NBCNewsIE, NBCSportsIE, NBCSportsVPlayerIE, - MSNBCIE, ) from .ndr import ( NDRIE, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 6b7da1149..f694e210b 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -9,10 +9,6 @@ from ..utils import ( lowercase_escape, smuggle_url, unescapeHTML, - update_url_query, - int_or_none, - HEADRequest, - parse_iso8601, ) @@ -192,9 +188,9 @@ class CSNNEIE(InfoExtractor): class NBCNewsIE(ThePlatformIE): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/ + _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/ (?:video/.+?/(?P<id>\d+)| - ([^/]+/)*(?P<display_id>[^/?]+)) + ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+)) ''' _TESTS = [ @@ -216,13 +212,16 @@ class NBCNewsIE(ThePlatformIE): 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', + 'uploader': 'NBCU-NEWS', + 'timestamp': 1401363060, + 'upload_date': '20140529', }, }, { 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156', 'md5': 'fdbf39ab73a72df5896b6234ff98518a', 'info_dict': { - 'id': 'Wjf9EDR3A_60', + 'id': '529953347624', 'ext': 'mp4', 'title': 'FULL EPISODE: Family Business', 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', @@ -237,6 +236,9 @@ class 
NBCNewsIE(ThePlatformIE): 'ext': 'mp4', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', + 'timestamp': 1423104900, + 'uploader': 'NBCU-NEWS', + 'upload_date': '20150205', }, }, { @@ -245,10 +247,12 @@ class NBCNewsIE(ThePlatformIE): 'info_dict': { 'id': '529953347624', 'ext': 'mp4', - 'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'', - 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301', + 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', + 'description': 'md5:c8be487b2d80ff0594c005add88d8351', + 'upload_date': '20150922', + 'timestamp': 1442917800, + 'uploader': 'NBCU-NEWS', }, - 'expected_warnings': ['http-6000 is not available'] }, { 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', @@ -260,6 +264,22 @@ class NBCNewsIE(ThePlatformIE): 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', 'upload_date': '20160420', 'timestamp': 1461152093, + 'uploader': 'NBCU-NEWS', + }, + }, + { + 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', + 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', + 'info_dict': { + 'id': '314487875924', + 'ext': 'mp4', + 'title': 'The chaotic GOP immigration vote', + 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1406937606, + 'upload_date': '20140802', + 'uploader': 'NBCU-NEWS', + 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], }, }, { @@ -290,105 +310,28 @@ class NBCNewsIE(ThePlatformIE): } else: # "feature" and "nightly-news" pages use theplatform.com - display_id = mobj.group('display_id') - webpage = self._download_webpage(url, display_id) - info = None - bootstrap_json = self._search_regex( - 
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', - r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], - webpage, 'bootstrap json', default=None) - bootstrap = self._parse_json( - bootstrap_json, display_id, transform_source=unescapeHTML) - if 'results' in bootstrap: - info = bootstrap['results'][0]['video'] - elif 'video' in bootstrap: - info = bootstrap['video'] - else: - info = bootstrap - video_id = info['mpxId'] - title = info['title'] - - subtitles = {} - caption_links = info.get('captionLinks') - if caption_links: - for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')): - sub_url = caption_links.get(sub_key) - if sub_url: - subtitles.setdefault('en', []).append({ - 'url': sub_url, - 'ext': sub_ext, - }) - - formats = [] - for video_asset in info['videoAssets']: - video_url = video_asset.get('publicUrl') - if not video_url: - continue - container = video_asset.get('format') - asset_type = video_asset.get('assetType') or '' - if container == 'ISM' or asset_type == 'FireTV-Once': - continue - elif asset_type == 'OnceURL': - tp_formats, tp_subtitles = self._extract_theplatform_smil( - video_url, video_id) - formats.extend(tp_formats) - subtitles = self._merge_subtitles(subtitles, tp_subtitles) + video_id = mobj.group('mpx_id') + if not video_id.isdigit(): + webpage = self._download_webpage(url, video_id) + info = None + bootstrap_json = self._search_regex( + [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', + r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], + webpage, 'bootstrap json', default=None) + bootstrap = self._parse_json( + bootstrap_json, video_id, transform_source=unescapeHTML) + if 'results' in bootstrap: + info = bootstrap['results'][0]['video'] + elif 'video' in bootstrap: + info = bootstrap['video'] else: - tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000) - format_id = 'http%s' % ('-%d' % tbr if tbr else '') - 
video_url = update_url_query( - video_url, {'format': 'redirect'}) - # resolve the url so that we can check availability and detect the correct extension - head = self._request_webpage( - HEADRequest(video_url), video_id, - 'Checking %s url' % format_id, - '%s is not available' % format_id, - fatal=False) - if head: - video_url = head.geturl() - formats.append({ - 'format_id': format_id, - 'url': video_url, - 'width': int_or_none(video_asset.get('width')), - 'height': int_or_none(video_asset.get('height')), - 'tbr': tbr, - 'container': video_asset.get('format'), - }) - self._sort_formats(formats) + info = bootstrap + video_id = info['mpxId'] return { + '_type': 'url_transparent', 'id': video_id, - 'title': title, - 'description': info.get('description'), - 'thumbnail': info.get('thumbnail'), - 'duration': int_or_none(info.get('duration')), - 'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')), - 'formats': formats, - 'subtitles': subtitles, + # http://feed.theplatform.com/f/2E2eJC/nbcnews also works + 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, + 'ie_key': 'ThePlatformFeed', } - - -class MSNBCIE(InfoExtractor): - # https URLs redirect to corresponding http ones - _VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)' - _TEST = { - 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', - 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', - 'info_dict': { - 'id': 'n_hayes_Aimm_140801_272214', - 'ext': 'mp4', - 'title': 'The chaotic GOP immigration vote', - 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', - 'thumbnail': 're:^https?://.*\.jpg$', - 'timestamp': 1406937606, - 'upload_date': '20140802', - 'uploader': 'NBCU-NEWS', - 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], - }, - } - - 
def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - embed_url = self._html_search_meta('embedURL', webpage) - return self.url_result(embed_url) From b46eabecd3d6e8ea6dd3dc5a948ecbb65d818205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 09:41:34 +0700 Subject: [PATCH 0875/3599] [jsinterp] Relax JS function regex (Closes #9863) --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index a7440c582..9737f7002 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -232,7 +232,7 @@ class JSInterpreter(object): def extract_function(self, funcname): func_m = re.search( r'''(?x) - (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s* + (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* \((?P<args>[^)]*)\)\s* \{(?P<code>[^}]+)\}''' % ( re.escape(funcname), re.escape(funcname), re.escape(funcname)), From 011bd3221b1541eaef9bb14786da37abe4d74ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Jun 2016 09:42:56 +0700 Subject: [PATCH 0876/3599] release 2016.06.23.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f7d1020d3..62cb18d7d 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.23 +[debug] youtube-dl version 2016.06.23.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 96cc407db..891499f59 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,8 +44,8 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** + - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -385,7 +385,6 @@ - **MovieFap** - **Moviezine** - **MPORA** - - **MSNBC** - **MTV** - **mtv.de** - **mtviggy.com** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index bff747906..0238dc97c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.23' 
+__version__ = '2016.06.23.1' From 6e3c2047f8a51da3bac0d4d290d64b1b8bb8f1c2 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Jun 2016 04:34:07 +0100 Subject: [PATCH 0877/3599] [tvp] extract all formats and detect erros --- youtube_dl/extractor/tvp.py | 59 ++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py index a4997cb89..5070082da 100644 --- a/youtube_dl/extractor/tvp.py +++ b/youtube_dl/extractor/tvp.py @@ -4,6 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + determine_ext, + clean_html, + get_element_by_attribute, + ExtractorError, +) class TVPIE(InfoExtractor): @@ -21,7 +27,7 @@ class TVPIE(InfoExtractor): }, }, { 'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', - 'md5': 'c3b15ed1af288131115ff17a17c19dda', + 'md5': 'b0005b542e5b4de643a9690326ab1257', 'info_dict': { 'id': '17916176', 'ext': 'mp4', @@ -53,6 +59,11 @@ class TVPIE(InfoExtractor): webpage = self._download_webpage( 'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) + error_massage = get_element_by_attribute('class', 'msg error', webpage) + if error_massage: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, clean_html(error_massage)), expected=True) + title = self._search_regex( r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', webpage, 'title', group='title') @@ -66,24 +77,50 @@ class TVPIE(InfoExtractor): r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None) video_url = self._search_regex( - r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None) - if not video_url: + r'0:{src:([\'"])(?P<url>.*?)\1', webpage, + 'formats', group='url', default=None) + if not video_url or 'material_niedostepny.mp4' in video_url: video_url = self._download_json( 
'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id, video_id)['video_url'] - ext = video_url.rsplit('.', 1)[-1] - if ext != 'ism/manifest': - if '/' in ext: - ext = 'mp4' + formats = [] + video_url_base = self._search_regex( + r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)', + video_url, 'video base url', default=None) + if video_url_base: + # TODO: Current DASH formats are broken - $Time$ pattern in + # <SegmentTemplate> not implemented yet + # formats.extend(self._extract_mpd_formats( + # video_url_base + '.ism/video.mpd', + # video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_f4m_formats( + video_url_base + '.ism/video.f4m', + video_id, f4m_id='hds', fatal=False)) + m3u8_formats = self._extract_m3u8_formats( + video_url_base + '.ism/video.m3u8', video_id, + 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + self._sort_formats(m3u8_formats) + m3u8_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + m3u8_formats)) + formats.extend(m3u8_formats) + for i, m3u8_format in enumerate(m3u8_formats, 2): + http_url = '%s-%d.mp4' % (video_url_base, i) + if self._is_valid_url(http_url, video_id): + f = m3u8_format.copy() + f.update({ + 'url': http_url, + 'format_id': f['format_id'].replace('hls', 'http'), + 'protocol': 'http', + }) + formats.append(f) + else: formats = [{ 'format_id': 'direct', 'url': video_url, - 'ext': ext, + 'ext': determine_ext(video_url, 'mp4'), }] - else: - m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url) - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') self._sort_formats(formats) From 494172d2e5b2d5b6f309b42e1a2bd7108aed40de Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Jun 2016 15:49:42 +0100 Subject: [PATCH 0878/3599] [appletrailers] extract info from an alternative source if available(closes #8422)(closes #8422) --- youtube_dl/extractor/appletrailers.py | 53 
++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index be40f85b4..babbd0265 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -7,6 +7,8 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( int_or_none, + parse_duration, + unified_strdate, ) @@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor): _TESTS = [{ 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', 'info_dict': { - 'id': 'manofsteel', + 'id': '5111', + 'title': 'Man of Steel', }, 'playlist': [ { @@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor): 'id': 'blackthorn', }, 'playlist_mincount': 2, + 'expected_warnings': ['Unable to download JSON metadata'], + }, { + # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json + 'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', + 'info_dict': { + 'id': '15881', + 'title': 'Kung Fu Panda 3', + }, + 'playlist_mincount': 4, }, { 'url': 'http://trailers.apple.com/ca/metropole/autrui/', 'only_matching': True, @@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor): movie = mobj.group('movie') uploader_id = mobj.group('company') + webpage = self._download_webpage(url, movie) + film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') + film_data = self._download_json( + 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, + film_id, fatal=False) + + if film_data: + entries = [] + for clip in film_data.get('clips', []): + clip_title = clip['title'] + + formats = [] + for version, version_data in clip.get('versions', {}).items(): + for size, size_data in version_data.get('sizes', {}).items(): + src = size_data.get('src') + if not src: + continue + formats.append({ + 'format_id': '%s-%s' % (version, size), + 'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), + 'width': 
int_or_none(size_data.get('width')), + 'height': int_or_none(size_data.get('height')), + 'language': version[:2], + }) + self._sort_formats(formats) + + entries.append({ + 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), + 'formats': formats, + 'title': clip_title, + 'thumbnail': clip.get('screen') or clip.get('runtime'), + 'duration': parse_duration(clip.get('runtime') or clip.get('faded')), + 'upload_date': unified_strdate(clip.get('posted')), + 'uploader_id': uploader_id, + }) + + page_data = film_data.get('page', {}) + return self.playlist_result(entries, film_id, page_data.get('movie_title')) + playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') def fix_html(s): From 8065d6c55f02c6f618e8495049f253d311cf347f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Jun 2016 17:22:15 +0100 Subject: [PATCH 0879/3599] [dcn] extend _VALID_URL for awaan.ae and extract all available formats --- youtube_dl/extractor/dcn.py | 47 ++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 5deff5f30..efb8585e8 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -20,7 +20,7 @@ from ..utils import ( class DCNIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' 
def _real_extract(self, url): show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() @@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor): 'is_live': is_live, } - def _extract_video_formats(self, webpage, video_id, entry_protocol): + def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): formats = [] - m3u8_url = self._html_search_regex( - r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False) - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None)) - - rtsp_url = self._search_regex( - r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) - + format_url_base = 'http' + self._html_search_regex( + [ + r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', + r'<a[^>]+href="rtsp(://[^"]+)"' + ], webpage, 'format url') + # TODO: Current DASH formats are broken - $Time$ pattern in + # <SegmentTemplate> not implemented yet + # formats.extend(self._extract_mpd_formats( + # format_url_base + '/manifest.mpd', + # video_id, mpd_id='dash', fatal=False)) + formats.extend(self._extract_m3u8_formats( + format_url_base + '/playlist.m3u8', video_id, 'mp4', + m3u8_entry_protocol, m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + format_url_base + '/manifest.f4m', + video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) return formats class DCNVideoIE(DCNBaseIE): IE_NAME = 'dcn:video' - _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' + _TESTS = [{ 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', 'info_dict': { @@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE): # 
m3u8 download 'skip_download': True, }, - } + }, { + 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE): class DCNLiveIE(DCNBaseIE): IE_NAME = 'dcn:live' - _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)' def _real_extract(self, url): channel_id = self._match_id(url) @@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE): class DCNSeasonIE(InfoExtractor): IE_NAME = 'dcn:season' - _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' _TEST = { 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', 'info_dict': From fee70322d76f416c3d68f58abdc73f9d3960083e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 23 Jun 2016 19:03:34 +0100 Subject: [PATCH 0880/3599] [appletrailers] correct thumbnail fallback --- youtube_dl/extractor/appletrailers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index babbd0265..a6801f3d4 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -127,7 +127,7 @@ class AppleTrailersIE(InfoExtractor): 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), 'formats': formats, 'title': clip_title, - 'thumbnail': clip.get('screen') or clip.get('runtime'), + 'thumbnail': clip.get('screen') or clip.get('thumb'), 
'duration': parse_duration(clip.get('runtime') or clip.get('faded')), 'upload_date': unified_strdate(clip.get('posted')), 'uploader_id': uploader_id, From c1ff6e1ad08c781ce1d486ddb7389fe90c79af35 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 24 Jun 2016 16:48:37 +0800 Subject: [PATCH 0881/3599] [vimeo:review] Fix extraction for password-protected videos Closes #9853 --- youtube_dl/extractor/vimeo.py | 72 ++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4bdeb1187..d9c9852d4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -16,6 +16,7 @@ from ..utils import ( ExtractorError, InAdvancePagedList, int_or_none, + NO_DEFAULT, RegexNotFoundError, sanitized_Request, smuggle_url, @@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor): self._set_vimeo_cookie('vuid', vuid) self._download_webpage(login_request, None, False, 'Wrong login info') + def _verify_video_password(self, url, video_id, webpage): + password = self._downloader.params.get('videopassword') + if password is None: + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) + token, vuid = self._extract_xsrft_and_vuid(webpage) + data = urlencode_postdata({ + 'password': password, + 'token': token, + }) + if url.startswith('http://'): + # vimeo only supports https now, but the user can give an http url + url = url.replace('http://', 'https://') + password_request = sanitized_Request(url + '/password', data) + password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') + password_request.add_header('Referer', url) + self._set_vimeo_cookie('vuid', vuid) + return self._download_webpage( + password_request, video_id, + 'Verifying the password', 'Wrong password') + def _extract_xsrft_and_vuid(self, webpage): xsrft = self._search_regex( 
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', @@ -344,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor): if mobj: return mobj.group(1) - def _verify_video_password(self, url, video_id, webpage): - password = self._downloader.params.get('videopassword') - if password is None: - raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token, vuid = self._extract_xsrft_and_vuid(webpage) - data = urlencode_postdata({ - 'password': password, - 'token': token, - }) - if url.startswith('http://'): - # vimeo only supports https now, but the user can give an http url - url = url.replace('http://', 'https://') - password_request = sanitized_Request(url + '/password', data) - password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - password_request.add_header('Referer', url) - self._set_vimeo_cookie('vuid', vuid) - return self._download_webpage( - password_request, video_id, - 'Verifying the password', 'Wrong password') - def _verify_player_video_password(self, url, video_id): password = self._downloader.params.get('videopassword') if password is None: @@ -791,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'uploader_id': 'user22258446', } + }, { + 'note': 'Password protected', + 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde', + 'info_dict': { + 'id': '138823582', + 'ext': 'mp4', + 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1', + 'uploader': 'TMB', + 'uploader_id': 'user37284429', + }, + 'params': { + 'videopassword': 'holygrail', + }, }] + def _real_initialize(self): + self._login() + + def _get_config_url(self, webpage_url, video_id, video_password_verified=False): + webpage = self._download_webpage(webpage_url, video_id) + config_url = self._html_search_regex( + r'data-config-url="([^"]+)"', webpage, 'config URL', + default=NO_DEFAULT if video_password_verified else None) + if 
config_url is None: + self._verify_video_password(webpage_url, video_id, webpage) + config_url = self._get_config_url( + webpage_url, video_id, video_password_verified=True) + return config_url + def _real_extract(self, url): video_id = self._match_id(url) - config = self._download_json( - 'https://player.vimeo.com/video/%s/config' % video_id, video_id) + config_url = self._get_config_url(url, video_id) + config = self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) self._vimeo_sort_formats(info_dict['formats']) info_dict['id'] = video_id From 896cc727508f1d1054d88405c64e731c4d5c4ce4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 24 Jun 2016 17:26:12 +0800 Subject: [PATCH 0882/3599] [mixcloud] View count and like count may be absent Closes #9874 --- youtube_dl/extractor/mixcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 483f6925f..560fe188b 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor): description = self._og_search_description(webpage) like_count = parse_count(self._search_regex( r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)', - webpage, 'like count', fatal=False)) + webpage, 'like count', default=None)) view_count = str_to_int(self._search_regex( [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', r'/listeners/?">([0-9,.]+)</a>'], - webpage, 'play count', fatal=False)) + webpage, 'play count', default=None)) return { 'id': track_id, From de3c7fe0d42fd6027b99ab87ee5b4a4b4054daf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Jun 2016 22:27:55 +0700 Subject: [PATCH 0883/3599] [youtube] Fix 141 format tests --- youtube_dl/extractor/youtube.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff 
--git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 00dd602ff..54c6e45f8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'youtube_include_dash_manifest': True, 'format': '141', }, + 'skip': 'format 141 not served anymore', }, # DASH manifest with encrypted signature { @@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'youtube_include_dash_manifest': True, - 'format': '141', + 'format': '141/bestaudio[ext=m4a]', }, }, # JS player signature function name containing $ @@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'youtube_include_dash_manifest': True, - 'format': '141', + 'format': '141/bestaudio[ext=m4a]', }, }, # Controversy video From 525cedb971c091793da5a0ef90670e6b5faded62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Jun 2016 22:37:13 +0700 Subject: [PATCH 0884/3599] [youtube] Relax URL expansion in description --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 54c6e45f8..46b9dc66c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1332,7 +1332,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:[a-zA-Z-]+="[^"]*"\s+)*? (?:title|href)="([^"]+)"\s+ (?:[a-zA-Z-]+="[^"]*"\s+)*? 
- class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*> + class="[^"]*"[^>]*> [^<]+\.{3}\s* </a> ''', r'\1', video_description) From be49068d65ae39bef5797071f8a7cf1c733f033b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Jun 2016 22:47:19 +0700 Subject: [PATCH 0885/3599] [youtube] Fix and skip some tests --- youtube_dl/extractor/youtube.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 46b9dc66c..c8d54f22a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -619,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', 'license': 'Standard YouTube License', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', - 'uploader': 'Olympics', + 'uploader': 'Olympic', 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', }, 'params': { @@ -672,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader': 'dorappi2000', 'license': 'Standard YouTube License', - 'formats': 'mincount:33', + 'formats': 'mincount:32', }, }, # DASH manifest with segment_list @@ -692,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'youtube_include_dash_manifest': True, 'format': '135', # bestvideo - } + }, + 'skip': 'This live event has ended.', }, { # Multifeed videos (multiple cameras), URL is for Main Camera @@ -763,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', }, 'playlist_count': 2, + 'skip': 'Not multifeed anymore', }, { 'url': 'http://vid.plus/FlRa-iH7PGw', @@ -815,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video does not exist.', }, { # Video licensed under 
Creative Commons From 3d4b08dfc7a1cf49686b68d405053475a4c3c490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Jun 2016 02:50:12 +0700 Subject: [PATCH 0886/3599] [setup.py] Add file version information and quotes consistency (Closes #9878) --- setup.py | 63 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/setup.py b/setup.py index c1e923f71..508b27f37 100644 --- a/setup.py +++ b/setup.py @@ -21,25 +21,37 @@ try: import py2exe except ImportError: if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': - print("Cannot import py2exe", file=sys.stderr) + print('Cannot import py2exe', file=sys.stderr) exit(1) py2exe_options = { - "bundle_files": 1, - "compressed": 1, - "optimize": 2, - "dist_dir": '.', - "dll_excludes": ['w9xpopen.exe', 'crypt32.dll'], + 'bundle_files': 1, + 'compressed': 1, + 'optimize': 2, + 'dist_dir': '.', + 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], } +# Get the version from youtube_dl/version.py without importing the package +exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) + +DESCRIPTION = 'YouTube video downloader' +LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites' + py2exe_console = [{ - "script": "./youtube_dl/__main__.py", - "dest_base": "youtube-dl", + 'script': './youtube_dl/__main__.py', + 'dest_base': 'youtube-dl', + 'version': __version__, + 'description': DESCRIPTION, + 'comments': LONG_DESCRIPTION, + 'product_name': 'youtube-dl', + 'product_version': __version__, }] py2exe_params = { 'console': py2exe_console, - 'options': {"py2exe": py2exe_options}, + 'options': {'py2exe': py2exe_options}, 'zipfile': None } @@ -72,7 +84,7 @@ else: params['scripts'] = ['bin/youtube-dl'] class build_lazy_extractors(Command): - description = "Build the extractor lazy loading module" + description = 'Build the extractor lazy loading module' 
user_options = [] def initialize_options(self): @@ -87,16 +99,11 @@ class build_lazy_extractors(Command): dry_run=self.dry_run, ) -# Get the version from youtube_dl/version.py without importing the package -exec(compile(open('youtube_dl/version.py').read(), - 'youtube_dl/version.py', 'exec')) - setup( name='youtube_dl', version=__version__, - description='YouTube video downloader', - long_description='Small command-line program to download videos from' - ' YouTube.com and other video sites.', + description=DESCRIPTION, + long_description=LONG_DESCRIPTION, url='https://github.com/rg3/youtube-dl', author='Ricardo Garcia', author_email='ytdl@yt-dl.org', @@ -112,17 +119,17 @@ setup( # test_requires = ['nosetest'], classifiers=[ - "Topic :: Multimedia :: Video", - "Development Status :: 5 - Production/Stable", - "Environment :: Console", - "License :: Public Domain", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.2", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", + 'Topic :: Multimedia :: Video', + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'License :: Public Domain', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', ], cmdclass={'build_lazy_extractors': build_lazy_extractors}, From b4241e308e9b2d38d564833cb6c43c9fcc0fd280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Jun 2016 03:03:20 +0700 Subject: [PATCH 0887/3599] release 2016.06.25 --- .github/ISSUE_TEMPLATE.md | 6 +++--- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 4 
deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 62cb18d7d..c73f9a904 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.25** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.23.1 +[debug] youtube-dl version 2016.06.25 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0238dc97c..2b7a4c98d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.23.1' +__version__ = '2016.06.25' From 
46f59e89ea1e75bf2bd1657f0863a3e5e81f91ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Jun 2016 22:30:35 +0700 Subject: [PATCH 0888/3599] [utils] Add unified_timestamp --- test/test_utils.py | 21 +++++++ youtube_dl/utils.py | 150 +++++++++++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 58 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b7ef51f8d..7f9385deb 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -60,6 +60,7 @@ from youtube_dl.utils import ( timeconvert, unescapeHTML, unified_strdate, + unified_timestamp, unsmuggle_url, uppercase_escape, lowercase_escape, @@ -283,8 +284,28 @@ class TestUtil(unittest.TestCase): '20150202') self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214') self.assertEqual(unified_strdate('25-09-2014'), '20140925') + self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) + def test_unified_timestamps(self): + self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) + self.assertEqual(unified_timestamp('8/7/2009'), 1247011200) + self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200) + self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598) + self.assertEqual(unified_timestamp('1968 12 10'), -33436800) + self.assertEqual(unified_timestamp('1968-12-10'), -33436800) + self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200) + self.assertEqual( + unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False), + 1417001400) + self.assertEqual( + unified_timestamp('2/2/2015 6:47:40 PM', day_first=False), + 1422902860) + self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900) + self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) + self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) + self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) 
+ def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 562031fe1..de66cb482 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -110,6 +110,49 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐ،٠itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) +DATE_FORMATS = ( + '%d %B %Y', + '%d %b %Y', + '%B %d %Y', + '%b %d %Y', + '%b %dst %Y %I:%M', + '%b %dnd %Y %I:%M', + '%b %dth %Y %I:%M', + '%Y %m %d', + '%Y-%m-%d', + '%Y/%m/%d', + '%Y/%m/%d %H:%M:%S', + '%Y-%m-%d %H:%M:%S', + '%Y-%m-%d %H:%M:%S.%f', + '%d.%m.%Y %H:%M', + '%d.%m.%Y %H.%M', + '%Y-%m-%dT%H:%M:%SZ', + '%Y-%m-%dT%H:%M:%S.%fZ', + '%Y-%m-%dT%H:%M:%S.%f0Z', + '%Y-%m-%dT%H:%M:%S', + '%Y-%m-%dT%H:%M:%S.%f', + '%Y-%m-%dT%H:%M', +) + +DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) +DATE_FORMATS_DAY_FIRST.extend([ + '%d-%m-%Y', + '%d.%m.%Y', + '%d.%m.%y', + '%d/%m/%Y', + '%d/%m/%y', + '%d/%m/%Y %H:%M:%S', +]) + +DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) +DATE_FORMATS_MONTH_FIRST.extend([ + '%m-%d-%Y', + '%m.%d.%Y', + '%m/%d/%Y', + '%m/%d/%y', + '%m/%d/%Y %H:%M:%S', +]) + def preferredencoding(): """Get preferred encoding. 
@@ -975,6 +1018,24 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): https_response = http_response +def extract_timezone(date_str): + m = re.search( + r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', + date_str) + if not m: + timezone = datetime.timedelta() + else: + date_str = date_str[:-len(m.group('tz'))] + if not m.group('sign'): + timezone = datetime.timedelta() + else: + sign = 1 if m.group('sign') == '+' else -1 + timezone = datetime.timedelta( + hours=sign * int(m.group('hours')), + minutes=sign * int(m.group('minutes'))) + return timezone, date_str + + def parse_iso8601(date_str, delimiter='T', timezone=None): """ Return a UNIX timestamp from the given date """ @@ -984,20 +1045,8 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): date_str = re.sub(r'\.[0-9]+', '', date_str) if timezone is None: - m = re.search( - r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', - date_str) - if not m: - timezone = datetime.timedelta() - else: - date_str = date_str[:-len(m.group(0))] - if not m.group('sign'): - timezone = datetime.timedelta() - else: - sign = 1 if m.group('sign') == '+' else -1 - timezone = datetime.timedelta( - hours=sign * int(m.group('hours')), - minutes=sign * int(m.group('minutes'))) + timezone, date_str = extract_timezone(date_str) + try: date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) dt = datetime.datetime.strptime(date_str, date_format) - timezone @@ -1006,6 +1055,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): pass +def date_formats(day_first=True): + return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST + + def unified_strdate(date_str, day_first=True): """Return a string with the date in the format YYYYMMDD""" @@ -1014,53 +1067,11 @@ def unified_strdate(date_str, day_first=True): upload_date = None # Replace commas date_str = date_str.replace(',', ' ') - # %z (UTC offset) is only supported in python>=3.2 - if 
not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): - date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) # Remove AM/PM + timezone date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) + _, date_str = extract_timezone(date_str) - format_expressions = [ - '%d %B %Y', - '%d %b %Y', - '%B %d %Y', - '%b %d %Y', - '%b %dst %Y %I:%M', - '%b %dnd %Y %I:%M', - '%b %dth %Y %I:%M', - '%Y %m %d', - '%Y-%m-%d', - '%Y/%m/%d', - '%Y/%m/%d %H:%M:%S', - '%Y-%m-%d %H:%M:%S', - '%Y-%m-%d %H:%M:%S.%f', - '%d.%m.%Y %H:%M', - '%d.%m.%Y %H.%M', - '%Y-%m-%dT%H:%M:%SZ', - '%Y-%m-%dT%H:%M:%S.%fZ', - '%Y-%m-%dT%H:%M:%S.%f0Z', - '%Y-%m-%dT%H:%M:%S', - '%Y-%m-%dT%H:%M:%S.%f', - '%Y-%m-%dT%H:%M', - ] - if day_first: - format_expressions.extend([ - '%d-%m-%Y', - '%d.%m.%Y', - '%d.%m.%y', - '%d/%m/%Y', - '%d/%m/%y', - '%d/%m/%Y %H:%M:%S', - ]) - else: - format_expressions.extend([ - '%m-%d-%Y', - '%m.%d.%Y', - '%m/%d/%Y', - '%m/%d/%y', - '%m/%d/%Y %H:%M:%S', - ]) - for expression in format_expressions: + for expression in date_formats(day_first): try: upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') except ValueError: @@ -1076,6 +1087,29 @@ def unified_strdate(date_str, day_first=True): return compat_str(upload_date) +def unified_timestamp(date_str, day_first=True): + if date_str is None: + return None + + date_str = date_str.replace(',', ' ') + + pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0) + timezone, date_str = extract_timezone(date_str) + + # Remove AM/PM + timezone + date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) + + for expression in date_formats(day_first): + try: + dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta + return calendar.timegm(dt.timetuple()) + except ValueError: + pass + timetuple = email.utils.parsedate_tz(date_str) + if timetuple: + return calendar.timegm(timetuple.timetuple()) + + def determine_ext(url, 
default_ext='unknown_video'): if url is None: return default_ext From b72b44318c0de52befe003c14297cae9f7a283c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Jun 2016 22:32:02 +0700 Subject: [PATCH 0889/3599] [utils] Add strip_or_none --- youtube_dl/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index de66cb482..a375282f2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1660,6 +1660,10 @@ def float_or_none(v, scale=1, invscale=1, default=None): return default +def strip_or_none(v): + return None if v is None else v.strip() + + def parse_duration(s): if not isinstance(s, compat_basestring): return None From 2d185706ea0236a30bd1037a3ab97fbe5fe575a5 Mon Sep 17 00:00:00 2001 From: Jakub Adam Wieczorek <ja.wieczorek@student.uw.edu.pl> Date: Thu, 16 Jun 2016 21:00:27 +0200 Subject: [PATCH 0890/3599] [polskieradio] Add support for Polskie Radio. Polskie Radio is the main Polish state-funded radio broadcasting service. 
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/polskieradio.py | 74 ++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 youtube_dl/extractor/polskieradio.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6fc5a18f5..9f98a1490 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -606,6 +606,7 @@ from .pluralsight import ( PluralsightCourseIE, ) from .podomatic import PodomaticIE +from .polskieradio import PolskieRadioIE from .porn91 import Porn91IE from .pornhd import PornHdIE from .pornhub import ( diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py new file mode 100644 index 000000000..5d4b116d8 --- /dev/null +++ b/youtube_dl/extractor/polskieradio.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + +import calendar +from datetime import datetime + + +class PolskieRadioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/[0-9]+/[0-9]+/Artykul/(?P<id>[0-9]+),.+' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', + 'md5': '2984ee6ce9046d91fc233bc1a864a09a', + 'info_dict': { + 'id': '1587943', + 'ext': 'mp3', + 'title': 'Prof. 
Andrzej Nowak: o historii nie da się myśleć beznamiętnie', + 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', + 'release_date': '20160227', + 'upload_date': '20160227', + 'timestamp': 1456594200, + 'duration': 2364 + } + }, { + 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', + 'md5': '68a393e25b942c1a76872f56d303a31a', + 'info_dict': { + 'id': '1632955', + 'ext': 'mp3', + 'title': 'Bardzo popularne słowo: remis', + 'description': 'md5:3b58dfae614100abc0f175a0b26d5680', + 'release_date': '20160617', + 'upload_date': '20160617', + 'timestamp': 1466184900, + 'duration': 393 + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + metadata_string = self._html_search_regex(r'<span class="play pr-media-play" data-media=(\{.+\})>', webpage, 'metadata') + metadata = self._parse_json(metadata_string, video_id) + + title = self._og_search_title(webpage) + if title is not None: + title = title.strip() + + description = self._og_search_description(webpage) + if description is not None: + description = description.strip() + + release_date = self._html_search_regex(r'Data emisji:[^0-9]+([0-9]{1,2}\.[0-9]{2}\.[0-9]{4})', webpage, 'release date', fatal=False) + if release_date is not None: + release_date = datetime.strptime(release_date, '%d.%m.%Y').strftime('%Y%m%d') + + upload_datetime = self._html_search_regex(r'<span id="datetime2" class="time">\s+(.+)\s+</span>', webpage, 'release time', fatal=False) + if upload_datetime is not None: + timestamp = calendar.timegm(datetime.strptime(upload_datetime, '%d.%m.%Y %H:%M').timetuple()) + else: + timestamp = None + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'display_id': metadata.get('id'), + 'duration': int_or_none(metadata.get('length')), + 'url': self._proto_relative_url(metadata.get('file'), 'http:'), + 'release_date': release_date, + 'timestamp': timestamp + } From 
0463b77a1f83f3f9239c6c5f5d1ca251afd267e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Jun 2016 23:18:40 +0700 Subject: [PATCH 0891/3599] [polskieradio] Improve extraction (Closes #9813) --- youtube_dl/extractor/polskieradio.py | 119 +++++++++++++++------------ 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index 5d4b116d8..f5adff08f 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -1,74 +1,91 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import int_or_none +import re -import calendar -from datetime import datetime +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, +) +from ..utils import ( + int_or_none, + strip_or_none, + unified_timestamp, +) class PolskieRadioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/[0-9]+/[0-9]+/Artykul/(?P<id>[0-9]+),.+' + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', - 'md5': '2984ee6ce9046d91fc233bc1a864a09a', 'info_dict': { 'id': '1587943', - 'ext': 'mp3', 'title': 'Prof. 
Andrzej Nowak: o historii nie da się myśleć beznamiętnie', 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', - 'release_date': '20160227', - 'upload_date': '20160227', - 'timestamp': 1456594200, - 'duration': 2364 - } + }, + 'playlist': [{ + 'md5': '2984ee6ce9046d91fc233bc1a864a09a', + 'info_dict': { + 'id': '1540576', + 'ext': 'mp3', + 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', + 'timestamp': 1456594200, + 'upload_date': '20160227', + 'duration': 2364, + }, + }], + }, { + 'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal', + 'info_dict': { + 'id': '1635803', + 'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał', + 'description': 'md5:01cb7d0cad58664095d72b51a1ebada2', + }, + 'playlist_mincount': 12, }, { 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', - 'md5': '68a393e25b942c1a76872f56d303a31a', - 'info_dict': { - 'id': '1632955', - 'ext': 'mp3', - 'title': 'Bardzo popularne słowo: remis', - 'description': 'md5:3b58dfae614100abc0f175a0b26d5680', - 'release_date': '20160617', - 'upload_date': '20160617', - 'timestamp': 1466184900, - 'duration': 393 - } + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - metadata_string = self._html_search_regex(r'<span class="play pr-media-play" data-media=(\{.+\})>', webpage, 'metadata') - metadata = self._parse_json(metadata_string, video_id) + playlist_id = self._match_id(url) - title = self._og_search_title(webpage) - if title is not None: - title = title.strip() + webpage = self._download_webpage(url, playlist_id) - description = self._og_search_description(webpage) - if description is not None: - description = description.strip() + content = self._search_regex( + 
r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>', + webpage, 'content') - release_date = self._html_search_regex(r'Data emisji:[^0-9]+([0-9]{1,2}\.[0-9]{2}\.[0-9]{4})', webpage, 'release date', fatal=False) - if release_date is not None: - release_date = datetime.strptime(release_date, '%d.%m.%Y').strftime('%Y%m%d') + timestamp = unified_timestamp(self._html_search_regex( + r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>', + webpage, 'timestamp', fatal=False)) - upload_datetime = self._html_search_regex(r'<span id="datetime2" class="time">\s+(.+)\s+</span>', webpage, 'release time', fatal=False) - if upload_datetime is not None: - timestamp = calendar.timegm(datetime.strptime(upload_datetime, '%d.%m.%Y %H:%M').timetuple()) - else: - timestamp = None + entries = [] - return { - 'id': video_id, - 'title': title, - 'description': description, - 'display_id': metadata.get('id'), - 'duration': int_or_none(metadata.get('length')), - 'url': self._proto_relative_url(metadata.get('file'), 'http:'), - 'release_date': release_date, - 'timestamp': timestamp - } + media_urls = set() + + for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content): + media = self._parse_json(data_media, playlist_id, fatal=False) + if not media.get('file') or not media.get('desc'): + continue + media_url = self._proto_relative_url(media['file'], 'http:') + if media_url in media_urls: + continue + media_urls.add(media_url) + entries.append({ + 'id': compat_str(media['id']), + 'url': media_url, + 'title': compat_urllib_parse_unquote(media['desc']), + 'duration': int_or_none(media.get('length')), + 'vcodec': 'none' if media.get('provider') == 'audio' else None, + 'timestamp': timestamp, + }) + + title = self._og_search_title(webpage).strip() + description = strip_or_none(self._og_search_description(webpage)) + + return self.playlist_result(entries, playlist_id, title, description) From ce96ed05f42d42f8a506a2a527c776054c44ad1e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Jun 2016 23:31:21 +0700 Subject: [PATCH 0892/3599] [polskieradio] Add test with video --- youtube_dl/extractor/polskieradio.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index f5adff08f..d3bebaea3 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -49,6 +49,10 @@ class PolskieRadioIE(InfoExtractor): }, { 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943', 'only_matching': True, + }, { + # with mp4 video + 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', + 'only_matching': True, }] def _real_extract(self, url): From 0c00e889f3616cecd4de161681924c4cb12ce320 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Jun 2016 23:35:57 +0700 Subject: [PATCH 0893/3599] Credit @JakubAdamWieczorek for #9813 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index cdf655c39..bdd29687d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -175,3 +175,4 @@ Tomáš Čech Déstin Reed Roman Tsiupa Artur Krysiak +Jakub Adam Wieczorek From ac782306f18430479e881ffd1ac749baff4dd9aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Jun 2016 00:25:41 +0700 Subject: [PATCH 0894/3599] [iqiyi] Mark broken --- youtube_dl/extractor/iqiyi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index ddcb3c916..5dd15e26f 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -165,6 +165,8 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' + _WORKING = False + _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html' _NETRC_MACHINE = 'iqiyi' From 3b34ab538c5bc67be18df8376c25f433ea1ff92b Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Jun 2016 00:29:53 +0700 Subject: [PATCH 0895/3599] [svtplay] Extend _VALID_URL (#9900) --- youtube_dl/extractor/svt.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 67f56fab8..1c04dfb7b 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -120,7 +120,7 @@ class SVTIE(SVTBaseIE): class SVTPlayIE(SVTBaseIE): IE_DESC = 'SVT Play and Öppet arkiv' - _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'md5': '2b6704fe4a28801e1a098bbf3c5ac611', @@ -141,6 +141,9 @@ class SVTPlayIE(SVTBaseIE): # geo restricted to Sweden 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 'only_matching': True, + }, { + 'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg', + 'only_matching': True, }] def _real_extract(self, url): From a2406fce3c657af116f95c5e9b965315aa23cd95 Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Sun, 26 Jun 2016 01:23:48 +0700 Subject: [PATCH 0896/3599] Fix misspelling --- youtube_dl/socks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index fd49d7435..104807242 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -76,7 +76,7 @@ class Socks4Error(ProxyError): CODES = { 91: 'request rejected or failed', - 92: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 92: 'request rejected because SOCKS server cannot connect to identd on the client', 93: 'request rejected because the client program and identd report different user-ids' } From 7d52c052efe7accf098bca84aef0ea70caa64889 Mon Sep 17 
00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Jun 2016 11:54:52 +0800 Subject: [PATCH 0897/3599] [generic] Fix test_Generic_76 Broken: https://travis-ci.org/rg3/youtube-dl/jobs/140251658 --- youtube_dl/extractor/generic.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4aa24061c..1592a8a3a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1091,12 +1091,17 @@ class GenericIE(InfoExtractor): # Dailymotion Cloud video { 'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', - 'md5': '49444254273501a64675a7e68c502681', + 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38', 'info_dict': { - 'id': '5585de919473990de4bee11b', + 'id': 'x2uy8t3', 'ext': 'mp4', - 'title': 'Le débat', + 'title': 'Sauvons les abeilles ! - Le débat', + 'description': 'md5:d9082128b1c5277987825d684939ca26', 'thumbnail': 're:^https?://.*\.jpe?g$', + 'timestamp': 1434970506, + 'upload_date': '20150622', + 'uploader': 'Public Sénat', + 'uploader_id': 'xa9gza', } }, # OnionStudios embed From 1143535d762fc4260aacc108f2c41079867f9f00 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Jun 2016 15:16:49 +0800 Subject: [PATCH 0898/3599] [utils] Add urshift() Used in IqiyiIE and LeIE --- test/test_utils.py | 5 +++++ youtube_dl/utils.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 7f9385deb..ed61e4c27 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -66,6 +66,7 @@ from youtube_dl.utils import ( lowercase_escape, url_basename, urlencode_postdata, + urshift, update_url_query, version_tuple, xpath_with_ns, @@ -980,5 +981,9 @@ The first line self.assertRaises(ValueError, encode_base_n, 0, 70) self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) + def 
test_urshift(self): + self.assertEqual(urshift(3, 1), 1) + self.assertEqual(urshift(-3, 1), 2147483646) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a375282f2..a2cfb48a6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2899,3 +2899,7 @@ def parse_m3u8_attributes(attrib): val = val[1:-1] info[key] = val return info + + +def urshift(val, n): + return val >> n if val >= 0 else (val + 0x100000000) >> n From 30105f4ac0291bd3e1350a5bb383e88e260b9ad9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Jun 2016 15:17:26 +0800 Subject: [PATCH 0899/3599] [le] Move urshift() to utils.py --- youtube_dl/extractor/leeco.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 63f581cd9..959d71617 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -23,6 +23,7 @@ from ..utils import ( sanitized_Request, str_or_none, url_basename, + urshift, ) @@ -74,15 +75,11 @@ class LeIE(InfoExtractor): 'only_matching': True, }] - @staticmethod - def urshift(val, n): - return val >> n if val >= 0 else (val + 0x100000000) >> n - # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf def ror(self, param1, param2): _loc3_ = 0 while _loc3_ < param2: - param1 = self.urshift(param1, 1) + ((param1 & 1) << 31) + param1 = urshift(param1, 1) + ((param1 & 1) << 31) _loc3_ += 1 return param1 From 5b6ad8630c4947f3695513c9707406b2d12ae7b8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Jun 2016 15:18:32 +0800 Subject: [PATCH 0900/3599] [iqiyi] Partially fix IqiyiIE Use the HTML5 API. 
Only low-resolution formats available Related: #9839 Thanks @zhangn1985 for the overall algorithm (soimort/you-get#1224) --- youtube_dl/extractor/iqiyi.py | 404 ++++++++++------------------------ 1 file changed, 118 insertions(+), 286 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 5dd15e26f..b717ca09c 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -1,30 +1,25 @@ # coding: utf-8 from __future__ import unicode_literals +import binascii import hashlib import itertools import math -import os -import random import re import time -import uuid from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_str, compat_urllib_parse_urlencode, - compat_urllib_parse_urlparse, ) from ..utils import ( decode_packed_codes, ExtractorError, + intlist_to_bytes, ohdave_rsa_encrypt, remove_start, - sanitized_Request, - urlencode_postdata, - url_basename, + urshift, ) @@ -165,77 +160,28 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' - _WORKING = False - _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html' _NETRC_MACHINE = 'iqiyi' _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', - 'md5': '2cb594dc2781e6c941a110d8f358118b', + 'md5': '470a6c160618577166db1a7aac5a3606', 'info_dict': { 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', + 'ext': 'mp4', 'title': '美国德州空中惊现奇异云团 酷似UFO', - 'ext': 'f4v', } }, { 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', + 'md5': 'f09f0a6a59b2da66a26bf4eda669a4cc', 'info_dict': { 'id': 'e3f585b550a280af23c98b6cb2be19fb', - 'title': '名侦探柯南第752集', + 'ext': 'mp4', + 'title': '名侦探柯南 国语版', }, - 'playlist': [{ - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', - 'ext': 'f4v', - 'title': '名侦探柯南第752集', - }, - }, { - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', - 'ext': 'f4v', - 'title': '名侦探柯南第752集', - }, - }, { - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', - 'ext': 
'f4v', - 'title': '名侦探柯南第752集', - }, - }, { - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', - 'ext': 'f4v', - 'title': '名侦探柯南第752集', - }, - }, { - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', - 'ext': 'f4v', - 'title': '名侦探柯南第752集', - }, - }, { - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', - 'ext': 'f4v', - 'title': '名侦探柯南第752集', - }, - }, { - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', - 'ext': 'f4v', - 'title': '名侦探柯南第752集', - }, - }, { - 'info_dict': { - 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', - 'ext': 'f4v', - 'title': '名侦探柯南第752集', - }, - }], 'params': { - 'skip_download': True, + 'cn_verification_proxy': 'http://proxy.uku.im:443/', }, }, { 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', @@ -289,13 +235,6 @@ class IqiyiIE(InfoExtractor): ('10', 'h1'), ] - AUTH_API_ERRORS = { - # No preview available (不允许试看鉴权失败) - 'Q00505': 'This video requires a VIP account', - # End of preview time (试看结束鉴权失败) - 'Q00506': 'Needs a VIP account for full video', - } - def _real_initialize(self): self._login() @@ -354,177 +293,101 @@ class IqiyiIE(InfoExtractor): return True - def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning): - auth_params = { - # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as - 'version': '2.0', - 'platform': 'b6c13e26323c537d', - 'aid': tvid, + @staticmethod + def _gen_sc(tvid, timestamp): + M = [1732584193, -271733879] + M.extend([~M[0], ~M[1]]) + I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21] + C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432] + + def L(n, t): + if t is None: + t = 0 + return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1)) + + def trunc(n): + n = n % 0x100000000 + if n > 0x7fffffff: + n -= 0x100000000 + return n + + def transform(string, mod): + num = int(string, 16) + return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 
3) + + C = list(C_base) + o = list(M) + k = str(timestamp - 7) + for i in range(13): + a = i + C[a >> 2] |= ord(k[a]) << 8 * (a % 4) + + for i in range(16): + a = i + 13 + start = (i >> 2) * 8 + r = '03967743b643f66763d623d637e30733' + C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7) + + for i in range(16): + a = i + 29 + start = (i >> 2) * 8 + r = '7038766939776a32776a32706b337139' + C[a >> 2] |= transform(r[start:start + 8], 1) + + for i in range(9): + a = i + 45 + if i < len(tvid): + C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4) + + for a in range(64): + i = a + I = i >> 4 + C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + urshift(a, 6) + m = L(L(o[0], [ + trunc(o[1] & o[2]) | trunc(~o[1] & o[3]), + trunc(o[3] & o[1]) | trunc(~o[3] & o[2]), + o[1] ^ o[2] ^ o[3], + o[2] ^ trunc(o[1] | ~o[3]) + ][I]), L( + trunc(int(abs(math.sin(i + 1)) * 4294967296)), + C[C_index] if C_index < len(C) else None)) + I = I_table[4 * I + i % 4] + o = [o[3], + L(o[1], trunc(trunc(m << I) | urshift(m, 32 - I))), + o[1], + o[2]] + + new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])] + s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)] + return binascii.hexlify(intlist_to_bytes(s))[1::2].decode('ascii') + + def get_raw_data(self, tvid, video_id): + tm = int(time.time() * 1000) + + sc = self._gen_sc(tvid, tm) + params = { + 'platForm': 'h5', + 'rate': 1, 'tvid': tvid, - 'uid': '', - 'deviceId': _uuid, - 'playType': 'main', # XXX: always main? 
- 'filename': os.path.splitext(url_basename(api_video_url))[0], - } - - qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query) - for key, val in qd_items.items(): - auth_params[key] = val[0] - - auth_req = sanitized_Request( - 'http://api.vip.iqiyi.com/services/ckn.action', - urlencode_postdata(auth_params)) - # iQiyi server throws HTTP 405 error without the following header - auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded') - auth_result = self._download_json( - auth_req, video_id, - note='Downloading video authentication JSON', - errnote='Unable to download video authentication JSON') - - code = auth_result.get('code') - msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code - if code == 'Q00506': - if do_report_warning: - self.report_warning(msg) - return False - if 'data' not in auth_result: - if msg is not None: - raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True) - raise ExtractorError('Unexpected error from Iqiyi auth API') - - return auth_result['data'] - - def construct_video_urls(self, data, video_id, _uuid, tvid): - def do_xor(x, y): - a = y % 3 - if a == 1: - return x ^ 121 - if a == 2: - return x ^ 72 - return x ^ 103 - - def get_encode_code(l): - a = 0 - b = l.split('-') - c = len(b) - s = '' - for i in range(c - 1, -1, -1): - a = do_xor(int(b[c - i - 1], 16), i) - s += chr(a) - return s[::-1] - - def get_path_key(x, format_id, segment_index): - mg = ')(*&^flash@#$%a' - tm = self._download_json( - 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id, - note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) - )['t'] - t = str(int(math.floor(int(tm) / (600.0)))) - return md5_text(t + mg + x) - - video_urls_dict = {} - need_vip_warning_report = True - for format_item in data['vp']['tkl'][0]['vs']: - if 0 < int(format_item['bid']) <= 10: - format_id = self.get_format(format_item['bid']) - else: - continue - - video_urls = [] - 
- video_urls_info = format_item['fs'] - if not format_item['fs'][0]['l'].startswith('/'): - t = get_encode_code(format_item['fs'][0]['l']) - if t.endswith('mp4'): - video_urls_info = format_item['flvs'] - - for segment_index, segment in enumerate(video_urls_info): - vl = segment['l'] - if not vl.startswith('/'): - vl = get_encode_code(vl) - is_vip_video = '/vip/' in vl - filesize = segment['b'] - base_url = data['vp']['du'].split('/') - if not is_vip_video: - key = get_path_key( - vl.split('/')[-1].split('.')[0], format_id, segment_index) - base_url.insert(-1, key) - base_url = '/'.join(base_url) - param = { - 'su': _uuid, - 'qyid': uuid.uuid4().hex, - 'client': '', - 'z': '', - 'bt': '', - 'ct': '', - 'tn': str(int(time.time())) - } - api_video_url = base_url + vl - if is_vip_video: - api_video_url = api_video_url.replace('.f4v', '.hml') - auth_result = self._authenticate_vip_video( - api_video_url, video_id, tvid, _uuid, need_vip_warning_report) - if auth_result is False: - need_vip_warning_report = False - break - param.update({ - 't': auth_result['t'], - # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as - 'cid': 'afbe8fd3d73448c9', - 'vid': video_id, - 'QY00001': auth_result['u'], - }) - api_video_url += '?' if '?' 
not in api_video_url else '&' - api_video_url += compat_urllib_parse_urlencode(param) - js = self._download_json( - api_video_url, video_id, - note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) - video_url = js['l'] - video_urls.append( - (video_url, filesize)) - - video_urls_dict[format_id] = video_urls - return video_urls_dict - - def get_format(self, bid): - matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] - return matched_format_ids[0] if len(matched_format_ids) else None - - def get_bid(self, format_id): - matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] - return matched_bids[0] if len(matched_bids) else None - - def get_raw_data(self, tvid, video_id, enc_key, _uuid): - tm = str(int(time.time())) - tail = tm + tvid - param = { - 'key': 'fvip', - 'src': md5_text('youtube-dl'), - 'tvId': tvid, 'vid': video_id, - 'vinfo': 1, - 'tm': tm, - 'enc': md5_text(enc_key + tail), - 'qyid': _uuid, - 'tn': random.random(), - # In iQiyi's flash player, um is set to 1 if there's a logged user - # Some 1080P formats are only available with a logged user. - # Here force um=1 to trick the iQiyi server - 'um': 1, - 'authkey': md5_text(md5_text('') + tail), - 'k_tag': 1, + 'cupid': 'qc_100001_100186', + 'type': 'mp4', + 'nolimit': 0, + 'agenttype': 13, + 'src': 'd846d0c32d664d32b6b54ea48997a589', + 'sc': sc, + 't': tm - 7, + '__jsT': None, } - api_url = 'http://cache.video.qiyi.com/vms' + '?' 
+ \ - compat_urllib_parse_urlencode(param) - raw_data = self._download_json(api_url, video_id) - return raw_data - - def get_enc_key(self, video_id): - # TODO: automatic key extraction - # last update at 2016-01-22 for Zombie::bite - enc_key = '4a1caba4b4465345366f28da7c117d20' - return enc_key + headers = {} + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + headers['Ytdl-request-proxy'] = cn_verification_proxy + return self._download_json( + 'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id), + video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='), + query=params, headers=headers) def _extract_playlist(self, webpage): PAGE_SIZE = 50 @@ -573,58 +436,27 @@ class IqiyiIE(InfoExtractor): r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') video_id = self._search_regex( r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') - _uuid = uuid.uuid4().hex - enc_key = self.get_enc_key(video_id) + for _ in range(5): + raw_data = self.get_raw_data(tvid, video_id) - raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) + if raw_data['code'] != 'A00000': + if raw_data['code'] == 'A00111': + self.raise_geo_restricted() + raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) - if raw_data['code'] != 'A000000': - raise ExtractorError('Unable to load data. 
Error code: ' + raw_data['code']) + data = raw_data['data'] - data = raw_data['data'] + # iQiYi sometimes returns Ads + if not isinstance(data['playInfo'], dict): + self._sleep(5, video_id) + continue - title = data['vi']['vn'] + title = data['playInfo']['an'] + break - # generate video_urls_dict - video_urls_dict = self.construct_video_urls( - data, video_id, _uuid, tvid) - - # construct info - entries = [] - for format_id in video_urls_dict: - video_urls = video_urls_dict[format_id] - for i, video_url_info in enumerate(video_urls): - if len(entries) < i + 1: - entries.append({'formats': []}) - entries[i]['formats'].append( - { - 'url': video_url_info[0], - 'filesize': video_url_info[-1], - 'format_id': format_id, - 'preference': int(self.get_bid(format_id)) - } - ) - - for i in range(len(entries)): - self._sort_formats(entries[i]['formats']) - entries[i].update( - { - 'id': '%s_part%d' % (video_id, i + 1), - 'title': title, - } - ) - - if len(entries) > 1: - info = { - '_type': 'multi_video', - 'id': video_id, - 'title': title, - 'entries': entries, - } - else: - info = entries[0] - info['id'] = video_id - info['title'] = title - - return info + return { + 'id': video_id, + 'title': title, + 'url': data['m3u'], + } From fc3996bfe15deae02f4d8f1f4dc34a89fb8bfb03 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 26 Jun 2016 15:45:41 +0800 Subject: [PATCH 0901/3599] [iqiyi] Remove codes for debugging --- youtube_dl/extractor/iqiyi.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index b717ca09c..fea26685e 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -180,9 +180,7 @@ class IqiyiIE(InfoExtractor): 'ext': 'mp4', 'title': '名侦探柯南 国语版', }, - 'params': { - 'cn_verification_proxy': 'http://proxy.uku.im:443/', - }, + 'skip': 'Geo-restricted to China', }, { 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', 'only_matching': True, 
From 3c9c088f9c51cce86d3df878feba1884c0234df5 Mon Sep 17 00:00:00 2001 From: TRox1972 <TRox1972@users.noreply.github.com> Date: Sun, 12 Jun 2016 03:18:56 +0200 Subject: [PATCH 0902/3599] [Vidbit] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vidbit.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/vidbit.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 9f98a1490..5ccac7c0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -916,6 +916,7 @@ from .vice import ( ViceIE, ViceShowIE, ) +from .vidbit import VidbitIE from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE diff --git a/youtube_dl/extractor/vidbit.py b/youtube_dl/extractor/vidbit.py new file mode 100644 index 000000000..39d508962 --- /dev/null +++ b/youtube_dl/extractor/vidbit.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import url_basename +from ..compat import compat_urlparse + + +class VidbitIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vidbit\.co/watch\?v=(?P<id>[\w-]+)' + _TEST = { + 'url': 'http://www.vidbit.co/watch?v=MrM7LeaMJq', + 'md5': 'f1a579a93282a78de7e1c53220ef0f12', + 'info_dict': { + 'id': 'MrM7LeaMJq', + 'ext': 'mp4', + 'title': 'RoboCop (1987) - Dick You\'re Fired', + 'thumbnail': 'http://www.vidbit.co/thumbnails/MrM7LeaMJq.jpg', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + return { + 'id': video_id, + 'title': self._html_search_regex(r'<h1>(.+)</h1>', webpage, 'title'), + 'url': compat_urlparse.urljoin(url, self._html_search_regex(r'file:\s*(["\'])((?:(?!\1).)+)\1', + webpage, 'video URL', group=2)), + 'thumbnail': self._og_search_thumbnail(webpage), + 
'description': self._html_search_regex(r'description:(["\'])((?:(?!\1).)+)\1', + webpage, 'description', None, group=2), + } From 88d9f6c0c4c3d1d2179ee4fe0af560f500e62579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Jun 2016 16:57:14 +0700 Subject: [PATCH 0903/3599] [utils] Add support for name list in _html_search_meta --- test/test_InfoExtractor.py | 7 ++++++- youtube_dl/extractor/common.py | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 6404ac89f..88e8ff904 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor import YoutubeIE, get_info_extractor -from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError +from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError class TestIE(InfoExtractor): @@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._html_search_meta('d', html), '4') self.assertEqual(ie._html_search_meta('e', html), '5') self.assertEqual(ie._html_search_meta('f', html), '6') + self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1') + self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3') + self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3') + self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) + self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5a2603b50..4eda4e2ea 100644 --- a/youtube_dl/extractor/common.py +++ 
b/youtube_dl/extractor/common.py @@ -749,10 +749,12 @@ class InfoExtractor(object): return self._og_search_property('url', html, **kargs) def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): + if not isinstance(name, (list, tuple)): + name = [name] if display_name is None: - display_name = name + display_name = name[0] return self._html_search_regex( - self._meta_regex(name), + [self._meta_regex(n) for n in name], html, display_name, fatal=fatal, group='content', **kwargs) def _dc_search_uploader(self, html): From f484c5fa257420201768158aaec31af25f904f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Jun 2016 16:59:28 +0700 Subject: [PATCH 0904/3599] [vidbit] Improve (Closes #9759) --- youtube_dl/extractor/vidbit.py | 84 ++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/vidbit.py b/youtube_dl/extractor/vidbit.py index 39d508962..e7ac5a842 100644 --- a/youtube_dl/extractor/vidbit.py +++ b/youtube_dl/extractor/vidbit.py @@ -1,36 +1,84 @@ -# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import url_basename from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + js_to_json, + remove_end, + unified_strdate, +) class VidbitIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidbit\.co/watch\?v=(?P<id>[\w-]+)' - _TEST = { - 'url': 'http://www.vidbit.co/watch?v=MrM7LeaMJq', - 'md5': 'f1a579a93282a78de7e1c53220ef0f12', + _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)' + _TESTS = [{ + 'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2', + 'md5': '1a34b7f14defe3b8fafca9796892924d', 'info_dict': { - 'id': 'MrM7LeaMJq', + 'id': 'jkL2yDOEq2', 'ext': 'mp4', - 'title': 'RoboCop (1987) - Dick You\'re Fired', - 'thumbnail': 'http://www.vidbit.co/thumbnails/MrM7LeaMJq.jpg', + 'title': 'Intro to VidBit', + 
'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7', + 'thumbnail': 're:https?://.*\.jpg$', + 'upload_date': '20160618', + 'view_count': int, + 'comment_count': int, } - } + }, { + 'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + webpage = self._download_webpage( + compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id) + + video_url, title = [None] * 2 + + config = self._parse_json(self._search_regex( + r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'), + video_id, transform_source=js_to_json) + if config: + if config.get('file'): + video_url = compat_urlparse.urljoin(url, config['file']) + title = config.get('title') + + if not video_url: + video_url = compat_urlparse.urljoin(url, self._search_regex( + r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video URL', group='url')) + + if not title: + title = remove_end( + self._html_search_regex( + (r'<h1>(.+?)</h1>', r'<title>(.+?)'), + webpage, 'title', default=None) or self._og_search_title(webpage), + ' - VidBit') + + description = self._html_search_meta( + ('description', 'og:description', 'twitter:description'), + webpage, 'description') + + upload_date = unified_strdate(self._html_search_meta( + 'datePublished', webpage, 'upload date')) + + view_count = int_or_none(self._search_regex( + r'(\d+) views', + webpage, 'view count', fatal=False)) + comment_count = int_or_none(self._search_regex( + r'id=["\']cmt_num["\'][^>]*>\((\d+)\)', + webpage, 'comment count', fatal=False)) return { 'id': video_id, - 'title': self._html_search_regex(r'

(.+)

', webpage, 'title'), - 'url': compat_urlparse.urljoin(url, self._html_search_regex(r'file:\s*(["\'])((?:(?!\1).)+)\1', - webpage, 'video URL', group=2)), + 'url': video_url, + 'title': title, + 'description': description, 'thumbnail': self._og_search_thumbnail(webpage), - 'description': self._html_search_regex(r'description:(["\'])((?:(?!\1).)+)\1', - webpage, 'description', None, group=2), + 'upload_date': upload_date, + 'view_count': view_count, + 'comment_count': comment_count, } From 4f3c5e062715bb8c2084bda139ddcd9a2036f267 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 16 Mar 2016 18:48:06 +0100 Subject: [PATCH 0905/3599] [utils] add helper function for parsing codecs --- test/test_utils.py | 24 ++++++++++++++++++++++++ youtube_dl/utils.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index b7ef51f8d..d84eb438f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -78,6 +78,7 @@ from youtube_dl.utils import ( cli_option, cli_valueless_option, cli_bool_option, + parse_codecs, ) from youtube_dl.compat import ( compat_chr, @@ -579,6 +580,29 @@ class TestUtil(unittest.TestCase): limit_length('foo bar baz asd', 12).startswith('foo bar')) self.assertTrue('...' 
in limit_length('foo bar baz asd', 12)) + def test_parse_codecs(self): + self.assertEqual(parse_codecs(''), {}) + self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { + 'vcodec': 'avc1.77.30', + 'acodec': 'mp4a.40.2', + }) + self.assertEqual(parse_codecs('mp4a.40.2'), { + 'vcodec': 'none', + 'acodec': 'mp4a.40.2', + }) + self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { + 'vcodec': 'avc1.42001e', + 'acodec': 'mp4a.40.5', + }) + self.assertEqual(parse_codecs('avc3.640028'), { + 'vcodec': 'avc3.640028', + 'acodec': 'none', + }) + self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { + 'vcodec': 'h264', + 'acodec': 'aac', + }) + def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 562031fe1..fe175e82c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2060,6 +2060,42 @@ def mimetype2ext(mt): }.get(res, res) +def parse_codecs(codecs_str): + # http://tools.ietf.org/html/rfc6381 + if not codecs_str: + return {} + splited_codecs = list(filter(None, map( + lambda str: str.strip(), codecs_str.strip().strip(',').split(',')))) + vcodec, acodec = None, None + for full_codec in splited_codecs: + codec = full_codec.split('.')[0] + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): + if not vcodec: + vcodec = full_codec + elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'): + if not acodec: + acodec = full_codec + else: + write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr) + if not vcodec and not acodec: + if len(splited_codecs) == 2: + return { + 'vcodec': vcodec, + 'acodec': acodec, + } + elif len(splited_codecs) == 1: + return { + 'vcodec': 'none', + 'acodec': vcodec, + } + else: + return { + 'vcodec': vcodec or 'none', + 'acodec': acodec or 'none', + } + return {} + + def urlhandle_detect_ext(url_handle): getheader = 
url_handle.headers.get From 59bbe4911acd4493bf407925bfdeb1ad03db6ef3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 16 Mar 2016 18:50:45 +0100 Subject: [PATCH 0906/3599] [extractor/common] add helper method to extract html5 media entries --- youtube_dl/extractor/common.py | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5a2603b50..661889593 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -54,6 +54,8 @@ from ..utils import ( update_Request, update_url_query, parse_m3u8_attributes, + extract_attributes, + parse_codecs, ) @@ -1610,6 +1612,62 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats + def _parse_html5_media_entries(self, base_url, webpage): + def absolute_url(video_url): + return compat_urlparse.urljoin(base_url, video_url) + + def parse_content_type(content_type): + if not content_type: + return {} + ctr = re.search(r'(?P[^/]+/[^;]+)(?:;\s*codecs="?(?P[^"]+))?', content_type) + if ctr: + mimetype, codecs = ctr.groups() + f = parse_codecs(codecs) + f['ext'] = mimetype2ext(mimetype) + return f + return {} + + entries = [] + for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): + media_info = { + 'formats': [], + 'subtitles': {}, + } + media_attributes = extract_attributes(media_tag) + src = media_attributes.get('src') + if src: + media_info['formats'].append({ + 'url': absolute_url(src), + 'vcodec': 'none' if media_type == 'audio' else None, + }) + media_info['thumbnail'] = media_attributes.get('poster') + if media_content: + for source_tag in re.findall(r']+>', media_content): + source_attributes = extract_attributes(source_tag) + src = source_attributes.get('src') + if not src: + continue + f = parse_content_type(source_attributes.get('type')) + f.update({ + 'url': absolute_url(src), + 'vcodec': 
'none' if media_type == 'audio' else None, + }) + media_info['formats'].append(f) + for track_tag in re.findall(r']+>', media_content): + track_attributes = extract_attributes(track_tag) + kind = track_attributes.get('kind') + if not kind or kind == 'subtitles': + src = track_attributes.get('src') + if not src: + continue + lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label') + media_info['subtitles'].setdefault(lang, []).append({ + 'url': absolute_url(src), + }) + if media_info['formats']: + entries.append(media_info) + return entries + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From c6781156aa023c1131db6c5b1f575e1833649b33 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 19 May 2016 20:59:59 +0200 Subject: [PATCH 0907/3599] [MSN] add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/msn.py | 90 ++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/msn.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5ccac7c0c..2f9ee1596 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -454,6 +454,7 @@ from .motherless import MotherlessIE from .motorsport import MotorsportIE from .movieclips import MovieClipsIE from .moviezine import MoviezineIE +from .msn import MSNIE from .mtv import ( MTVIE, MTVServicesEmbeddedIE, diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py new file mode 100644 index 000000000..4dd57fca0 --- /dev/null +++ b/youtube_dl/extractor/msn.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor + +from ..utils import ( + unescapeHTML, + int_or_none, +) + +class MSNIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?msn\.com/[a-z-]{2,5}(?:/[a-z]+)+/(?P[a-z-]+)/[a-z]{2}-(?P[a-zA-Z]+)' + _TESTS = 
[{ + 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE', + 'info_dict': { + 'id': 'BBqQYNE', + 'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message', + 'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25', + 'duration': 104, + 'ext': 'mp4', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf', + 'info_dict': { + 'id': 'BBt6ZKf', + 'title': 'All That Bling: Self-Made Millionaire Child Builds Fashion & Jewellery Empire', + 'description': 'md5:8e683bd5c729d5fb16d96539a582aa5e', + 'duration': 350, + 'ext': 'mp4', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.group('id', 'display_id') + + webpage = self._download_webpage(url, display_id) + + self.report_extraction(display_id) + video_data = self._parse_json(self._html_search_regex(r'data-metadata\s*=\s*["\'](.+)["\']', + webpage, 'video data'), display_id) + + formats = [] + for video_file in video_data.get('videoFiles', []): + if not '.ism' in video_file.get('url', '.ism'): + formats.append({ + 'url': unescapeHTML(video_file.get('url')), + 'ext': 'mp4', + 'width': int_or_none(video_file.get('width')), + 'height': int_or_none(video_file.get('height')), + }) + elif 'm3u8' in video_file.get('url'): + formats.extend(self._extract_m3u8_formats( + video_file.get('url'), display_id, 'mp4')) + # There (often) exists an Microsoft Smooth Streaming manifest + # (.ism) which is not yet supported + # (https://github.com/rg3/youtube-dl/issues/8118) + + self._sort_formats(formats) + + subtitles = {} + for f in video_data.get('files', []): + if f.get('formatCode', '') == '3100': + lang = f.get('culture', '') + if not lang: + continue + subtitles.setdefault(lang, []).append({ + 'ext': 
'ttml', + 'url': unescapeHTML(f.get('url')), + }) + + return { + 'id': video_id, + 'title': video_data['title'], + 'formats': formats, + 'thumbnail': video_data.get('headlineImage', {}).get('url'), + 'description': video_data.get('description'), + 'creator': video_data.get('creator'), + 'subtitles': subtitles, + 'duration': int_or_none(video_data.get('durationSecs')), + } From bf8dd790456acc4a96d2961e61e96c4771e4d787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 26 Jun 2016 21:09:07 +0700 Subject: [PATCH 0908/3599] [extractor/common] Fix sorting with custom field preference --- youtube_dl/extractor/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4eda4e2ea..e6c15de42 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -878,7 +878,11 @@ class InfoExtractor(object): f['ext'] = determine_ext(f['url']) if isinstance(field_preference, (list, tuple)): - return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference) + return tuple( + f.get(field) + if f.get(field) is not None + else ('' if field == 'format_id' else -1) + for field in field_preference) preference = f.get('preference') if preference is None: From f1f336322da6e719cf4298b08680c3e903e956c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 26 Jun 2016 21:10:05 +0700 Subject: [PATCH 0909/3599] [msn] Fix extraction (Closes #8960, closes #9542) --- youtube_dl/extractor/msn.py | 125 ++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index 4dd57fca0..d4569e325 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -2,41 +2,42 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .common import InfoExtractor +from ..compat import 
compat_str from ..utils import ( - unescapeHTML, + determine_ext, + ExtractorError, int_or_none, + unescapeHTML, ) + class MSNIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?msn\.com/[a-z-]{2,5}(?:/[a-z]+)+/(?P[a-z-]+)/[a-z]{2}-(?P[a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P[^/]+)/[a-z]{2}-(?P[\da-zA-Z]+)' _TESTS = [{ 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE', + 'md5': '8442f66c116cbab1ff7098f986983458', 'info_dict': { 'id': 'BBqQYNE', + 'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message', + 'ext': 'mp4', 'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message', 'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25', 'duration': 104, - 'ext': 'mp4', + 'uploader': 'CBS Entertainment', + 'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v', }, - 'params': { - # m3u8 download - 'skip_download': True, - } }, { 'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf', - 'info_dict': { - 'id': 'BBt6ZKf', - 'title': 'All That Bling: Self-Made Millionaire Child Builds Fashion & Jewellery Empire', - 'description': 'md5:8e683bd5c729d5fb16d96539a582aa5e', - 'duration': 350, - 'ext': 'mp4', - }, - 'params': { - # m3u8 download - 'skip_download': True, - } + 'only_matching': True, + }, { + 'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH', + 'only_matching': True, + }, { + # geo restricted + 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU', + 'only_matching': True, }] def _real_extract(self, url): @@ -45,46 +46,74 @@ class MSNIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - self.report_extraction(display_id) - video_data = self._parse_json(self._html_search_regex(r'data-metadata\s*=\s*["\'](.+)["\']', - webpage, 'video 
data'), display_id) + video = self._parse_json( + self._search_regex( + r'data-metadata\s*=\s*(["\'])(?P.+?)\1', + webpage, 'video data', default='{}', group='data'), + display_id, transform_source=unescapeHTML) + + if not video: + error = unescapeHTML(self._search_regex( + r'data-error=(["\'])(?P.+?)\1', + webpage, 'error', group='error')) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + title = video['title'] formats = [] - for video_file in video_data.get('videoFiles', []): - if not '.ism' in video_file.get('url', '.ism'): + for file_ in video.get('videoFiles', []): + format_url = file_.get('url') + if not format_url: + continue + ext = determine_ext(format_url) + # .ism is not yet supported (see + # https://github.com/rg3/youtube-dl/issues/8118) + if ext == 'ism': + continue + if 'm3u8' in format_url: + # m3u8_native should not be used here until + # https://github.com/rg3/youtube-dl/issues/9913 is fixed + m3u8_formats = self._extract_m3u8_formats( + format_url, display_id, 'mp4', + m3u8_id='hls', fatal=False) + # Despite metadata in m3u8 all video+audio formats are + # actually video-only (no audio) + for f in m3u8_formats: + if f.get('acodec') != 'none' and f.get('vcodec') != 'none': + f['acodec'] = 'none' + formats.extend(m3u8_formats) + else: formats.append({ - 'url': unescapeHTML(video_file.get('url')), + 'url': format_url, 'ext': 'mp4', - 'width': int_or_none(video_file.get('width')), - 'height': int_or_none(video_file.get('height')), + 'format_id': 'http', + 'width': int_or_none(file_.get('width')), + 'height': int_or_none(file_.get('height')), }) - elif 'm3u8' in video_file.get('url'): - formats.extend(self._extract_m3u8_formats( - video_file.get('url'), display_id, 'mp4')) - # There (often) exists an Microsoft Smooth Streaming manifest - # (.ism) which is not yet supported - # (https://github.com/rg3/youtube-dl/issues/8118) - self._sort_formats(formats) subtitles = {} - for f in video_data.get('files', []): - if 
f.get('formatCode', '') == '3100': - lang = f.get('culture', '') - if not lang: - continue - subtitles.setdefault(lang, []).append({ - 'ext': 'ttml', - 'url': unescapeHTML(f.get('url')), + for file_ in video.get('files', []): + format_url = file_.get('url') + format_code = file_.get('formatCode') + if not format_url or not format_code: + continue + if compat_str(format_code) == '3100': + subtitles.setdefault(file_.get('culture', 'en'), []).append({ + 'ext': determine_ext(format_url, 'ttml'), + 'url': format_url, }) return { 'id': video_id, - 'title': video_data['title'], - 'formats': formats, - 'thumbnail': video_data.get('headlineImage', {}).get('url'), - 'description': video_data.get('description'), - 'creator': video_data.get('creator'), + 'display_id': display_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': video.get('headlineImage', {}).get('url'), + 'duration': int_or_none(video.get('durationSecs')), + 'uploader': video.get('sourceFriendly'), + 'uploader_id': video.get('providerId'), + 'creator': video.get('creator'), 'subtitles': subtitles, - 'duration': int_or_none(video_data.get('durationSecs')), + 'formats': formats, } From 92747e664a70e6739644a9c2b3abfbdcc68fd136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 26 Jun 2016 21:15:24 +0700 Subject: [PATCH 0910/3599] release 2016.06.26 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index c73f9a904..63b687fef 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.25*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.25** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.26*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.26** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.25 +[debug] youtube-dl version 2016.06.26 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 891499f59..a725e8c6b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -385,6 +385,7 @@ - **MovieFap** - **Moviezine** - **MPORA** + - **MSN** - **MTV** - **mtv.de** - **mtviggy.com** @@ -501,6 +502,7 @@ - **plus.google**: Google Plus - **pluzz.francetv.fr** - **podomatic** + - **PolskieRadio** - **PornHd** - **PornHub** - **PornHubPlaylist** @@ -736,6 +738,7 @@ - **vh1.com** - **Vice** - **ViceShow** + - **Vidbit** - **Viddler** - **video.google:search**: Google Video search - **video.mit.edu** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2b7a4c98d..52de19517 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ 
from __future__ import unicode_literals -__version__ = '2016.06.25' +__version__ = '2016.06.26' From b0c200f1ec594b7c7d5a5023853970ff789a3470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 26 Jun 2016 22:02:46 +0700 Subject: [PATCH 0911/3599] [msn] Add test URL with non-alphanumeric characters --- youtube_dl/extractor/msn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py index d4569e325..1ec8e0f50 100644 --- a/youtube_dl/extractor/msn.py +++ b/youtube_dl/extractor/msn.py @@ -38,6 +38,9 @@ class MSNIE(InfoExtractor): # geo restricted 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU', 'only_matching': True, + }, { + 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6', + 'only_matching': True, }] def _real_extract(self, url): From 427cd050a3b64319c19e4596d8885378604e388e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 04:11:53 +0700 Subject: [PATCH 0912/3599] [extractor/generic] Improve kaltura embed detection (Closes #9911) --- youtube_dl/extractor/generic.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1592a8a3a..26a7d10be 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -920,6 +920,24 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # Kaltura embedded via quoted entry_id + 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures', + 'info_dict': { + 'id': '0_utuok90b', + 'ext': 'mp4', + 'title': '06_matthew_brender_raj_dutt', + 'timestamp': 1466638791, + 'upload_date': '20160622', + }, + 'add_ie': ['Kaltura'], + 'expected_warnings': [ + 'Could not send HEAD request' + ], 
+ 'params': { + 'skip_download': True, + } + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -1909,7 +1927,7 @@ class GenericIE(InfoExtractor): # Look for Kaltura embeds mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P['\"])wid(?P=q1)\s*:\s*(?P['\"])_?(?P[^'\"]+)(?P=q2),.*?(?P['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P['\"])(?P[^'\"]+)(?P=q4),", webpage) or - re.search(r'(?s)(?P["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P["\'])(?P.+?)(?P=q2)', webpage)) + re.search(r'(?s)(?P["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*?(?P=q1).*?(?P["\'])?entry_?[Ii]d(?P=q2)\s*:\s*(?P["\'])(?P.+?)(?P=q3)', webpage)) if mobj is not None: return self.url_result(smuggle_url( 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), From 81fda1536924db0ec4f583ae83bc77cb91ca6835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 05:07:12 +0700 Subject: [PATCH 0913/3599] [sr:mediathek] Clarify IE_NAME --- youtube_dl/extractor/srmediathek.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index 74d01183f..a2569dfba 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -9,6 +9,7 @@ from ..utils import ( class SRMediathekIE(ARDMediathekIE): + IE_NAME = 'sr:mediathek' IE_DESC = 'Saarländischer Rundfunk' _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P[0-9]+)' From f41ffc00d15697c6d4c8975d261ffd5b0c5e971f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 05:08:09 +0700 Subject: [PATCH 0914/3599] [skynewsarabia:article] Clarify IE_NAME --- youtube_dl/extractor/skynewsarabia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/skynewsarabia.py b/youtube_dl/extractor/skynewsarabia.py index 
05e1b02ad..fffc9aa22 100644 --- a/youtube_dl/extractor/skynewsarabia.py +++ b/youtube_dl/extractor/skynewsarabia.py @@ -67,7 +67,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE): class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE): - IE_NAME = 'skynewsarabia:video' + IE_NAME = 'skynewsarabia:article' _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P[0-9]+)' _TESTS = [{ 'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9', From e3a6747d8f19ad0ba8aee7c3214cdb64903beba0 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 26 Jun 2016 23:31:55 +0100 Subject: [PATCH 0915/3599] New test-case: extractor names are supposed to be unique @dstftw explained in https://github.com/rg3/youtube-dl/pull/9918#issuecomment-228625878 that extractor names are supposed to be unique. @dstftw has fixed the two offending extractors, and here I add a test to ensure this does not happen in the future. 
--- test/test_all_urls.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index f5af184e6..133d438eb 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -6,6 +6,7 @@ from __future__ import unicode_literals import os import sys import unittest +import collections sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -130,6 +131,13 @@ class TestAllURLsMatching(unittest.TestCase): 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', ['Yahoo']) + def test_no_duplicated_ie_names(self): + name_accu = collections.defaultdict(list) + for ie in self.ies: + name_accu[ie.IE_NAME.lower()].append(ie) + for (ie_name,ie_list) in name_accu.items(): + self.assertEqual(len(ie_list), 1, 'Only 1 extractor with IE_NAME "%s" (%s)' % (ie_name, ie_list)) + if __name__ == '__main__': unittest.main() From fd7a7498a47c5d79663ec8d86a87325aa634c652 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 22:11:45 +0700 Subject: [PATCH 0916/3599] [test_all_urls] PEP 8 and change wording --- test/test_all_urls.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 133d438eb..1f6079c29 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -134,9 +134,11 @@ class TestAllURLsMatching(unittest.TestCase): def test_no_duplicated_ie_names(self): name_accu = collections.defaultdict(list) for ie in self.ies: - name_accu[ie.IE_NAME.lower()].append(ie) - for (ie_name,ie_list) in name_accu.items(): - self.assertEqual(len(ie_list), 1, 'Only 1 extractor with IE_NAME "%s" (%s)' % (ie_name, ie_list)) + name_accu[ie.IE_NAME.lower()].append(type(ie).__name__) + for (ie_name, ie_list) in name_accu.items(): + self.assertEqual( + len(ie_list), 1, + 'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list))) if __name__ == '__main__': From 
9ea5c04c0d16f5519079ae04fdad62fc28c884b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 22:44:17 +0700 Subject: [PATCH 0917/3599] [kaltura] Add _extract_url with fixed regex --- youtube_dl/extractor/kaltura.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index a65697ff5..c75a958ba 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -64,6 +64,32 @@ class KalturaIE(InfoExtractor): } ] + @staticmethod + def _extract_url(webpage): + mobj = ( + re.search( + r"""(?xs) + kWidget\.(?:thumb)?[Ee]mbed\( + \{.*? + (?P['\"])wid(?P=q1)\s*:\s* + (?P['\"])_?(?P[^'\"]+)(?P=q2),.*? + (?P['\"])entry_?[Ii]d(?P=q3)\s*:\s* + (?P['\"])(?P[^'\"]+)(?P=q4), + """, webpage) or + re.search( + r'''(?xs) + (?P["\']) + (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*? + (?P=q1).*? + (?: + entry_?[Ii]d| + (?P["\'])entry_?[Ii]d(?P=q2) + )\s*:\s* + (?P["\'])(?P.+?)(?P=q3) + ''', webpage)) + if mobj: + return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict() + def _kaltura_api_call(self, video_id, actions, *args, **kwargs): params = actions[0] if len(actions) > 1: From c287f2bc6073182323aada26309539d724943fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 22:45:26 +0700 Subject: [PATCH 0918/3599] [extractor/generic] Use _extract_url for kaltura embeds (Closes #9922) --- youtube_dl/extractor/generic.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 26a7d10be..2188f8bb2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -64,6 +64,7 @@ from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE from .vessel import VesselIE +from .kaltura import KalturaIE class GenericIE(InfoExtractor): @@ 
-1926,12 +1927,9 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url'), 'Zapiks') # Look for Kaltura embeds - mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P['\"])wid(?P=q1)\s*:\s*(?P['\"])_?(?P[^'\"]+)(?P=q2),.*?(?P['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P['\"])(?P[^'\"]+)(?P=q4),", webpage) or - re.search(r'(?s)(?P["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*?(?P=q1).*?(?P["\'])?entry_?[Ii]d(?P=q2)\s*:\s*(?P["\'])(?P.+?)(?P=q3)', webpage)) - if mobj is not None: - return self.url_result(smuggle_url( - 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), - {'source_url': url}), 'Kaltura') + kaltura_url = KalturaIE._extract_url(webpage) + if kaltura_url: + return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) # Look for Eagle.Platform embeds mobj = re.search( From 8704778d95d2abef021757c85efd75664c6a424a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 23:06:42 +0700 Subject: [PATCH 0919/3599] [pbs] Check manually constructed http links (Closes #9921) --- youtube_dl/extractor/pbs.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 81918ac6e..f6f423597 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -516,9 +516,14 @@ class PBSIE(InfoExtractor): # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'): continue + f_url = re.sub(r'\d+k|baseline', bitrate, http_url) + # This may produce invalid links sometimes (e.g. 
+ # http://www.pbs.org/wgbh/frontline/film/suicide-plan) + if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate): + continue f = m3u8_format.copy() f.update({ - 'url': re.sub(r'\d+k|baseline', bitrate, http_url), + 'url': f_url, 'format_id': m3u8_format['format_id'].replace('hls', 'http'), 'protocol': 'http', }) From 8174d0fe95db736f0fc53fd5e2d25c0c471a97fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 27 Jun 2016 23:09:39 +0700 Subject: [PATCH 0920/3599] release 2016.06.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 63b687fef..f9a1aa990 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.26*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.26** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.26 +[debug] youtube-dl version 2016.06.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a725e8c6b..2a94f4feb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -45,7 +45,6 @@ - **archive.org**: archive.org videos - **ARD** - **ARD:mediathek** - - **ARD:mediathek**: Saarländischer Rundfunk - **arte.tv** - **arte.tv:+7** - **arte.tv:cinema** @@ -588,7 +587,7 @@ - **Shared**: shared.sx and vivo.sx - **ShareSix** - **Sina** - - **skynewsarabia:video** + - **skynewsarabia:article** - **skynewsarabia:video** - **Slideshare** - **Slutload** @@ -621,6 +620,7 @@ - **SportBoxEmbed** - **SportDeutschland** - **Sportschau** + - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **SSA** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 52de19517..2dd24dec1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.26' +__version__ = '2016.06.27' From 32616c14ccf5051484e9236c54bdbeccb6e85ee4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 28 Jun 2016 14:02:03 +0100 Subject: [PATCH 0921/3599] 
[vrt] extract all formats --- youtube_dl/extractor/vrt.py | 61 ++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index 8e35f24e8..bec7ab327 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -25,7 +25,8 @@ class VRTIE(InfoExtractor): 'timestamp': 1414271750.949, 'upload_date': '20141025', 'duration': 929, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, # sporza.be { @@ -39,7 +40,8 @@ class VRTIE(InfoExtractor): 'timestamp': 1413835980.560, 'upload_date': '20141020', 'duration': 3238, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, # cobra.be { @@ -53,16 +55,39 @@ class VRTIE(InfoExtractor): 'timestamp': 1413967500.494, 'upload_date': '20141022', 'duration': 661, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, { # YouTube video 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957', - 'only_matching': True, + 'md5': 'b8b93da1df1cea6c8556255a796b7d61', + 'info_dict': { + 'id': 'Wji-BZ0oCwg', + 'ext': 'mp4', + 'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer', + 'description': 'md5:8e468944dce15567a786a67f74262583', + 'uploader': 'Star Wars', + 'uploader_id': 'starwars', + 'upload_date': '20160407', + }, + 'add_ie': ['Youtube'], }, { 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', - 'only_matching': True, + 'md5': '', + 'info_dict': { + 'id': '2377055', + 'ext': 'mp4', + 'title': 'Cafe Derby', + 'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. 
Een waar gebeurd maar ook verzonnen verhaal.', + 'upload_date': '20150626', + 'timestamp': 1435305240.769, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } } ] @@ -98,6 +123,32 @@ class VRTIE(InfoExtractor): formats.extend(self._extract_m3u8_formats( src, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + src.replace('playlist.m3u8', 'manifest.f4m'), + video_id, f4m_id='hds', fatal=False)) + if 'data-video-geoblocking="true"' not in webpage: + rtmp_formats = self._extract_smil_formats( + src.replace('playlist.m3u8', 'jwplayer.smil'), + video_id, fatal=False) + formats.extend(rtmp_formats) + for rtmp_format in rtmp_formats: + rtmp_format_c = rtmp_format.copy() + rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + del rtmp_format_c['play_path'] + del rtmp_format_c['ext'] + http_format = rtmp_format_c.copy() + http_format.update({ + 'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'http'), + 'protocol': 'http', + }) + rtsp_format = rtmp_format_c.copy() + rtsp_format.update({ + 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), + 'protocol': 'rtsp', + }) + formats.extend([http_format, rtsp_format]) else: formats.extend(self._extract_f4m_formats( '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) From 97124e572db5f5d5d11ee630aeb18a4c5585d087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 28 Jun 2016 22:39:53 +0700 Subject: [PATCH 0922/3599] [arte:playlist] Fix test --- youtube_dl/extractor/arte.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 049f1fa9e..e0c5c1804 100644 --- a/youtube_dl/extractor/arte.py +++ 
b/youtube_dl/extractor/arte.py @@ -419,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): 'info_dict': { 'id': 'PL-013263', 'title': 'Areva & Uramin', + 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', }, 'playlist_mincount': 6, }, { From 42362fdb5e780611b7054e52eb28621f5a9fd7ba Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 29 Jun 2016 15:49:17 +0100 Subject: [PATCH 0923/3599] [aenetworks] add support for show and season for A&E Network sites and History topics(closes #9816) --- youtube_dl/extractor/aenetworks.py | 181 +++++++++++++++++++++-------- youtube_dl/extractor/extractors.py | 5 +- 2 files changed, 135 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 1bbfe2641..cbde8b46e 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -7,18 +7,118 @@ from ..utils import ( smuggle_url, update_url_query, unescapeHTML, + extract_attributes, +) +from ..compat import ( + compat_urlparse, ) -class AENetworksIE(InfoExtractor): +class AENetworksBaseIE(InfoExtractor): + def theplatform_url_result(self, theplatform_url, video_id, query): + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': smuggle_url( + update_url_query(theplatform_url, query), + { + 'sig': { + 'key': 'crazyjava', + 'secret': 's3cr3t' + }, + 'force_smil_url': True + }), + 'ie_key': 'ThePlatform', + } + + +class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' - _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P[^/]+)/(?:[^/]+/)+(?P[^/]+?)(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/shows/(?P[^/]+(?:/[^/]+){0,2})' + _TESTS = [{ + 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', + 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', + 'info_dict': { + 'id': '22253814', + 'ext': 'mp4', + 'title': 'Winter Is 
Coming', + 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', + 'timestamp': 1338306241, + 'upload_date': '20120529', + 'uploader': 'AENE-NEW', + }, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'http://www.history.com/shows/ancient-aliens/season-1', + 'info_dict': { + 'id': '71889446852', + }, + 'playlist_mincount': 5, + }, { + 'url': 'http://www.mylifetime.com/shows/atlanta-plastic', + 'info_dict': { + 'id': 'SERIES4317', + 'title': 'Atlanta Plastic', + }, + 'playlist_mincount': 2, + }, { + 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', + 'only_matching': True + }, { + 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', + 'only_matching': True + }, { + 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', + 'only_matching': True + }] + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + url_parts = display_id.split('/') + url_parts_len = len(url_parts) + if url_parts_len == 1: + entries = [] + for season_url_path in re.findall(r'(?s)]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): + entries.append(self.url_result( + compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) + return self.playlist_result( + entries, self._html_search_meta('aetn:SeriesId', webpage), + self._html_search_meta('aetn:SeriesTitle', webpage)) + elif url_parts_len == 2: + entries = [] + for episode_item in re.findall(r'(?s)]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): + episode_attributes = extract_attributes(episode_item) + episode_url = compat_urlparse.urljoin( + url, episode_attributes['data-canonical']) + entries.append(self.url_result( + episode_url, 'AENetworks', + episode_attributes['data-videoid'])) + return self.playlist_result( + entries, self._html_search_meta('aetn:SeasonId', webpage)) + else: + video_id = self._html_search_meta('aetn:VideoID', webpage) + media_url = self._search_regex( + r"media_url\s*=\s*'([^']+)'", 
webpage, 'video url') + + info = self._search_json_ld(webpage, video_id, fatal=False) + info.update(self.theplatform_url_result( + media_url, video_id, { + 'mbr': 'true', + 'assetTypes': 'medium_video_s3' + })) + return info + + +class HistoryTopicIE(AENetworksBaseIE): + IE_NAME = 'history:topic' + IE_DESC = 'History.com Topic' + _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P[^/]+)/videos(?:/(?P[^/?#]+))?' _TESTS = [{ 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', 'info_dict': { - 'id': 'g12m5Gyt3fdR', + 'id': '40700995724', 'ext': 'mp4', 'title': "Bet You Didn't Know: Valentine's Day", 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', @@ -31,57 +131,38 @@ class AENetworksIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['ThePlatform'], - 'expected_warnings': ['JSON-LD'], }, { - 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', - 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', - 'info_dict': { - 'id': 'eg47EERs_JsZ', - 'ext': 'mp4', - 'title': 'Winter Is Coming', - 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', - 'timestamp': 1338306241, - 'upload_date': '20120529', - 'uploader': 'AENE-NEW', + 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos', + 'info_dict': + { + 'id': 'world-war-i-history', }, - 'add_ie': ['ThePlatform'], + 'playlist_mincount': 24, }, { - 'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry', - 'only_matching': True - }, { - 'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage', - 'only_matching': True - }, { - 'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients', - 'only_matching': True + 'url': 'http://www.history.com/topics/world-war-i-history/videos', + 'only_matching': True, }] def _real_extract(self, url): - page_type, video_id = 
re.match(self._VALID_URL, url).groups() + topic_id, display_id = re.match(self._VALID_URL, url).groups() + if display_id: + webpage = self._download_webpage(url, display_id) + release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups() + release_url = unescapeHTML(release_url) - webpage = self._download_webpage(url, video_id) - - video_url_re = [ - r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, - r"media_url\s*=\s*'([^']+)'" - ] - video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url')) - query = {'mbr': 'true'} - if page_type == 'shows': - query['assetTypes'] = 'medium_video_s3' - if 'switch=hds' in video_url: - query['switch'] = 'hls' - - info = self._search_json_ld(webpage, video_id, fatal=False) - info.update({ - '_type': 'url_transparent', - 'url': smuggle_url( - update_url_query(video_url, query), - { - 'sig': { - 'key': 'crazyjava', - 'secret': 's3cr3t'}, - 'force_smil_url': True - }), - }) - return info + return self.theplatform_url_result( + release_url, video_id, { + 'mbr': 'true', + 'switch': 'hls' + }) + else: + webpage = self._download_webpage(url, topic_id) + entries = [] + for episode_item in re.findall(r']*>', webpage): + video_attributes = extract_attributes(episode_item) + entries.append(self.theplatform_url_result( + video_attributes['data-href'], video_attributes['data-id'], { + 'mbr': 'true', + 'switch': 'hls' + })) + return self.playlist_result(entries, topic_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2f9ee1596..bba88e9eb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -20,7 +20,10 @@ from .adobetv import ( AdobeTVVideoIE, ) from .adultswim import AdultSwimIE -from .aenetworks import AENetworksIE +from .aenetworks import ( + AENetworksIE, + HistoryTopicIE, +) from .afreecatv import AfreecaTVIE from .aftonbladet import AftonbladetIE from .airmozilla 
import AirMozillaIE From 4c7821227c54836a17d9c02d4f8d3dcbd97105fc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 29 Jun 2016 16:03:32 +0100 Subject: [PATCH 0924/3599] [aenetworks:historytopic] fix topic video url --- youtube_dl/extractor/aenetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index cbde8b46e..1376dd70f 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -161,7 +161,7 @@ class HistoryTopicIE(AENetworksBaseIE): for episode_item in re.findall(r']*>', webpage): video_attributes = extract_attributes(episode_item) entries.append(self.theplatform_url_result( - video_attributes['data-href'], video_attributes['data-id'], { + video_attributes['data-release-url'], video_attributes['data-id'], { 'mbr': 'true', 'switch': 'hls' })) From c58ed8563d37e39235332b35e7feafe32711c623 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 29 Jun 2016 16:18:16 +0100 Subject: [PATCH 0925/3599] [aenetworks] extract history topic playlist title --- youtube_dl/extractor/aenetworks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 1376dd70f..2536f75d6 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -8,6 +8,7 @@ from ..utils import ( update_url_query, unescapeHTML, extract_attributes, + get_element_by_attribute, ) from ..compat import ( compat_urlparse, @@ -136,6 +137,7 @@ class HistoryTopicIE(AENetworksBaseIE): 'info_dict': { 'id': 'world-war-i-history', + 'title': 'World War I History', }, 'playlist_mincount': 24, }, { @@ -165,4 +167,4 @@ class HistoryTopicIE(AENetworksBaseIE): 'mbr': 'true', 'switch': 'hls' })) - return self.playlist_result(entries, topic_id) + return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) From 
70157c2c43068b23007a6d71e16967ba85b274d5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 29 Jun 2016 16:55:17 +0100 Subject: [PATCH 0926/3599] [aenetworks] add support for movie pages --- youtube_dl/extractor/aenetworks.py | 80 ++++++++++++++++-------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 2536f75d6..8b60e2ab6 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -36,7 +36,7 @@ class AENetworksBaseIE(InfoExtractor): class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' - _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/shows/(?P[^/]+(?:/[^/]+){0,2})' + _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P[^/]+(?:/[^/]+){0,2})|movies/(?P[^/]+)/full-movie)' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', @@ -72,50 +72,54 @@ class AENetworksIE(AENetworksBaseIE): }, { 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', 'only_matching': True + }, { + 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', + 'only_matching': True }] def _real_extract(self, url): - display_id = self._match_id(url) + show_path, movie_display_id = re.match(self._VALID_URL, url).groups() + display_id = show_path or movie_display_id webpage = self._download_webpage(url, display_id) - url_parts = display_id.split('/') - url_parts_len = len(url_parts) - if url_parts_len == 1: - entries = [] - for season_url_path in re.findall(r'(?s)]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): - entries.append(self.url_result( - compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) - return self.playlist_result( - entries, self._html_search_meta('aetn:SeriesId', 
webpage), - self._html_search_meta('aetn:SeriesTitle', webpage)) - elif url_parts_len == 2: - entries = [] - for episode_item in re.findall(r'(?s)]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): - episode_attributes = extract_attributes(episode_item) - episode_url = compat_urlparse.urljoin( - url, episode_attributes['data-canonical']) - entries.append(self.url_result( - episode_url, 'AENetworks', - episode_attributes['data-videoid'])) - return self.playlist_result( - entries, self._html_search_meta('aetn:SeasonId', webpage)) - else: - video_id = self._html_search_meta('aetn:VideoID', webpage) - media_url = self._search_regex( - r"media_url\s*=\s*'([^']+)'", webpage, 'video url') + if show_path: + url_parts = show_path.split('/') + url_parts_len = len(url_parts) + if url_parts_len == 1: + entries = [] + for season_url_path in re.findall(r'(?s)]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): + entries.append(self.url_result( + compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) + return self.playlist_result( + entries, self._html_search_meta('aetn:SeriesId', webpage), + self._html_search_meta('aetn:SeriesTitle', webpage)) + elif url_parts_len == 2: + entries = [] + for episode_item in re.findall(r'(?s)]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): + episode_attributes = extract_attributes(episode_item) + episode_url = compat_urlparse.urljoin( + url, episode_attributes['data-canonical']) + entries.append(self.url_result( + episode_url, 'AENetworks', + episode_attributes['data-videoid'])) + return self.playlist_result( + entries, self._html_search_meta('aetn:SeasonId', webpage)) + video_id = self._html_search_meta('aetn:VideoID', webpage) + media_url = self._search_regex( + r"media_url\s*=\s*'([^']+)'", webpage, 'video url') - info = self._search_json_ld(webpage, video_id, fatal=False) - info.update(self.theplatform_url_result( - media_url, video_id, { - 'mbr': 'true', - 'assetTypes': 'medium_video_s3' - })) - return info + info = 
self._search_json_ld(webpage, video_id, fatal=False) + info.update(self.theplatform_url_result( + media_url, video_id, { + 'mbr': 'true', + 'assetTypes': 'medium_video_s3' + })) + return info class HistoryTopicIE(AENetworksBaseIE): IE_NAME = 'history:topic' IE_DESC = 'History.com Topic' - _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P[^/]+)/videos(?:/(?P[^/?#]+))?' + _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P[^/]+)/videos(?:/(?P[^/?#]+))?' _TESTS = [{ 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', 'info_dict': { @@ -146,9 +150,9 @@ class HistoryTopicIE(AENetworksBaseIE): }] def _real_extract(self, url): - topic_id, display_id = re.match(self._VALID_URL, url).groups() - if display_id: - webpage = self._download_webpage(url, display_id) + topic_id, video_display_id = re.match(self._VALID_URL, url).groups() + if video_display_id: + webpage = self._download_webpage(url, video_display_id) release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups() release_url = unescapeHTML(release_url) From 06a96da15bfde93a2f2aa17cdaa10e1bf11dde0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 29 Jun 2016 23:01:34 +0700 Subject: [PATCH 0927/3599] [eagleplatform] Improve embed detection and extract in separate routine (Closes #9926) --- youtube_dl/extractor/eagleplatform.py | 8 ++++++++ youtube_dl/extractor/generic.py | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 113a4966f..12d28d3b9 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor): 'skip': 'Georestricted', }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + 
r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', + webpage) + if mobj is not None: + return mobj.group('url') + @staticmethod def _handle_error(response): status = int_or_none(response.get('status', 200)) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2188f8bb2..712dd8a94 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -65,6 +65,7 @@ from .threeqsdn import ThreeQSDNIE from .theplatform import ThePlatformIE from .vessel import VesselIE from .kaltura import KalturaIE +from .eagleplatform import EaglePlatformIE class GenericIE(InfoExtractor): @@ -1932,10 +1933,9 @@ class GenericIE(InfoExtractor): return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) # Look for Eagle.Platform embeds - mobj = re.search( - r']+src="(?Phttps?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'EaglePlatform') + eagleplatform_url = EaglePlatformIE._extract_url(webpage) + if eagleplatform_url: + return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) # Look for ClipYou (uses Eagle.Platform) embeds mobj = re.search( From e496fa50cd82877d8daeda8e29056c5d7fce2de0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 29 Jun 2016 20:19:31 +0100 Subject: [PATCH 0928/3599] [urplay] Add new extractor(closes #9332) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/urplay.py | 67 ++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/urplay.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bba88e9eb..eeedc675b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -894,6 +894,7 @@ from .udn import UDNEmbedIE from .digiteka import DigitekaIE from .unistra import UnistraIE from .urort import UrortIE +from .urplay import 
URPlayIE from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE from .ustudio import ( diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py new file mode 100644 index 000000000..24ecdd2b1 --- /dev/null +++ b/youtube_dl/extractor/urplay.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class URPlayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P[0-9]+)' + _TEST = { + 'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde', + 'md5': '15ca67b63fd8fb320ac2bcd854bad7b6', + 'info_dict': { + 'id': '190031', + 'ext': 'mp4', + 'title': 'Tripp, Trapp, Träd : Sovkudde', + 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + urplayer_data = self._parse_json(self._search_regex( + r'urPlayer.init\(({.+?})\);', webpage, 'urplayer data'), video_id) + host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] + + formats = [] + for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)): + file_rtmp = urplayer_data.get('file_rtmp' + quality_attr) + if file_rtmp: + formats.append({ + 'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp), + 'format_id': quality + '-rtmp', + 'ext': 'flv', + 'preference': preference, + }) + file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr) + if file_http: + file_http_base_url = 'http://%s/%s' % (host, file_http) + formats.extend(self._extract_f4m_formats( + file_http_base_url + 'manifest.f4m', video_id, + preference, '%s-hds' % quality, fatal=False)) + formats.extend(self._extract_m3u8_formats( + file_http_base_url + 'playlist.m3u8', video_id, 'mp4', + 'm3u8_native', preference, '%s-hls' % quality, fatal=False)) + self._sort_formats(formats) + + 
subtitles = {} + for subtitle in urplayer_data.get('subtitles', []): + subtitle_url = subtitle.get('file') + kind = subtitle.get('kind') + if subtitle_url or kind and kind != 'captions': + continue + subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({ + 'url': subtitle_url, + }) + + return { + 'id': video_id, + 'title': urplayer_data['title'], + 'description': self._og_search_description(webpage), + 'thumbnail': urplayer_data.get('image'), + 'series': urplayer_data.get('series_title'), + 'subtitles': subtitles, + 'formats': formats, + } \ No newline at end of file From 397b305cfe1a7ec2957331602727edb009c71e99 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 00:21:03 +0100 Subject: [PATCH 0929/3599] [meta] Add new extractor(closes #8789) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/meta.py | 56 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/meta.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eeedc675b..84c39ab48 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -425,6 +425,7 @@ from .makerschannel import MakersChannelIE from .makertv import MakerTVIE from .matchtv import MatchTVIE from .mdr import MDRIE +from .meta import METAIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE diff --git a/youtube_dl/extractor/meta.py b/youtube_dl/extractor/meta.py new file mode 100644 index 000000000..674b8d264 --- /dev/null +++ b/youtube_dl/extractor/meta.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unescapeHTML, + int_or_none, + ExtractorError, +) + + +class METAIE(InfoExtractor): + _VALID_URL = r'https?://video\.meta\.ua/(?P[0-9]+)' + _TEST = { + 'url': 'http://video.meta.ua/5502115.video', + 'md5': '71b6f3ee274bef16f1ab410f7f56b476', + 
'info_dict': { + 'id': '5502115', + 'ext': 'mp4', + 'title': 'Sony Xperia Z camera test [HQ]', + 'description': 'Xperia Z shoots video in FullHD HDR.', + 'uploader_id': 'nomobile', + 'uploader': 'CHЁZA.TV', + 'upload_date': '20130211', + }, + 'add_ie': ['Youtube'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + st_html5 = self._search_regex(r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st') + json_str = '' + for i in range(0, len(st_html5), 3): + json_str += '�%s;' % st_html5[i:i + 3] + uppod_data = self._parse_json(unescapeHTML(json_str), video_id) + error = uppod_data.get('customnotfound') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + video_url = uppod_data['file'] + info = { + 'id': video_id, + 'url': video_url, + 'title': uppod_data.get('comment') or self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage), + 'duration': int_or_none(self._og_search_property('video:duration', webpage)), + } + if 'youtube.com/' in video_url: + info.update({ + '_type': 'url_transparent', + 'ie_key': 'Youtube', + }) + return info From df43389ade6e7a6394521ae91c0640508dceb4dc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 02:54:21 +0100 Subject: [PATCH 0930/3599] [skysports] Add new extractor(closes #7066) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/skysports.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/skysports.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 84c39ab48..80d1bbe20 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -714,6 +714,7 @@ from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, ) +from .skysports import SkySportsIE 
from .slideshare import SlideshareIE from .slutload import SlutloadIE from .smotri import ( diff --git a/youtube_dl/extractor/skysports.py b/youtube_dl/extractor/skysports.py new file mode 100644 index 000000000..9dc78c7d2 --- /dev/null +++ b/youtube_dl/extractor/skysports.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class SkySportsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', + 'md5': 'c44a1db29f27daf9a0003e010af82100', + 'info_dict': { + 'id': '10328419', + 'ext': 'flv', + 'title': 'Bale: Its our time to shine', + 'description': 'md5:9fd1de3614d525f5addda32ac3c482c9', + }, + 'add_ie': ['Ooyala'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'ooyala:%s' % self._search_regex( + r'data-video-id="([^"]+)"', webpage, 'ooyala id'), + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'ie_key': 'Ooyala', + } From ab47b6e881269a0329b78a294318745a54e9e7c7 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 04:08:24 +0100 Subject: [PATCH 0931/3599] [theatlantic] Add new extractor(closes #6611) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/theatlantic.py | 40 +++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 youtube_dl/extractor/theatlantic.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 80d1bbe20..d9ffde449 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -799,6 +799,7 @@ from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE +from .theatlantic 
import TheAtlanticIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/theatlantic.py b/youtube_dl/extractor/theatlantic.py new file mode 100644 index 000000000..df4254fea --- /dev/null +++ b/youtube_dl/extractor/theatlantic.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TheAtlanticIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?theatlantic\.com/video/index/(?P\d+)' + _TEST = { + 'url': 'http://www.theatlantic.com/video/index/477918/capture-a-unified-theory-on-mental-health/', + 'md5': '', + 'info_dict': { + 'id': '477918', + 'ext': 'mp4', + 'title': 'Are All Mental Illnesses Related?', + 'description': 'Depression, anxiety, overeating, addiction, and all other mental disorders share a common mechanism.', + 'timestamp': 1460490952, + 'uploader': 'TheAtlantic', + 'upload_date': '20160412', + 'uploader_id': '29913724001', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['BrightcoveLegacy'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + return { + '_type': 'url_transparent', + 'url': self._html_search_meta('twitter:player', webpage), + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'ie_key': 'BrightcoveLegacy', + } From 4d86d2008eeae5d4e75d8f688a666e7b9504bbeb Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 11:30:42 +0100 Subject: [PATCH 0932/3599] [urplay] fix typo and check with flake8 --- youtube_dl/extractor/urplay.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py index 24ecdd2b1..ce3bf6b02 100644 --- a/youtube_dl/extractor/urplay.py +++ b/youtube_dl/extractor/urplay.py @@ -22,7 +22,7 
@@ class URPlayIE(InfoExtractor): webpage = self._download_webpage(url, video_id) urplayer_data = self._parse_json(self._search_regex( - r'urPlayer.init\(({.+?})\);', webpage, 'urplayer data'), video_id) + r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id) host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] formats = [] @@ -64,4 +64,4 @@ class URPlayIE(InfoExtractor): 'series': urplayer_data.get('series_title'), 'subtitles': subtitles, 'formats': formats, - } \ No newline at end of file + } From 329179073b93e37ab76e759d1fe96d8f984367f3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 12:01:30 +0100 Subject: [PATCH 0933/3599] [generic] add generic support for twitter:player embeds --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/generic.py | 21 +++++++++++++++ youtube_dl/extractor/theatlantic.py | 40 ----------------------------- 3 files changed, 21 insertions(+), 41 deletions(-) delete mode 100644 youtube_dl/extractor/theatlantic.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d9ffde449..80d1bbe20 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -799,7 +799,6 @@ from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE -from .theatlantic import TheAtlanticIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 712dd8a94..c2a7f9202 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1245,6 +1245,22 @@ class GenericIE(InfoExtractor): 'uploader': 'www.hudl.com', }, }, + # twitter:player embed + { + 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/', + 'md5': 'a3e0df96369831de324f0778e126653c', + 'info_dict': { + 'id': 
'4909620399001', + 'ext': 'mp4', + 'title': 'What Do Black Holes Sound Like?', + 'description': 'what do black holes sound like', + 'upload_date': '20160524', + 'uploader_id': '29913724001', + 'timestamp': 1464107587, + 'uploader': 'TheAtlantic', + }, + 'add_ie': ['BrightcoveLegacy'], + } ] def report_following_redirect(self, new_url): @@ -2081,6 +2097,11 @@ class GenericIE(InfoExtractor): 'uploader': video_uploader, } + # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser + embed_url = self._twitter_search_player(webpage) + if embed_url: + return self.url_result(embed_url) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/theatlantic.py b/youtube_dl/extractor/theatlantic.py deleted file mode 100644 index df4254fea..000000000 --- a/youtube_dl/extractor/theatlantic.py +++ /dev/null @@ -1,40 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class TheAtlanticIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?theatlantic\.com/video/index/(?P\d+)' - _TEST = { - 'url': 'http://www.theatlantic.com/video/index/477918/capture-a-unified-theory-on-mental-health/', - 'md5': '', - 'info_dict': { - 'id': '477918', - 'ext': 'mp4', - 'title': 'Are All Mental Illnesses Related?', - 'description': 'Depression, anxiety, overeating, addiction, and all other mental disorders share a common mechanism.', - 'timestamp': 1460490952, - 'uploader': 'TheAtlantic', - 'upload_date': '20160412', - 'uploader_id': '29913724001', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'add_ie': ['BrightcoveLegacy'], - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - return { - '_type': 'url_transparent', - 'url': self._html_search_meta('twitter:player', webpage), - 'id': video_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 
'thumbnail': self._og_search_thumbnail(webpage), - 'ie_key': 'BrightcoveLegacy', - } From 93ad6c6bfaae8f1ce87a832ece92fa099f0e2095 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 13:50:49 +0100 Subject: [PATCH 0934/3599] [sixplay] Add new extractor(closes #2183) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/m6.py | 35 +---------------- youtube_dl/extractor/sixplay.py | 60 ++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 33 deletions(-) create mode 100644 youtube_dl/extractor/sixplay.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 80d1bbe20..a7b110450 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -710,6 +710,7 @@ from .shahid import ShahidIE from .shared import SharedIE from .sharesix import ShareSixIE from .sina import SinaIE +from .sixplay import SixPlayIE from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py index d5945ad66..39d2742c8 100644 --- a/youtube_dl/extractor/m6.py +++ b/youtube_dl/extractor/m6.py @@ -1,8 +1,6 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -23,34 +21,5 @@ class M6IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id, - 'Downloading video RSS') - - title = rss.find('./channel/item/title').text - description = rss.find('./channel/item/description').text - thumbnail = rss.find('./channel/item/visuel_clip_big').text - duration = int(rss.find('./channel/item/duration').text) - view_count = int(rss.find('./channel/item/nombre_vues').text) - - formats = [] - for format_id in ['lq', 'sd', 'hq', 'hd']: - video_url = rss.find('./channel/item/url_video_%s' % format_id) - if video_url is None: - 
continue - formats.append({ - 'url': video_url.text, - 'format_id': format_id, - }) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'formats': formats, - } + video_id = self._match_id(url) + return self.url_result('6play:%s' % video_id, 'SixPlay', video_id) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py new file mode 100644 index 000000000..f855a1a00 --- /dev/null +++ b/youtube_dl/extractor/sixplay.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + qualities, + int_or_none, +) + + +class SixPlayIE(InfoExtractor): + _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P[0-9]+)' + _TEST = { + 'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320', + 'md5': '42310bffe4ba3982db112b9cd3467328', + 'info_dict': { + 'id': '11495320', + 'ext': 'mp4', + 'title': 'Jamel et ses amis au Marrakech du rire 2015', + 'description': 'md5:ba2149d5c321d5201b78070ee839d872', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + clip_data = self._download_json( + 'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id, + video_id) + video_data = clip_data['videoInfo'] + + preference = qualities(['lq', 'sd', 'hq', 'hd']) + formats = [] + for source in clip_data['sources']: + source_type, source_url = source.get('type'), source.get('src') + if not source_url or source_type == 'hls/primetime': + continue + if source_type == 'application/vnd.apple.mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + source_url.replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + elif source_type == 'video/mp4': + 
quality = source.get('quality') + formats.append({ + 'url': source_url, + 'format_id': quality, + 'preference': preference(quality), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_data['title'].strip(), + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'series': video_data.get('titlePgm'), + 'formats': formats, + } From 7dbeee7e229a357cfc8acf0a908b10f3f326cd96 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 14:11:55 +0100 Subject: [PATCH 0935/3599] [generic] make twitter:player extraction non fatal --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c2a7f9202..9315b9e21 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2098,7 +2098,7 @@ class GenericIE(InfoExtractor): } # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser - embed_url = self._twitter_search_player(webpage) + embed_url = self._html_search_meta('twitter:player', webpage, default=None) if embed_url: return self.url_result(embed_url) From 049da7cb6cc7d6b47020480fa780907be265b9cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Jun 2016 23:04:18 +0700 Subject: [PATCH 0936/3599] [meta] Extend _VALID_URL --- youtube_dl/extractor/meta.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/meta.py b/youtube_dl/extractor/meta.py index 674b8d264..2ca7092e5 100644 --- a/youtube_dl/extractor/meta.py +++ b/youtube_dl/extractor/meta.py @@ -10,8 +10,8 @@ from ..utils import ( class METAIE(InfoExtractor): - _VALID_URL = r'https?://video\.meta\.ua/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P[0-9]+)' + _TESTS = [{ 'url': 'http://video.meta.ua/5502115.video', 'md5': '71b6f3ee274bef16f1ab410f7f56b476', 'info_dict': { @@ 
-24,7 +24,10 @@ class METAIE(InfoExtractor): 'upload_date': '20130211', }, 'add_ie': ['Youtube'], - } + }, { + 'url': 'http://video.meta.ua/iframe/5502115', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From eafa643715c0989dff927c9a44e837ca62247b4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Jun 2016 23:06:13 +0700 Subject: [PATCH 0937/3599] [meta] Make duration and description optional For iframe URLs --- youtube_dl/extractor/meta.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/meta.py b/youtube_dl/extractor/meta.py index 2ca7092e5..2e2db5620 100644 --- a/youtube_dl/extractor/meta.py +++ b/youtube_dl/extractor/meta.py @@ -47,9 +47,10 @@ class METAIE(InfoExtractor): 'id': video_id, 'url': video_url, 'title': uppod_data.get('comment') or self._og_search_title(webpage), - 'description': self._og_search_description(webpage), + 'description': self._og_search_description(webpage, default=None), 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage), - 'duration': int_or_none(self._og_search_property('video:duration', webpage)), + 'duration': int_or_none(self._og_search_property( + 'video:duration', webpage, default=None)), } if 'youtube.com/' in video_url: info.update({ From 8ff6697861b918ff9221c7dd46a6e1109ad0ef34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Jun 2016 23:19:29 +0700 Subject: [PATCH 0938/3599] [pladform] Improve embed detection --- youtube_dl/extractor/pladform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py index bc559d1df..77e1211d6 100644 --- a/youtube_dl/extractor/pladform.py +++ b/youtube_dl/extractor/pladform.py @@ -49,7 +49,7 @@ class PladformIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r']+src="(?P(?:https?:)?//out\.pladform\.ru/player\?.+?)"', 
webpage) + r']+src=(["\'])(?P(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage) if mobj: return mobj.group('url') From fd94e2671a8b2174d38f9e81b0956e31f90df326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Jun 2016 23:20:44 +0700 Subject: [PATCH 0939/3599] [meta] Add support for pladform embeds --- youtube_dl/extractor/meta.py | 60 +++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/meta.py b/youtube_dl/extractor/meta.py index 2e2db5620..42bedc48f 100644 --- a/youtube_dl/extractor/meta.py +++ b/youtube_dl/extractor/meta.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .pladform import PladformIE from ..utils import ( unescapeHTML, int_or_none, @@ -27,34 +28,45 @@ class METAIE(InfoExtractor): }, { 'url': 'http://video.meta.ua/iframe/5502115', 'only_matching': True, + }, { + # pladform embed + 'url': 'http://video.meta.ua/7121015.video', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - st_html5 = self._search_regex(r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st') - json_str = '' - for i in range(0, len(st_html5), 3): - json_str += '�%s;' % st_html5[i:i + 3] - uppod_data = self._parse_json(unescapeHTML(json_str), video_id) - error = uppod_data.get('customnotfound') - if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + st_html5 = self._search_regex( + r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None) - video_url = uppod_data['file'] - info = { - 'id': video_id, - 'url': video_url, - 'title': uppod_data.get('comment') or self._og_search_title(webpage), - 'description': self._og_search_description(webpage, default=None), - 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage), - 'duration': int_or_none(self._og_search_property( - 
'video:duration', webpage, default=None)), - } - if 'youtube.com/' in video_url: - info.update({ - '_type': 'url_transparent', - 'ie_key': 'Youtube', - }) - return info + if st_html5: + json_str = '' + for i in range(0, len(st_html5), 3): + json_str += '�%s;' % st_html5[i:i + 3] + uppod_data = self._parse_json(unescapeHTML(json_str), video_id) + error = uppod_data.get('customnotfound') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + video_url = uppod_data['file'] + info = { + 'id': video_id, + 'url': video_url, + 'title': uppod_data.get('comment') or self._og_search_title(webpage), + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage), + 'duration': int_or_none(self._og_search_property( + 'video:duration', webpage, default=None)), + } + if 'youtube.com/' in video_url: + info.update({ + '_type': 'url_transparent', + 'ie_key': 'Youtube', + }) + return info + + pladform_url = PladformIE._extract_url(webpage) + if pladform_url: + return self.url_result(pladform_url) From 66a42309fa235af2bb92b7fb73d90d8b79d6bf5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 30 Jun 2016 23:56:55 +0700 Subject: [PATCH 0940/3599] release 2016.06.30 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index f9a1aa990..8d24c14f3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.30** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.27 +[debug] youtube-dl version 2016.06.30 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2a94f4feb..c05cda6ab 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -272,6 +272,7 @@ - **Helsinki**: helsinki.fi - **HentaiStigma** - **HistoricFilms** + - **history:topic**: History.com Topic - **hitbox** - **hitbox:live** - **HornBunny** @@ -358,6 +359,7 @@ - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** + - **META** - **metacafe** - **Metacritic** - **Mgoon** @@ -587,8 +589,10 @@ - **Shared**: shared.sx and vivo.sx - **ShareSix** - **Sina** + - **SixPlay** - **skynewsarabia:article** - **skynewsarabia:video** + - **SkySports** - **Slideshare** - **Slutload** - **smotri**: Smotri.com @@ -721,6 +725,7 @@ - **UDNEmbed**: 聯合影音 - **Unistra** - **Urort**: NRK P3 Urørt + - **URPlay** - 
**USAToday** - **ustream** - **ustream:channel** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2dd24dec1..cf725db9b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.27' +__version__ = '2016.06.30' From 05a0068a7680d41f56545a22ab34b004cd6d72e9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 18:13:49 +0100 Subject: [PATCH 0941/3599] [9c9media] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ninecninemedia.py | 55 ++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/ninecninemedia.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a7b110450..1b2854cb9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -526,6 +526,7 @@ from .nick import ( NickDeIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE +from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE from .noco import NocoIE from .normalboots import NormalbootsIE diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py new file mode 100644 index 000000000..d889245ad --- /dev/null +++ b/youtube_dl/extractor/ninecninemedia.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + parse_duration, + ExtractorError +) + + +class NineCNineMediaIE(InfoExtractor): + _VALID_URL = r'9c9media:(?P[^:]+):(?P\d+)' + + def _real_extract(self, url): + destination_code, video_id = re.match(self._VALID_URL, url).groups() + api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id) + content = self._download_json(api_base_url, video_id, query={ + '$include': '[contentpackages]', + }) + title = content['Name'] + 
if len(content['ContentPackages']) > 1: + raise ExtractorError('multiple content packages') + content_package = content['ContentPackages'][0] + stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id'] + stacks = self._download_json(stacks_base_url, video_id)['Items'] + if len(stacks) > 1: + raise ExtractorError('multiple stacks') + stack = stacks[0] + stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id']) + formats = [] + formats.extend(self._extract_m3u8_formats( + stack_base_url + 'm3u8', video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + stack_base_url + 'f4m', video_id, + f4m_id='hds', fatal=False)) + mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False) + if mp4_url: + formats.append({ + 'url': mp4_url, + 'format_id': 'mp4', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': content.get('Desc') or content.get('ShortDesc'), + 'timestamp': parse_iso8601(content.get('BroadcastDateTime')), + 'duration': parse_duration(content.get('BroadcastTime')), + 'formats': formats, + } From 20361b4f2511a4395ae489c04a68c6098ffab7a4 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 18:14:23 +0100 Subject: [PATCH 0942/3599] [rds] extract 9c9media formats --- youtube_dl/extractor/rds.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/rds.py b/youtube_dl/extractor/rds.py index 796adfdf9..bf200ea4d 100644 --- a/youtube_dl/extractor/rds.py +++ b/youtube_dl/extractor/rds.py @@ -1,23 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( parse_duration, parse_iso8601, + js_to_json, ) +from ..compat import compat_str class RDSIE(InfoExtractor): IE_DESC = 'RDS.ca' - _VALID_URL = 
r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P[^/]+)-(?P\d+\.\d+)' + _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P[^/]+)-\d+\.\d+' _TESTS = [{ 'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799', 'info_dict': { - 'id': '3.1132799', + 'id': '604333', 'display_id': 'fowler-jr-prend-la-direction-de-jacksonville', 'ext': 'mp4', 'title': 'Fowler Jr. prend la direction de Jacksonville', @@ -33,22 +33,17 @@ class RDSIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - # TODO: extract f4m from 9c9media.com - video_url = self._search_regex( - r']+itemprop="contentURL"[^>]+content="([^"]+)"', - webpage, 'video url') - - title = self._og_search_title(webpage) or self._html_search_meta( + item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) + video_id = compat_str(item['id']) + title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( 'title', webpage, 'title', fatal=True) description = self._og_search_description(webpage) or self._html_search_meta( 'description', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) or self._search_regex( + thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( [r']+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', r']+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], webpage, 'thumbnail', fatal=False) @@ -61,13 +56,15 @@ class RDSIE(InfoExtractor): age_limit = self._family_friendly_search(webpage) return { + '_type': 'url_transparent', 'id': video_id, 'display_id': display_id, - 'url': video_url, + 'url': '9c9media:rds_web:%s' % video_id, 'title': title, 'description': description, 'thumbnail': 
thumbnail, 'timestamp': timestamp, 'duration': duration, 'age_limit': age_limit, + 'ie_key': 'NineCNineMedia', } From bf4fa24414d2f4f4418b17ed379eb60df5726c4f Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 18:14:59 +0100 Subject: [PATCH 0943/3599] [ctvnews] Add new extractor(closes #2156) --- youtube_dl/extractor/ctvnews.py | 64 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/ctvnews.py diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dl/extractor/ctvnews.py new file mode 100644 index 000000000..e14b30085 --- /dev/null +++ b/youtube_dl/extractor/ctvnews.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class CTVNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P[0-9.]+)' + _TESTS = [{ + 'url': 'http://www.ctvnews.ca/video?clipId=901995', + 'md5': '10deb320dc0ccb8d01d34d12fc2ea672', + 'info_dict': { + 'id': '901995', + 'ext': 'mp4', + 'title': 'Extended: \'That person cannot be me\' Johnson says', + 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', + 'timestamp': 1467286284, + 'upload_date': '20160630', + } + }, { + 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', + 'info_dict': + { + 'id': '1.2966224', + }, + 'playlist_mincount': 19, + }, { + 'url': 'http://www.ctvnews.ca/video?binId=1.810401', + 'info_dict': + { + 'id': '1.810401', + }, + 'playlist_mincount': 91, + }, { + 'url': 'http://www.ctvnews.ca/1.810401', + 'only_matching': True, + }, { + 'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231', + 'only_matching': True, + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + + def ninecninemedia_url_result(clip_id): + return { + '_type': 'url_transparent', + 'id': clip_id, + 'url': '9c9media:ctvnews_web:%s' % clip_id, 
+ 'ie_key': 'NineCNineMedia', + } + + if page_id.isdigit(): + return ninecninemedia_url_result(page_id) + else: + webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ + 'ot': 'example.AjaxPageLayout.ot', + 'maxItemsPerPage': 20, + }) + entries = [ninecninemedia_url_result(clip_id) for clip_id in set( + re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] + return self.playlist_result(entries, page_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1b2854cb9..4765fbc77 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -171,6 +171,7 @@ from .crunchyroll import ( ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE +from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .cwtv import CWTVIE from .dailymail import DailyMailIE From 9617b557aa2a96840026a9c915bc57e335a76272 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 18:21:47 +0100 Subject: [PATCH 0944/3599] [ctv] Add new extractor(closes #4077) --- youtube_dl/extractor/ctv.py | 30 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 31 insertions(+) create mode 100644 youtube_dl/extractor/ctv.py diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/ctv.py new file mode 100644 index 000000000..5807fbac9 --- /dev/null +++ b/youtube_dl/extractor/ctv.py @@ -0,0 +1,30 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class CTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P[0-9.]+)' + _TESTS = [{ + 'url': 'http://www.ctv.ca/video/player?vid=706966', + 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', + 'info_dict': { + 'id': '706966', + 'ext': 'mp4', + 'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'', + 'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of 
"Stonewall”.', + 'upload_date': '20150919', + 'timestamp': 1442624700, + }, + 'expected_warnings': ['HTTP Error 404'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': '9c9media:ctv_web:%s' % video_id, + 'ie_key': 'NineCNineMedia', + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4765fbc77..62b5fed18 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -171,6 +171,7 @@ from .crunchyroll import ( ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE +from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .cwtv import CWTVIE From 76dad392f5bd82493777d8efc35bcfccf70fafec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 18:27:57 +0100 Subject: [PATCH 0945/3599] [meta] Clarify the source of uppod st decryption algorithm --- youtube_dl/extractor/meta.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/meta.py b/youtube_dl/extractor/meta.py index 42bedc48f..cdb46e163 100644 --- a/youtube_dl/extractor/meta.py +++ b/youtube_dl/extractor/meta.py @@ -42,6 +42,7 @@ class METAIE(InfoExtractor): r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None) if st_html5: + # uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js json_str = '' for i in range(0, len(st_html5), 3): json_str += '�%s;' % st_html5[i:i + 3] From c9e538a3b1cde6ce140323a029c7b6f7386eb004 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 30 Jun 2016 19:52:32 +0100 Subject: [PATCH 0946/3599] [ctvnews] use orderedSet, increase the number of items for playlists and use smaller bin list for test --- youtube_dl/extractor/ctvnews.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dl/extractor/ctvnews.py index e14b30085..1023b6130 
100644 --- a/youtube_dl/extractor/ctvnews.py +++ b/youtube_dl/extractor/ctvnews.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import orderedSet class CTVNewsIE(InfoExtractor): @@ -27,12 +28,12 @@ class CTVNewsIE(InfoExtractor): }, 'playlist_mincount': 19, }, { - 'url': 'http://www.ctvnews.ca/video?binId=1.810401', + 'url': 'http://www.ctvnews.ca/video?binId=1.2876780', 'info_dict': { - 'id': '1.810401', + 'id': '1.2876780', }, - 'playlist_mincount': 91, + 'playlist_mincount': 100, }, { 'url': 'http://www.ctvnews.ca/1.810401', 'only_matching': True, @@ -57,8 +58,8 @@ class CTVNewsIE(InfoExtractor): else: webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ 'ot': 'example.AjaxPageLayout.ot', - 'maxItemsPerPage': 20, + 'maxItemsPerPage': 1000000, }) - entries = [ninecninemedia_url_result(clip_id) for clip_id in set( + entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] return self.playlist_result(entries, page_id) From 044e3d91b5715f7aa63c578097b77fd510ed0f73 Mon Sep 17 00:00:00 2001 From: kidol Date: Thu, 30 Jun 2016 21:06:22 +0200 Subject: [PATCH 0947/3599] [Pornhub] Fix error detection --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 6d57e1d35..4bbf1ec3b 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -87,7 +87,7 @@ class PornHubIE(InfoExtractor): webpage = self._download_webpage(req, video_id) error_msg = self._html_search_regex( - r'(?s)
(.*?)
', + r']+class="removed">\s*]*>\s*

\s*([^<]*)', webpage, 'error message', default=None) if error_msg: error_msg = re.sub(r'\s+', ' ', error_msg) From 3cb3b60064fc8d99a8175b751000892b141e8de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Jul 2016 03:14:23 +0700 Subject: [PATCH 0948/3599] [pornhub] Relax removed message regex (Closes #9964) --- youtube_dl/extractor/pornhub.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 4bbf1ec3b..c1694893c 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -87,8 +87,8 @@ class PornHubIE(InfoExtractor): webpage = self._download_webpage(req, video_id) error_msg = self._html_search_regex( - r']+class="removed">\s*]*>\s*

\s*([^<]*)', - webpage, 'error message', default=None) + r'(?s)]+class=(["\']).*?\bremoved\b.*?\1[^>]*>(?P.+?)

', + webpage, 'error message', default=None, group='error') if error_msg: error_msg = re.sub(r'\s+', ' ', error_msg) raise ExtractorError( From eaaaaec042f8e6afa8f8ec6a2a8b137943f802df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Jul 2016 03:18:27 +0700 Subject: [PATCH 0949/3599] [pornhub] Add more tests with removed videos --- youtube_dl/extractor/pornhub.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index c1694893c..77182bf07 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -63,8 +63,17 @@ class PornHubIE(InfoExtractor): 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, }, { + # removed at the request of cam4.com 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', 'only_matching': True, + }, { + # removed at the request of the copyright owner + 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', + 'only_matching': True, + }, { + # removed by uploader + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', + 'only_matching': True, }] @classmethod From 9e29ef13a378769c19ccec200aba377ad504fe8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 29 Jun 2016 14:56:05 +0200 Subject: [PATCH 0950/3599] [options] Accept quoted string across multiple lines (#9940) Like: -f " bestvideo+bestaudio/ best " --- youtube_dl/options.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 99ce4131f..c9033e3cb 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -26,9 +26,7 @@ def parseOpts(overrideArguments=None): except IOError: return default # silently skip if file is not present try: - res = [] - for l in optionf: - res += compat_shlex_split(l, comments=True) + res = compat_shlex_split(optionf.read(), 
comments=True) finally: optionf.close() return res From a9eede3913a9e9c7e094907f36a22bc6719ce73d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 29 Jun 2016 18:54:30 +0200 Subject: [PATCH 0951/3599] [test/compat] compat_shlex_split: test with newlines --- test/test_compat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_compat.py b/test/test_compat.py index f5317ac3e..1d7ac9f16 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -87,6 +87,7 @@ class TestCompat(unittest.TestCase): def test_compat_shlex_split(self): self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) + self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) def test_compat_etree_fromstring(self): xml = ''' From 0c2ac64bb81462bed0c31be5a2a549601f95f166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Jul 2016 03:57:59 +0700 Subject: [PATCH 0952/3599] [sixplay] Rename preference key to quality in format dict --- youtube_dl/extractor/sixplay.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py index f855a1a00..759a332d2 100644 --- a/youtube_dl/extractor/sixplay.py +++ b/youtube_dl/extractor/sixplay.py @@ -28,7 +28,7 @@ class SixPlayIE(InfoExtractor): video_id) video_data = clip_data['videoInfo'] - preference = qualities(['lq', 'sd', 'hq', 'hd']) + quality_key = qualities(['lq', 'sd', 'hq', 'hd']) formats = [] for source in clip_data['sources']: source_type, source_url = source.get('type'), source.get('src') @@ -46,7 +46,7 @@ class SixPlayIE(InfoExtractor): formats.append({ 'url': source_url, 'format_id': quality, - 'preference': preference(quality), + 'quality': quality_key(quality), }) self._sort_formats(formats) From f11315e8d4239611c42d8fe438c7ded58293d54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Jul 2016 
03:59:57 +0700 Subject: [PATCH 0953/3599] release 2016.07.01 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8d24c14f3..d9b196e52 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.30** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.30 +[debug] youtube-dl version 2016.07.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c05cda6ab..6e4041614 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -152,6 +152,8 @@ - **CSNNE** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 + - **CTV** + - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** - **CWTV** @@ -440,6 +442,7 @@ - **nick.de** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** + - **NineCNineMedia** - **njoy**: N-JOY - **njoy:embed** - **Noco** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cf725db9b..e5f10da39 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.30' +__version__ = '2016.07.01' From 9f4576a7ebbc4af4971796325f0799f894daaa1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 1 Jul 2016 23:16:43 +0700 Subject: [PATCH 0954/3599] [twitch] Update usher URL (Closes #9975) --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 20919774d..67b1277cc 100644 --- a/youtube_dl/extractor/twitch.py 
+++ b/youtube_dl/extractor/twitch.py @@ -29,7 +29,7 @@ class TwitchBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv' _API_BASE = 'https://api.twitch.tv' - _USHER_BASE = 'http://usher.twitch.tv' + _USHER_BASE = 'https://usher.ttvnw.net' _LOGIN_URL = 'http://www.twitch.tv/login' _NETRC_MACHINE = 'twitch' From 564dc3c6e8ad235160cfea01e41fc01fefc39be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Jul 2016 01:24:57 +0700 Subject: [PATCH 0955/3599] [vine] Fix extraction (Closes #9970) --- youtube_dl/extractor/vine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 5b801849c..5a2c53b36 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -90,9 +90,11 @@ class VineIE(InfoExtractor): data = self._parse_json( self._search_regex( - r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*' % video_id, + r'window\.POST_DATA\s*=\s*({.+?});\s*', webpage, 'vine data'), video_id) + + data = data[list(data.keys())[0]] formats = [{ 'format_id': '%(format)s-%(rate)s' % f, From 347227237b5a101c3bed260f8efbdbfe65c5f196 Mon Sep 17 00:00:00 2001 From: cant-think-of-a-name Date: Thu, 30 Jun 2016 20:19:17 -0500 Subject: [PATCH 0956/3599] [periscope] fix playlist extraction (#9967) The JSON response changed and the extractor needed to be updated in order to gather the video IDs. 
--- youtube_dl/extractor/periscope.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index c23b314e7..34e0d3d30 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -122,7 +122,7 @@ class PeriscopeUserIE(InfoExtractor): entries = [ self.url_result( - 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) - for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])] + 'https://www.periscope.tv/%s/%s' % (user_id, broadcast)) + for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcastIds', [])] return self.playlist_result(entries, user_id, title, description) From 35fc3021ba6e1e0d7b7d400fdaccc709546a4bc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Jul 2016 01:35:57 +0700 Subject: [PATCH 0957/3599] [periscope] Add another fallback source --- youtube_dl/extractor/periscope.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 34e0d3d30..75f5884a9 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor): title = user.get('display_name') or user.get('username') description = user.get('description') + broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or + data_store.get('BroadcastCache', {}).get('broadcastIds', [])) + entries = [ self.url_result( - 'https://www.periscope.tv/%s/%s' % (user_id, broadcast)) - for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcastIds', [])] + 'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id)) + for broadcast_id in broadcast_ids] return self.playlist_result(entries, user_id, title, description) From bc4b2d75ba5b8fbd2ac9d42f0fb7a4fcd2ea3038 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Jul 2016 02:11:07 +0700 Subject: [PATCH 0958/3599] [pornhub] Add support for thumbzilla (Closes #8696) --- youtube_dl/extractor/pornhub.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 77182bf07..c76afe1c4 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -25,7 +25,15 @@ from ..aes import ( class PornHubIE(InfoExtractor): - _VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P[0-9a-z]+)' + IE_DESC = 'PornHub and Thumbzilla' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| + (?:www\.)?thumbzilla\.com/video/ + ) + (?P[0-9a-z]+) + ''' _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': '1e19b41231a02eba417839222ac9d58e', @@ -74,6 +82,9 @@ class PornHubIE(InfoExtractor): # removed by uploader 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', 'only_matching': True, + }, { + 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', + 'only_matching': True, }] @classmethod From bb08101ec4a8728677ee23466608ab6aa65cbb4f Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 30 Jun 2016 14:57:42 +0200 Subject: [PATCH 0959/3599] [Fusion] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/fusion.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/fusion.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 62b5fed18..16fa4d35c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -281,6 +281,7 @@ from .freespeech import FreespeechIE from .freevideo import FreeVideoIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE +from .fusion import 
FusionIE from .gameinformer import GameInformerIE from .gamekings import GamekingsIE from .gameone import ( diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py new file mode 100644 index 000000000..771abcdb1 --- /dev/null +++ b/youtube_dl/extractor/fusion.py @@ -0,0 +1,29 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class FusionIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/\d+/(?P[\w-]+)' + _TEST = { + 'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', + 'md5': '55c3dd61d2b96dc17c4ab6711d02a39e', + 'info_dict': { + 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', + 'ext': 'mp4', + 'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs', + 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', + 'duration': 140.0, + }, + 'add_ie': ['Ooyala'], + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + ooyala_code = self._search_regex(r'data-video-id="([^"]{32})"', + webpage, 'ooyala code') + + return OoyalaIE._build_url_result(ooyala_code) From 14ff6baa0ee3fa0ead2e2b460017abe5e853647c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Jul 2016 02:44:37 +0700 Subject: [PATCH 0960/3599] [fusion] Improve --- youtube_dl/extractor/fusion.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index 771abcdb1..b4ab4cbb7 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -5,10 +5,9 @@ from .ooyala import OoyalaIE class FusionIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/\d+/(?P[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P\d+)' + _TESTS = [{ 'url': 
'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', - 'md5': '55c3dd61d2b96dc17c4ab6711d02a39e', 'info_dict': { 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', 'ext': 'mp4', @@ -16,14 +15,21 @@ class FusionIE(InfoExtractor): 'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', 'duration': 140.0, }, + 'params': { + 'skip_download': True, + }, 'add_ie': ['Ooyala'], - } + }, { + 'url': 'http://fusion.net/video/201781', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - ooyala_code = self._search_regex(r'data-video-id="([^"]{32})"', - webpage, 'ooyala code') + ooyala_code = self._search_regex( + r'data-video-id=(["\'])(?P.+?)\1', + webpage, 'ooyala code', group='code') return OoyalaIE._build_url_result(ooyala_code) From ac2d8f54d1f95ff54cae6808602d5ddb39bc978b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Jul 2016 02:45:00 +0700 Subject: [PATCH 0961/3599] [vine] Remove superfluous whitespace --- youtube_dl/extractor/vine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 5a2c53b36..0183f052a 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -93,7 +93,7 @@ class VineIE(InfoExtractor): r'window\.POST_DATA\s*=\s*({.+?});\s*', webpage, 'vine data'), video_id) - + data = data[list(data.keys())[0]] formats = [{ From 7a1e71575e8bf6918ece07cb72a58e7425692fd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Jul 2016 02:47:42 +0700 Subject: [PATCH 0962/3599] release 2016.07.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d9b196e52..637103b6b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ 
b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.07.01 +[debug] youtube-dl version 2016.07.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6e4041614..8fd1ab5af 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -242,6 +242,7 @@ - **FreeVideo** - **Funimation** - **FunnyOrDie** + - **Fusion** - **GameInformer** - **Gamekings** - **GameOne** @@ -508,7 +509,7 @@ - **podomatic** - **PolskieRadio** - **PornHd** - - **PornHub** + - **PornHub**: PornHub and Thumbzilla - **PornHubPlaylist** - **PornHubUserVideos** 
- **Pornotube** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e5f10da39..d0483f83b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.07.01' +__version__ = '2016.07.02' From bdafd88da07046f91e0585f083dea7795096e5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 Jul 2016 16:43:19 +0700 Subject: [PATCH 0963/3599] [vk] Extend _VALID_URLs to support new domain (Closes #9981) --- youtube_dl/extractor/vk.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index cfc5ffd8b..758d9c86b 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -27,12 +27,12 @@ class VKIE(InfoExtractor): https?:// (?: (?: - (?:m\.)?vk\.com/video_| + (?:(?:m|new)\.)?vk\.com/video_| (?:www\.)?daxab.com/ ) ext\.php\?(?P.*?\boid=(?P-?\d+).*?\bid=(?P\d+).*)| (?: - (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| + (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video| (?:www\.)?daxab.com/embed/ ) (?P-?\d+_\d+)(?:.*\blist=(?P[\da-f]+))? 
@@ -182,6 +182,10 @@ class VKIE(InfoExtractor): # pladform embed 'url': 'https://vk.com/video-76116461_171554880', 'only_matching': True, + }, + { + 'url': 'http://new.vk.com/video205387401_165548505', + 'only_matching': True, } ] @@ -354,7 +358,7 @@ class VKIE(InfoExtractor): class VKUserVideosIE(InfoExtractor): IE_NAME = 'vk:uservideos' IE_DESC = "VK - User's Videos" - _VALID_URL = r'https?://vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' _TEMPLATE_URL = 'https://vk.com/videos' _TESTS = [{ 'url': 'http://vk.com/videos205387401', @@ -369,6 +373,12 @@ class VKUserVideosIE(InfoExtractor): }, { 'url': 'http://vk.com/videos-97664626?section=all', 'only_matching': True, + }, { + 'url': 'http://m.vk.com/videos205387401', + 'only_matching': True, + }, { + 'url': 'http://new.vk.com/videos205387401', + 'only_matching': True, }] def _real_extract(self, url): From fd6ca382628afbc4a229a15cd26552e226ac4536 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 2 Jul 2016 21:33:23 +0800 Subject: [PATCH 0964/3599] [facebook] Improve Facebook embedded detection Related to #9938. Another example comes from 9834872bf63b4e03b66c5e3b8f306556e735d8c5. 
--- youtube_dl/extractor/facebook.py | 15 +++++++++++++++ youtube_dl/extractor/generic.py | 26 ++++++++++++++++++++++---- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 9b87b37ae..6eaa22d89 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?Phttps://www\.facebook\.com/video/embed.+?)\1', webpage) + if mobj is not None: + return mobj.group('url') + + # Facebook API embed + # see https://developers.facebook.com/docs/plugins/embedded-video-player + mobj = re.search(r'''(?x)]+ + class=(?P[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1)[^>]+ + data-href=(?P[\'"])(?P[^\'"]+)(?P=q2)''', webpage) + if mobj is not None: + return mobj.group('url') + def _login(self): (useremail, password) = self._get_login_info() if useremail is None: diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9315b9e21..7212e0edd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -66,6 +66,7 @@ from .theplatform import ThePlatformIE from .vessel import VesselIE from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE +from .facebook import FacebookIE class GenericIE(InfoExtractor): @@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor): 'uploader': 'TheAtlantic', }, 'add_ie': ['BrightcoveLegacy'], + }, + # Facebook ', webpage): url = self._search_regex( @@ -432,10 +428,10 @@ class PBSIE(InfoExtractor): video_id = mobj.group('id') display_id = video_id - return video_id, display_id, None + return video_id, display_id, None, description def _real_extract(self, url): - video_id, display_id, upload_date = self._extract_webpage(url) + video_id, display_id, upload_date, description = self._extract_webpage(url) if isinstance(video_id, list): 
entries = [self.url_result( @@ -564,11 +560,14 @@ class PBSIE(InfoExtractor): if alt_title: info['title'] = alt_title + ' - ' + re.sub(r'^' + alt_title + '[\s\-:]+', '', info['title']) + description = info.get('description') or info.get( + 'program', {}).get('description') or description + return { 'id': video_id, 'display_id': display_id, 'title': info['title'], - 'description': info.get('description') or info.get('program', {}).get('description'), + 'description': description, 'thumbnail': info.get('image_url'), 'duration': int_or_none(info.get('duration')), 'age_limit': age_limit, From 5ec5461e1a805595c5fef4ae482e86d7d7872d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 22:50:18 +0700 Subject: [PATCH 1349/3599] [pbs] Clarify comment on http formats --- youtube_dl/extractor/pbs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 09aef7fb9..b490ef74c 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -506,12 +506,12 @@ class PBSIE(InfoExtractor): if http_url: for m3u8_format in m3u8_formats: bitrate = self._search_regex(r'(\d+)k', m3u8_format['url'], 'bitrate', default=None) - # lower qualities(150k and 192k) are not available as http formats - # https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656 - # we will try to extract any http format higher than than the lowest quality documented in - # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications - # as there also undocumented http formats formats(4500k and 6500k) - # http://www.pbs.org/video/2365815229/ + # Lower qualities (150k and 192k) are not available as HTTP formats (see [1]), + # we won't try extracting them. + # Since summer 2016 higher quality formats (4500k and 6500k) are also available + # albeit they are not documented in [2]. + # 1. 
https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656 + # 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications if not bitrate or int(bitrate) < 400: continue f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url) From a560f28c98445e2ae2528795609d5ac718ec5b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 23:01:35 +0700 Subject: [PATCH 1350/3599] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ChangeLog b/ChangeLog index b1ce63d75..5efcb2316 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,27 @@ version +Core +* Show progress for curl external downloader +* Forward more options to curl external downloader + Extractors +* [pbs] Fix description extraction +* [franceculture] Fix extraction (#10324) +* [pornotube] Fix extraction (#10322) +* [4tube] Fix metadata extraction (#10321) +* [imgur] Fix width and height extraction (#10325) +* [expotv] Improve extraction ++ [vbox7] Fix extraction (#10309) - [tapely] Remove extractor (#10323) +* [muenchentv] Fix extraction (#10313) ++ [24video] Add support for .me and .xxx TLDs +* [24video] Fix comment count extraction +* [sunporno] Add support for embed URLs +* [sunporno] Fix metadata extraction (#10316) ++ [hgtv] Add extractor for hgtv.ca (#3999) +- [pbs] Remove request to unavailable API ++ [pbs] Add support for high quality HTTP formats ++ [crunchyroll] Add support for HLS formats (#10301) version 2016.08.12 From 73a85620eeb2d595cd86f73357bc4cb081cb3bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Aug 2016 23:17:11 +0700 Subject: [PATCH 1351/3599] release 2016.08.13 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +-- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 
6fdb2f77b..1e0d99b43 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.12 +[debug] youtube-dl version 2016.08.13 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5efcb2316..fc99b9f73 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.13 Core * Show progress for curl external downloader diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8fb581d2b..56fc41a40 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -238,7 +238,6 @@ - **FoxSports** - 
**france2.fr:generation-quoi** - **FranceCulture** - - **FranceCultureEmission** - **FranceInter** - **francetv**: France 2, 3, 4, 5 and Ô - **francetvinfo.fr** @@ -277,6 +276,7 @@ - **HellPorno** - **Helsinki**: helsinki.fi - **HentaiStigma** + - **HGTV** - **HistoricFilms** - **history:topic**: History.com Topic - **hitbox** @@ -664,7 +664,6 @@ - **SztvHu** - **Tagesschau** - **tagesschau:player** - - **Tapely** - **Tass** - **TDSLifeway** - **teachertube**: teachertube.com videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index becf14458..cc93d22aa 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.12' +__version__ = '2016.08.13' From 097eba019d0d5cab93e9ce66e1b727b782d48250 Mon Sep 17 00:00:00 2001 From: phi Date: Sun, 14 Aug 2016 02:18:59 +0800 Subject: [PATCH 1352/3599] bug fix for extractor xiami.py Before applying this patch, when downloading resources from xiami.com, it crashes with these: Traceback (most recent call last): File "/home/phi/.local/bin/youtube-dl", line 11, in sys.exit(main()) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/__init__.py", line 433, in main _real_main(argv) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/__init__.py", line 423, in _real_main retcode = ydl.download(all_urls) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/YoutubeDL.py", line 1786, in download url, force_generic_extractor=self.params.get('force_generic_extractor', False)) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/YoutubeDL.py", line 691, in extract_info ie_result = ie.extract(url) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 347, in extract return self._real_extract(url) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/xiami.py", line 116, in _real_extract return self._extract_tracks(self._match_id(url))[0] File 
"/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/xiami.py", line 43, in _extract_tracks '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if typ else ''), item_id) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 562, in _download_json json_string, video_id, transform_source=transform_source, fatal=fatal) File "/home/phi/.local/lib/python3.5/site-packages/youtube_dl/extractor/common.py", line 568, in _parse_json return json.loads(json_string) File "/usr/lib/python3.5/json/__init__.py", line 312, in loads s.__class__.__name__)) TypeError: the JSON object must be str, not 'NoneType' This patch solves exactly this problem. --- youtube_dl/extractor/xiami.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py index a6dfc4af9..86abef257 100644 --- a/youtube_dl/extractor/xiami.py +++ b/youtube_dl/extractor/xiami.py @@ -13,6 +13,7 @@ class XiamiBaseIE(InfoExtractor): webpage = super(XiamiBaseIE, self)._download_webpage(*args, **kwargs) if '>Xiami is currently not available in your country.<' in webpage: self.raise_geo_restricted('Xiami is currently not available in your country') + return webpage def _extract_track(self, track, track_id=None): title = track['title'] From fafabc0712d95e6a5b2ac56e9375fe90060738f5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 14 Aug 2016 02:33:15 +0800 Subject: [PATCH 1353/3599] Update ChangeLog for #10342 [skip ci] --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index fc99b9f73..d04c5fc2a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [xiami] Fix extraction (#10342) + + version 2016.08.13 Core @@ -23,6 +29,7 @@ Extractors + [pbs] Add support for high quality HTTP formats + [crunchyroll] Add support for HLS formats (#10301) + version 2016.08.12 Core From aaf44a2f47f013e8d864ac9f98b2833904a8be78 Mon Sep 17 00:00:00 2001 From: 
Remita Amine Date: Sat, 13 Aug 2016 22:53:07 +0100 Subject: [PATCH 1354/3599] [uplynk] Add new extractor --- youtube_dl/downloader/hls.py | 6 +++ youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/uplynk.py | 64 ++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 youtube_dl/extractor/uplynk.py diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 3b7bb3508..8d7971e5d 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -20,6 +20,7 @@ from ..utils import ( encodeFilename, sanitize_open, parse_m3u8_attributes, + update_url_query, ) @@ -82,6 +83,7 @@ class HlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -95,6 +97,8 @@ class HlsFD(FragmentFD): if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + if extra_param_to_segment_url: + frag_url = update_url_query(frag_url, extra_param_to_segment_url) success = ctx['dl'].download(frag_filename, {'url': frag_url}) if not success: return False @@ -120,6 +124,8 @@ class HlsFD(FragmentFD): if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = compat_urlparse.urljoin( man_url, decrypt_info['URI']) + if extra_param_to_segment_url: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url) decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 82d4ed153..901847509 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -926,6 +926,10 @@ from .udn import UDNEmbedIE from .digiteka import DigitekaIE from .unistra import UnistraIE from .uol import 
UOLIE +from .uplynk import ( + UplynkIE, + UplynkPreplayIE, +) from .urort import UrortIE from .urplay import URPlayIE from .usatoday import USATodayIE diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py new file mode 100644 index 000000000..a6a685c9d --- /dev/null +++ b/youtube_dl/extractor/uplynk.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + ExtractorError, +) + + +class UplynkIE(InfoExtractor): + _VALID_URL = r'https?://.*?\.uplynk\.com/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P[^&]+))?' + _TEST = { + 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8', + 'info_dict': { + 'id': 'e89eaf2ce9054aa89d92ddb2d817a52e', + 'ext': 'mp4', + 'title': '030816-kgo-530pm-solar-eclipse-vid_web.mp4', + 'uploader_id': '4413701bf5a1488db55b767f8ae9d4fa', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + path, external_id, video_id, session_id = re.match(self._VALID_URL, url).groups() + display_id = video_id or external_id + formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') + if session_id: + for f in formats: + f['extra_param_to_segment_url'] = { + 'pbs': session_id, + } + self._sort_formats(formats) + asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id) + if asset.get('error') == 1: + raise ExtractorError('% said: %s' % (self.IE_NAME, asset['msg']), expected=True) + + return { + 'id': asset['asset'], + 'title': asset['desc'], + 'thumbnail': asset.get('default_poster_url'), + 'duration': float_or_none(asset.get('duration')), + 'uploader_id': asset.get('owner'), + 'formats': formats, + } + + +class UplynkPreplayIE(InfoExtractor): + _VALID_URL = 
r'https?://.*?\.uplynk\.com/preplay2?/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.json' + + def _real_extract(self, url): + path, external_id, video_id = re.match(self._VALID_URL, url).groups() + display_id = video_id or external_id + preplay = self._download_json(url, display_id) + content_url = 'http://content.uplynk.com/%s.m3u8' % path + session_id = preplay.get('sid') + if session_id: + content_url += '?pbs=' + session_id + return self.url_result(content_url, 'Uplynk') From 320d597c21e7a0981f1dc9c4167fce53473ab488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Aug 2016 16:25:14 +0700 Subject: [PATCH 1355/3599] [vgtv] Detect geo restricted videos (#10348) --- youtube_dl/extractor/vgtv.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index b11cd254c..185756301 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -8,6 +8,7 @@ from .xstream import XstreamIE from ..utils import ( ExtractorError, float_or_none, + try_get, ) @@ -129,6 +130,11 @@ class VGTVIE(XstreamIE): 'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil', 'only_matching': True, }, + { + # geoblocked + 'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk', + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -196,6 +202,12 @@ class VGTVIE(XstreamIE): info['formats'].extend(formats) + if not info['formats']: + properties = try_get( + data, lambda x: x['streamConfiguration']['properties'], list) + if properties and 'geoblocked' in properties: + raise self.raise_geo_restricted() + self._sort_formats(info['formats']) info.update({ From 2118fdd1a96ed7a904b53ed5aad50a203d0e0c70 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 11:48:13 +0100 Subject: [PATCH 1356/3599] [common] add separate method for getting netrc ligin info --- 
youtube_dl/extractor/common.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e47770c1d..9427ff449 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -662,6 +662,24 @@ class InfoExtractor(object): else: return res + def _get_netrc_login_info(self, netrc_machine=None): + username = None + password = None + netrc_machine = netrc_machine or self._NETRC_MACHINE + + if self._downloader.params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(netrc_machine) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine) + except (IOError, netrc.NetrcParseError) as err: + self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + + return (username, password) + def _get_login_info(self): """ Get the login info as (username, password) @@ -679,16 +697,8 @@ class InfoExtractor(object): if downloader_params.get('username') is not None: username = downloader_params['username'] password = downloader_params['password'] - elif downloader_params.get('usenetrc', False): - try: - info = netrc.netrc().authenticators(self._NETRC_MACHINE) - if info is not None: - username = info[0] - password = info[2] - else: - raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + else: + username, password = self._get_netrc_login_info() return (username, password) From 9771b1f901b19ad5ba6632a37fc6348e8e6e98dd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 11:52:48 +0100 Subject: [PATCH 1357/3599] [theplatform] use _get_netrc_login_info and fix session expiration check(#10345) --- youtube_dl/extractor/theplatform.py | 15 ++++++++------- 1 file 
changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index bb3efc4ea..9ca765a5f 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -218,15 +218,16 @@ class ThePlatformIE(ThePlatformBaseIE): requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token: - token_expires = unified_timestamp(xml_text(authn_token, 'simpleTokenExpires').replace('_GMT', '')) - if token_expires and token_expires >= time.time(): + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) + if token_expires and token_expires <= int(time.time()): authn_token = None + requestor_info = {} if not authn_token: # TODO add support for other TV Providers mso_id = 'DTV' - login_info = netrc.netrc().authenticators(mso_id) - if not login_info: - return None + username, password = self._get_netrc_login_info(mso_id) + if not username or not password: + return '' def post_form(form_page, note, data={}): post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') @@ -248,8 +249,8 @@ class ThePlatformIE(ThePlatformBaseIE): provider_login_page = post_form( provider_redirect_page, 'Downloading Provider Login Page') mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { - 'username': login_info[0], - 'password': login_info[2], + 'username': username, + 'password': password, }) post_form(mvpd_confirm_page, 'Confirming Login') From 884cdb6cd9c872ea68a03341e462b58e51fba58a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Aug 2016 20:49:11 +0700 Subject: [PATCH 1358/3599] [life:embed] Improve extraction --- youtube_dl/extractor/lifenews.py | 68 +++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py 
b/youtube_dl/extractor/lifenews.py index c2b4490c4..87120ecd1 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( determine_ext, ExtractorError, @@ -96,7 +99,7 @@ class LifeNewsIE(InfoExtractor): r']+>]+src=["\'](.+?)["\']', webpage) iframe_links = re.findall( - r']+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']', + r']+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']', webpage) if not video_urls and not iframe_links: @@ -164,9 +167,9 @@ class LifeNewsIE(InfoExtractor): class LifeEmbedIE(InfoExtractor): IE_NAME = 'life:embed' - _VALID_URL = r'https?://embed\.life\.ru/embed/(?P[\da-f]{32})' + _VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P[\da-f]{32})' - _TEST = { + _TESTS = [{ 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291', 'md5': 'b889715c9e49cb1981281d0e5458fbbe', 'info_dict': { @@ -175,30 +178,57 @@ class LifeEmbedIE(InfoExtractor): 'title': 'e50c2dec2867350528e2574c899b8291', 'thumbnail': 're:http://.*\.jpg', } - } + }, { + # with 1080p + 'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + thumbnail = None formats = [] - for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage): - video_url = compat_urlparse.urljoin(url, video_url) - ext = determine_ext(video_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='m3u8')) - else: - formats.append({ - 'url': video_url, - 'format_id': ext, - 'preference': 1, - }) + + def extract_m3u8(manifest_url): + formats.extend(self._extract_m3u8_formats( + manifest_url, 
video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8')) + + def extract_original(original_url): + formats.append({ + 'url': original_url, + 'format_id': determine_ext(original_url, None), + 'preference': 1, + }) + + playlist = self._parse_json( + self._search_regex( + r'options\s*=\s*({.+?});', webpage, 'options', default='{}'), + video_id).get('playlist', {}) + if playlist: + master = playlist.get('master') + if isinstance(master, compat_str) and determine_ext(master) == 'm3u8': + extract_m3u8(compat_urlparse.urljoin(url, master)) + original = playlist.get('original') + if isinstance(original, compat_str): + extract_original(original) + thumbnail = playlist.get('image') + + # Old rendition fallback + if not formats: + for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage): + video_url = compat_urlparse.urljoin(url, video_url) + if determine_ext(video_url) == 'm3u8': + extract_m3u8(video_url) + else: + extract_original(video_url) + self._sort_formats(formats) - thumbnail = self._search_regex( + thumbnail = thumbnail or self._search_regex( r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None) return { From 1fd6e30988f44d372c7112c2d5e44c0d5cdbc4ed Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 17:55:56 +0100 Subject: [PATCH 1359/3599] [adobepass] create separate class for adobe pass authentication --- youtube_dl/extractor/adobepass.py | 124 +++++++++++++++++++++ youtube_dl/extractor/aenetworks.py | 5 +- youtube_dl/extractor/nationalgeographic.py | 2 +- youtube_dl/extractor/syfy.py | 4 +- youtube_dl/extractor/theplatform.py | 98 +--------------- 5 files changed, 134 insertions(+), 99 deletions(-) create mode 100644 youtube_dl/extractor/adobepass.py diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py new file mode 100644 index 000000000..4e59302ab --- /dev/null +++ b/youtube_dl/extractor/adobepass.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import 
re +import time +import xml.etree.ElementTree as etree + +from .common import InfoExtractor +from ..utils import ( + unescapeHTML, + urlencode_postdata, + unified_timestamp, +) + + +class AdobePass(InfoExtractor): + _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' + _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + + @staticmethod + def _get_mvpd_resource(provider_id, title, guid, rating): + channel = etree.Element('channel') + channel_title = etree.SubElement(channel, 'title') + channel_title.text = provider_id + item = etree.SubElement(channel, 'item') + resource_title = etree.SubElement(item, 'title') + resource_title.text = title + resource_guid = etree.SubElement(item, 'guid') + resource_guid.text = guid + resource_rating = etree.SubElement(item, 'media:rating') + resource_rating.attrib = {'scheme': 'urn:v-chip'} + resource_rating.text = rating + return '' + etree.tostring(channel).decode() + '' + + def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): + def xml_text(xml_str, tag): + return self._search_regex( + '<%s>(.+?)' % (tag, tag), xml_str, tag) + + mvpd_headers = { + 'ap_42': 'anonymous', + 'ap_11': 'Linux i686', + 'ap_z': self._USER_AGENT, + 'User-Agent': self._USER_AGENT, + } + + guid = xml_text(resource, 'guid') + requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + authn_token = requestor_info.get('authn_token') + if authn_token: + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) + if token_expires and token_expires <= int(time.time()): + authn_token = None + requestor_info = {} + if not authn_token: + # TODO add support for other TV Providers + mso_id = 'DTV' + username, password = self._get_netrc_login_info(mso_id) + if not username or not password: + return '' + + def post_form(form_page, note, data={}): + post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', 
group='url') + return self._download_webpage( + post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + + provider_redirect_page = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, + }) + provider_login_page = post_form( + provider_redirect_page, 'Downloading Provider Login Page') + mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { + 'username': username, + 'password': password, + }) + post_form(mvpd_confirm_page, 'Confirming Login') + + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + authn_token = unescapeHTML(xml_text(session, 'authnToken')) + requestor_info['authn_token'] = authn_token + self._downloader.cache.store('mvpd', requestor_id, requestor_info) + + authz_token = requestor_info.get(guid) + if not authz_token: + authorize = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, + 'Retrieving Authorization Token', data=urlencode_postdata({ + 'resource_id': resource, + 'requestor_id': requestor_id, + 'authentication_token': authn_token, + 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'), + 'userMeta': '1', + }), headers=mvpd_headers) + authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) + requestor_info[guid] = authz_token + self._downloader.cache.store('mvpd', requestor_id, requestor_info) + + mvpd_headers.update({ + 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), + 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'), + }) + + return self._download_webpage( + 
self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize', + video_id, 'Retrieving Media Token', data=urlencode_postdata({ + 'authz_token': authz_token, + 'requestor_id': requestor_id, + 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'), + 'hashed_guid': 'false', + }), headers=mvpd_headers) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 8f53050c9..6adb6d824 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE): info = self._parse_theplatform_metadata(theplatform_metadata) if theplatform_metadata.get('AETN$isBehindWall'): requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] - resource = '%s%s%s%s' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating']) + resource = self._get_mvpd_resource( + requestor_id, theplatform_metadata['title'], + theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), + theplatform_metadata['ratings'][0]['rating']) query['auth'] = self._extract_mvpd_auth( url, video_id, requestor_id, resource) info.update(self._search_json_ld(webpage, video_id, fatal=False)) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 0027ff1b8..890e8d5bc 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -119,7 +119,7 @@ class NationalGeographicIE(ThePlatformIE): auth_resource_id = self._search_regex( r"video_auth_resourceId\s*=\s*'([^']+)'", webpage, 'auth resource id') - query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or '' + query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) return { '_type': 'url_transparent', diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 
53723b66e..764287a64 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -40,7 +40,9 @@ class SyfyIE(ThePlatformIE): 'manifest': 'm3u', } if syfy_mpx.get('entitlement') == 'auth': - resource = 'syfy<![CDATA[%s]]>%s%s' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14')) + resource = self._get_mvpd_resource( + 'syfy', title, video_id, + syfy_mpx.get('mpxRating', 'TV-14')) query['auth'] = self._extract_mvpd_auth( url, video_id, 'syfy', resource) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 9ca765a5f..108ddd3a9 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -6,10 +6,10 @@ import time import hmac import binascii import hashlib -import netrc from .once import OnceIE +from .adobepass import AdobePass from ..compat import ( compat_parse_qs, compat_urllib_parse_urlparse, @@ -25,9 +25,6 @@ from ..utils import ( xpath_with_ns, mimetype2ext, find_xpath_attr, - unescapeHTML, - urlencode_postdata, - unified_timestamp, ) default_ns = 'http://www.w3.org/2005/SMIL21/Language' @@ -96,7 +93,7 @@ class ThePlatformBaseIE(OnceIE): return self._parse_theplatform_metadata(info) -class ThePlatformIE(ThePlatformBaseIE): +class ThePlatformIE(ThePlatformBaseIE, AdobePass): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? 
@@ -202,97 +199,6 @@ class ThePlatformIE(ThePlatformBaseIE): sig = flags + expiration_date + checksum + str_to_hex(sig_secret) return '%s&sig=%s' % (url, sig) - def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): - def xml_text(xml_str, tag): - return self._search_regex( - '<%s>(.+?)' % (tag, tag), xml_str, tag) - - mvpd_headers = { - 'ap_42': 'anonymous', - 'ap_11': 'Linux i686', - 'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', - 'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', - } - - guid = xml_text(resource, 'guid') - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} - authn_token = requestor_info.get('authn_token') - if authn_token: - token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) - if token_expires and token_expires <= int(time.time()): - authn_token = None - requestor_info = {} - if not authn_token: - # TODO add support for other TV Providers - mso_id = 'DTV' - username, password = self._get_netrc_login_info(mso_id) - if not username or not password: - return '' - - def post_form(form_page, note, data={}): - post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - return self._download_webpage( - post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - - provider_redirect_page = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, - }) - provider_login_page = post_form( - provider_redirect_page, 'Downloading Provider Login Page') - mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { - 'username': 
username, - 'password': password, - }) - post_form(mvpd_confirm_page, 'Confirming Login') - - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', - 'requestor_id': requestor_id, - }), headers=mvpd_headers) - authn_token = unescapeHTML(xml_text(session, 'authnToken')) - requestor_info['authn_token'] = authn_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) - - authz_token = requestor_info.get(guid) - if not authz_token: - authorize = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, - 'Retrieving Authorization Token', data=urlencode_postdata({ - 'resource_id': resource, - 'requestor_id': requestor_id, - 'authentication_token': authn_token, - 'mso_id': xml_text(authn_token, 'simpleTokenMsoID'), - 'userMeta': '1', - }), headers=mvpd_headers) - authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) - requestor_info[guid] = authz_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) - - mvpd_headers.update({ - 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), - 'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'), - }) - - return self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize', - video_id, 'Retrieving Media Token', data=urlencode_postdata({ - 'authz_token': authz_token, - 'requestor_id': requestor_id, - 'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'), - 'hashed_guid': 'false', - }), headers=mvpd_headers) - def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From d2ac04674d0d9085aedec229820c1d07082e5825 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 18:03:42 +0100 Subject: [PATCH 1360/3599] [viceland] Add new extractor(#8799) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/viceland.py | 100 +++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create 
mode 100644 youtube_dl/extractor/viceland.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 901847509..be96e34ba 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -958,6 +958,7 @@ from .vice import ( ViceIE, ViceShowIE, ) +from .viceland import VicelandIE from .vidbit import VidbitIE from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py new file mode 100644 index 000000000..c66e8eb95 --- /dev/null +++ b/youtube_dl/extractor/viceland.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import time +import hashlib +import json + +from .adobepass import AdobePass +from ..compat import compat_HTTPError +from ..utils import ( + int_or_none, + parse_age_limit, + str_or_none, + parse_duration, + ExtractorError, + extract_attributes, +) + + +class VicelandIE(AdobePass): + _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' + _TEST = { + # FIXME: fill the test after fixing delegation problem + 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', + 'info_dict': { + 'id': '57608447973ee7705f6fbd4e', + 'ext': 'mp4', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['UplynkPreplay', 'Uplynk'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + watch_hub_data = extract_attributes(self._search_regex( + r'(?s)()', webpage, 'watch hub')) + video_id = watch_hub_data['vms-id'] + title = watch_hub_data['video-title'] + + query = {} + if watch_hub_data.get('video-locked') == '1': + resource = self._get_mvpd_resource( + 'VICELAND', title, video_id, + watch_hub_data.get('video-rating')) + query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) + + # signature generation algorithm is 
reverse engineered from signatureGenerator in + # webpack:///../shared/~/vice-player/dist/js/vice-player.js in + # https://www.viceland.com/assets/common/js/web.vendor.bundle.js + exp = int(time.time()) + 14400 + query.update({ + 'exp': exp, + 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), + }) + + try: + preplay = self._download_json('https://www.viceland.com/en_us/preplay/%s' % video_id, video_id, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + error = json.loads(e.cause.read().decode()) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + + video_data = preplay['video'] + base = video_data['base'] + uplynk_preplay_url = preplay['preplayURL'] + episode = video_data.get('episode', {}) + channel = video_data.get('channel', {}) + + subtitles = {} + cc_url = preplay.get('ccURL') + if cc_url: + subtitles['en'] = [{ + 'url': cc_url, + }] + + return { + '_type': 'url_transparent', + 'url': uplynk_preplay_url, + 'id': video_id, + 'title': title, + 'description': base.get('body'), + 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), + 'duration': parse_duration(video_data.get('video_duration') or watch_hub_data.get('video-duration')), + 'timestamp': int_or_none(video_data.get('created_at')), + 'age_limit': parse_age_limit(video_data.get('video_rating')), + 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), + 'episode_number': int_or_none(episode.get('episode_number') or watch_hub_data.get('episode')), + 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), + 'season_number': int_or_none(watch_hub_data.get('season')), + 'season_id': str_or_none(episode.get('season_id')), + 'uploader': channel.get('base', {}).get('title') or watch_hub_data.get('channel-title'), + 'uploader_id': str_or_none(channel.get('id')), + 'subtitles': subtitles, + 'ie_key': 'UplynkPreplay', 
+ } From 9fa57892790ce205634f6a7c83de2b9e52ab5284 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 19:04:23 +0100 Subject: [PATCH 1361/3599] [viceland] fix info extraction(closes #8799) --- youtube_dl/extractor/uplynk.py | 11 +++++++---- youtube_dl/extractor/viceland.py | 7 +++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index a6a685c9d..4313bc9cb 100644 --- a/youtube_dl/extractor/uplynk.py +++ b/youtube_dl/extractor/uplynk.py @@ -26,8 +26,8 @@ class UplynkIE(InfoExtractor): }, } - def _real_extract(self, url): - path, external_id, video_id, session_id = re.match(self._VALID_URL, url).groups() + def _extract_uplynk_info(self, uplynk_content_url): + path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups() display_id = video_id or external_id formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') if session_id: @@ -49,8 +49,11 @@ class UplynkIE(InfoExtractor): 'formats': formats, } + def _real_extract(self, url): + return self._extract_uplynk_info(url) -class UplynkPreplayIE(InfoExtractor): + +class UplynkPreplayIE(UplynkIE): _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.json' def _real_extract(self, url): @@ -61,4 +64,4 @@ class UplynkPreplayIE(InfoExtractor): session_id = preplay.get('sid') if session_id: content_url += '?pbs=' + session_id - return self.url_result(content_url, 'Uplynk') + return self._extract_uplynk_info(content_url) diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index c66e8eb95..f72294b51 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -25,6 +25,13 @@ class VicelandIE(AdobePass): 'info_dict': { 'id': '57608447973ee7705f6fbd4e', 'ext': 'mp4', + 'title': 'CYBERWAR (Trailer)', + 'description': 'Tapping into the geopolitics of 
hacking and surveillance, Ben Makuch travels the world to meet with hackers, government officials, and dissidents to investigate the ecosystem of cyberwarfare.', + 'age_limit': 14, + 'timestamp': 1466008539, + 'upload_date': '20160615', + 'uploader_id': '11', + 'uploader': 'Viceland', }, 'params': { # m3u8 download From 6103f59095bd1e514e43b3f84f4633e27ee09b69 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 19:08:35 +0100 Subject: [PATCH 1362/3599] [viceland] remove outdated comment --- youtube_dl/extractor/viceland.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index f72294b51..0be8a792f 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -20,7 +20,6 @@ from ..utils import ( class VicelandIE(AdobePass): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' _TEST = { - # FIXME: fill the test after fixing delegation problem 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', 'info_dict': { 'id': '57608447973ee7705f6fbd4e', From e811bcf8f820d92b6629920b7c3c5a902815e6d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 14 Aug 2016 20:12:53 +0100 Subject: [PATCH 1363/3599] [viceland] raise ExtractorError for errors other than HTTP 400 --- youtube_dl/extractor/viceland.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 0be8a792f..814a72fa2 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -70,6 +70,7 @@ class VicelandIE(AdobePass): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: error = json.loads(e.cause.read().decode()) raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + raise video_data = preplay['video'] base = video_data['base'] From 7e60ce9cf7b104c15fcc4c495166dc57b950b987 Mon Sep 17 00:00:00 2001 From: 
Remita Amine Date: Sun, 14 Aug 2016 21:24:33 +0100 Subject: [PATCH 1364/3599] [adobepass] clear cache in case of pendingLogout errors --- youtube_dl/extractor/adobepass.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 4e59302ab..d315bfbc1 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -90,6 +90,9 @@ class AdobePass(InfoExtractor): '_method': 'GET', 'requestor_id': requestor_id, }), headers=mvpd_headers) + if ' Date: Sun, 14 Aug 2016 21:25:43 +0100 Subject: [PATCH 1365/3599] [adobepass] fix check for pendingLogout errors --- youtube_dl/extractor/adobepass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index d315bfbc1..cf3a15cbb 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -90,7 +90,7 @@ class AdobePass(InfoExtractor): '_method': 'GET', 'requestor_id': requestor_id, }), headers=mvpd_headers) - if ' Date: Sun, 14 Aug 2016 22:45:43 +0100 Subject: [PATCH 1366/3599] [uplynk,viceland] update tests and change uplynk extractors names --- youtube_dl/extractor/uplynk.py | 3 +++ youtube_dl/extractor/viceland.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index 4313bc9cb..ae529f690 100644 --- a/youtube_dl/extractor/uplynk.py +++ b/youtube_dl/extractor/uplynk.py @@ -11,6 +11,7 @@ from ..utils import ( class UplynkIE(InfoExtractor): + IE_NAME = 'uplynk' _VALID_URL = r'https?://.*?\.uplynk\.com/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.(?:m3u8|json)(?:.*?\bpbs=(?P[^&]+))?' 
_TEST = { 'url': 'http://content.uplynk.com/e89eaf2ce9054aa89d92ddb2d817a52e.m3u8', @@ -54,7 +55,9 @@ class UplynkIE(InfoExtractor): class UplynkPreplayIE(UplynkIE): + IE_NAME = 'uplynk:preplay' _VALID_URL = r'https?://.*?\.uplynk\.com/preplay2?/(?Pext/[0-9a-f]{32}/(?P[^/?&]+)|(?P[0-9a-f]{32}))\.json' + _TEST = None def _real_extract(self, url): path, external_id, video_id = re.match(self._VALID_URL, url).groups() diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index 814a72fa2..da766d8db 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -36,7 +36,7 @@ class VicelandIE(AdobePass): # m3u8 download 'skip_download': True, }, - 'add_ie': ['UplynkPreplay', 'Uplynk'], + 'add_ie': ['UplynkPreplay'], } def _real_extract(self, url): From 1a57b8c18c9bdaf5e231f2178499041446b57a3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Aug 2016 08:25:24 +0700 Subject: [PATCH 1367/3599] [zippcast] Remove extractor (Closes #10332) ZippCast is shut down --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/zippcast.py | 94 ------------------------------ 2 files changed, 95 deletions(-) delete mode 100644 youtube_dl/extractor/zippcast.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index be96e34ba..15bc0a675 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1109,4 +1109,3 @@ from .zingmp3 import ( ZingMp3SongIE, ZingMp3AlbumIE, ) -from .zippcast import ZippCastIE diff --git a/youtube_dl/extractor/zippcast.py b/youtube_dl/extractor/zippcast.py deleted file mode 100644 index de819376d..000000000 --- a/youtube_dl/extractor/zippcast.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - determine_ext, - str_to_int, -) - - -class ZippCastIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?zippcast\.com/(?:video/|videoview\.php\?.*\bvplay=)(?P[0-9a-zA-Z]+)' - _TESTS = [{ - # m3u8, hq direct link - 'url': 'http://www.zippcast.com/video/c9cfd5c7e44dbc29c81', - 'md5': '5ea0263b5606866c4d6cda0fc5e8c6b6', - 'info_dict': { - 'id': 'c9cfd5c7e44dbc29c81', - 'ext': 'mp4', - 'title': '[Vinesauce] Vinny - Digital Space Traveler', - 'description': 'Muted on youtube, but now uploaded in it\'s original form.', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'vinesauce', - 'view_count': int, - 'categories': ['Entertainment'], - 'tags': list, - }, - }, { - # f4m, lq ipod direct link - 'url': 'http://www.zippcast.com/video/b79c0a233e9c6581775', - 'only_matching': True, - }, { - 'url': 'http://www.zippcast.com/videoview.php?vplay=c9cfd5c7e44dbc29c81&auto=no', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://www.zippcast.com/video/%s' % video_id, video_id) - - formats = [] - video_url = self._search_regex( - r']+src=(["\'])(?P.+?)\1', webpage, - 'video url', default=None, group='url') - if video_url: - formats.append({ - 'url': video_url, - 'format_id': 'http', - 'preference': 0, # direct link is almost always of worse quality - }) - src_url = self._search_regex( - r'src\s*:\s*(?:escape\()?(["\'])(?Phttp://.+?)\1', - webpage, 'src', default=None, group='url') - ext = determine_ext(src_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - src_url, video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) or self._html_search_meta( - 'description', webpage) - uploader = self._search_regex( - r']+href="https?://[^/]+/profile/[^>]+>([^<]+)', - webpage, 'uploader', fatal=False) - 
thumbnail = self._og_search_thumbnail(webpage) - view_count = str_to_int(self._search_regex( - r'>([\d,.]+) views!', webpage, 'view count', fatal=False)) - - categories = re.findall( - r']+href="https?://[^/]+/categories/[^"]+">([^<]+),?<', - webpage) - tags = re.findall( - r']+href="https?://[^/]+/search/tags/[^"]+">([^<]+),?<', - webpage) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'view_count': view_count, - 'categories': categories, - 'tags': tags, - 'formats': formats, - } From b6c4e36728e8f60ae7f4910a9b7027a2b702e8dc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:29:01 +0800 Subject: [PATCH 1368/3599] [jwplatform] Parse video_id from JWPlayer data And remove a mysterious comma from 115c65793af4c56c8f1986d2640105fc7e760c13 --- youtube_dl/extractor/jwplatform.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 2a499bb77..ce3126943 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -30,7 +30,7 @@ class JWPlatformBaseIE(InfoExtractor): return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) - def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): # JWPlayer backward compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 if 'playlist' not in jwplayer_data: @@ -43,6 +43,8 @@ class JWPlatformBaseIE(InfoExtractor): if 'sources' not in video_data: video_data['sources'] = [video_data] + this_video_id = video_id or video_data['mediaid'] + formats = [] for source in video_data['sources']: source_url = self._proto_relative_url(source['file']) 
@@ -52,7 +54,7 @@ class JWPlatformBaseIE(InfoExtractor): ext = mimetype2ext(source_type) or determine_ext(source_url) if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ @@ -68,7 +70,7 @@ class JWPlatformBaseIE(InfoExtractor): 'ext': ext, } if source_url.startswith('rtmp'): - a_format['ext'] = 'flv', + a_format['ext'] = 'flv' # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf @@ -95,7 +97,7 @@ class JWPlatformBaseIE(InfoExtractor): }) entries.append({ - 'id': video_id, + 'id': this_video_id, 'title': video_data['title'] if require_title else video_data.get('title'), 'description': video_data.get('description'), 'thumbnail': self._proto_relative_url(video_data.get('image')), From 5c2d08722139118d8de27d43d6210e18ab1da9d5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:31:08 +0800 Subject: [PATCH 1369/3599] [sendtonews] Fix extraction --- youtube_dl/extractor/sendtonews.py | 103 +++++++++++++++-------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py index 1c636f672..2dbe490bb 100644 --- a/youtube_dl/extractor/sendtonews.py +++ b/youtube_dl/extractor/sendtonews.py @@ -4,33 +4,43 @@ from __future__ import unicode_literals import re from .jwplatform import JWPlatformBaseIE -from ..compat import compat_parse_qs from ..utils import ( - ExtractorError, - parse_duration, + float_or_none, + parse_iso8601, + update_url_query, ) class SendtoNewsIE(JWPlatformBaseIE): - _VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P[^#]+)' + _VALID_URL = 
r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P[0-9A-Za-z-]+)' _TEST = { # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ - 'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes', + 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES', 'info_dict': { - 'id': 'GxfCe0Zo7D-175909-5588', - 'ext': 'mp4', - 'title': 'Recap: CLE 15, CIN 6', - 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', - 'duration': 49, + 'id': 'GxfCe0Zo7D-175909-5588' }, + 'playlist_count': 9, + # test the first video only to prevent lengthy tests + 'playlist': [{ + 'info_dict': { + 'id': '198180', + 'ext': 'mp4', + 'title': 'Recap: CLE 5, LAA 4', + 'description': '8/14/16: Naquin, Almonte lead Indians in 5-4 win', + 'duration': 57.343, + 'thumbnail': 're:https?://.*\.jpg$', + 'upload_date': '20160815', + 'timestamp': 1471221961, + }, + }], 'params': { # m3u8 download 'skip_download': True, }, } - _URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s' + _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s' @classmethod def _extract_url(cls, webpage): @@ -39,48 +49,41 @@ class SendtoNewsIE(JWPlatformBaseIE): .*\bSC=(?P[0-9a-zA-Z-]+).* \1>''', webpage) if mobj: - sk, mk, pk = mobj.group('SC').split('-') - return cls._URL_TEMPLATE % (sk, mk, pk) + sc = mobj.group('SC') + return cls._URL_TEMPLATE % sc def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - params = compat_parse_qs(mobj.group('query')) + playlist_id = self._match_id(url) - if 'SK' not in params or 'MK' not in params or 'PK' not in params: - raise ExtractorError('Invalid URL', expected=True) + data_url = update_url_query( + url.replace('embedplayer.php', 'data_read.php'), + {'cmd': 'loadInitial'}) + playlist_data = self._download_json(data_url, 
playlist_id) - video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]]) + entries = [] + for video in playlist_data['playlistData'][0]: + info_dict = self._parse_jwplayer_data( + video['jwconfiguration'], + require_title=False, rtmp_params={'no_resume': True}) - webpage = self._download_webpage(url, video_id) + thumbnails = [] + if video.get('thumbnailUrl'): + thumbnails.append({ + 'id': 'normal', + 'url': video['thumbnailUrl'], + }) + if video.get('smThumbnailUrl'): + thumbnails.append({ + 'id': 'small', + 'url': video['smThumbnailUrl'], + }) + info_dict.update({ + 'title': video['S_headLine'], + 'description': video.get('S_fullStory'), + 'thumbnails': thumbnails, + 'duration': float_or_none(video.get('SM_length')), + 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '), + }) + entries.append(info_dict) - jwplayer_data_str = self._search_regex( - r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data') - js_vars = { - 'w': 1024, - 'h': 768, - 'modeVar': 'html5', - } - for name, val in js_vars.items(): - js_val = '%d' % val if isinstance(val, int) else '"%s"' % val - jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val) - - info_dict = self._parse_jwplayer_data( - self._parse_json(jwplayer_data_str, video_id), - video_id, require_title=False, rtmp_params={'no_resume': True}) - - title = self._html_search_regex( - r']+class="embedTitle">([^<]+)
', webpage, 'title') - description = self._html_search_regex( - r']+class="embedSubTitle">([^<]+)', webpage, - 'description', fatal=False) - duration = parse_duration(self._html_search_regex( - r']+class="embedDetails">([0-9:]+)', webpage, - 'duration', fatal=False)) - - info_dict.update({ - 'title': title, - 'description': description, - 'duration': duration, - }) - - return info_dict + return self.playlist_result(entries, playlist_id) From 760845ce9965b57484f232a162b9bb4ad3a505a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:37:37 +0800 Subject: [PATCH 1370/3599] [cbslocal] Adapt to SendtoNewsIE --- youtube_dl/extractor/cbslocal.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 008c5fe32..4bcd104af 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -41,13 +41,8 @@ class CBSLocalIE(AnvatoIE): 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', 'info_dict': { 'id': 'GxfCe0Zo7D-175909-5588', - 'ext': 'mp4', - 'title': 'Recap: CLE 15, CIN 6', - 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', - 'upload_date': '20160516', - 'timestamp': 1463433840, - 'duration': 49, }, + 'playlist_count': 9, 'params': { # m3u8 download 'skip_download': True, @@ -60,12 +55,11 @@ class CBSLocalIE(AnvatoIE): sendtonews_url = SendtoNewsIE._extract_url(webpage) if sendtonews_url: - info_dict = { - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, sendtonews_url), - } - else: - info_dict = self._extract_anvato_videos(webpage, display_id) + return self.url_result( + compat_urlparse.urljoin(url, sendtonews_url), + ie=SendtoNewsIE.ie_key()) + + info_dict = self._extract_anvato_videos(webpage, display_id) time_str = self._html_search_regex( r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) 
From 6d8ec8c3b7381c40afd89f9c118ae770997703d0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 13:39:43 +0800 Subject: [PATCH 1371/3599] [ChangeLog] Update for CBSLocal and related changes --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index d04c5fc2a..32504dab5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,9 @@ version Extractors +* [cbslocal] Fix extraction for SendtoNews-based videos +* [sendtonews] Fix extraction +* [jwplatform] Now can parse video_id from JWPlayer data * [xiami] Fix extraction (#10342) From 69eb4d699fe3f6d84acc7882e427e661040faecb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Aug 2016 20:29:22 +0800 Subject: [PATCH 1372/3599] [cbsnews] Remove invalid tests. CBS Live videos gets deleted soon. --- youtube_dl/extractor/cbsnews.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 9328e3e20..9d3b75526 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -70,7 +70,8 @@ class CBSNewsLiveVideoIE(InfoExtractor): IE_DESC = 'CBS News Live Videos' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P[\da-z_-]+)' - _TESTS = [{ + # Live videos get deleted soon. 
See http://www.cbsnews.com/live/ for the latest examples + _TEST = { 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'info_dict': { 'id': 'clinton-sanders-prepare-to-face-off-in-nh', @@ -78,15 +79,8 @@ class CBSNewsLiveVideoIE(InfoExtractor): 'title': 'Clinton, Sanders Prepare To Face Off In NH', 'duration': 334, }, - 'skip': 'Video gone, redirected to http://www.cbsnews.com/live/', - }, { - 'url': 'http://www.cbsnews.com/live/video/video-shows-intense-paragliding-accident/', - 'info_dict': { - 'id': 'video-shows-intense-paragliding-accident', - 'ext': 'flv', - 'title': 'Video Shows Intense Paragliding Accident', - }, - }] + 'skip': 'Video gone', + } def _real_extract(self, url): video_id = self._match_id(url) From bf90c46790bac92e8a61ee0514cf3c41a8c048e9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 15 Aug 2016 16:33:35 +0100 Subject: [PATCH 1373/3599] [fxnetworks] Add new extractor(closes #9462) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/fxnetworks.py | 49 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 youtube_dl/extractor/fxnetworks.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 15bc0a675..07928c530 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,6 +287,7 @@ from .freevideo import FreeVideoIE from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE from .fusion import FusionIE +from .fxnetworks import FXNetworksIE from .gameinformer import GameInformerIE from .gameone import ( GameOneIE, diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py new file mode 100644 index 000000000..70bc186a3 --- /dev/null +++ b/youtube_dl/extractor/fxnetworks.py @@ -0,0 +1,49 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .adobepass import AdobePass +from ..utils import ( + update_url_query, + extract_attributes, 
+ parse_age_limit, + smuggle_url, +) + + +class FXNetworksIE(AdobePass): + _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = extract_attributes(self._search_regex( + r'()', webpage, 'video data')) + player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', fatal=False) + release_url = video_data['rel'] + title = video_data['data-title'] + rating = video_data.get('data-rating') + query = { + 'mbr': 'true', + } + if player_type == 'movies': + query.update({ + 'manifest': 'm3u', + }) + else: + query.update({ + 'switch': 'http', + }) + if video_data.get('data-req-auth') == '1': + resource = self._get_mvpd_resource( + video_data['data-channel'], title, + video_data.get('data-guid'), rating) + query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), + 'thumbnail': video_data.get('data-large-thumb'), + 'age_limit': parse_age_limit(rating), + 'ie_key': 'ThePlatform', + } From cbef4d5c9ff5013d0c10b960e1690805724120cd Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 15 Aug 2016 17:10:45 +0100 Subject: [PATCH 1374/3599] [fxnetworks] add test and check geo restriction --- youtube_dl/extractor/fxnetworks.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 70bc186a3..940e7427c 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -12,10 +12,27 @@ from ..utils import ( class FXNetworksIE(AdobePass): _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' + _TEST = { + 'url': 'http://www.fxnetworks.com/video/719841347694', + 'md5': '1447d4722e42ebca19e5232ab93abb22', + 'info_dict': { + 'id': 
'719841347694', + 'ext': 'mp4', + 'title': 'Vanpage', + 'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.', + 'age_limit': 14, + 'uploader': 'NEWA-FNG-FX', + 'upload_date': '20160706', + 'timestamp': 1467844741, + }, + 'add_ie': ['ThePlatform'], + } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if 'The content you are trying to access is not available in your region.' in webpage: + self.raise_geo_restricted() video_data = extract_attributes(self._search_regex( r'()', webpage, 'video data')) player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', fatal=False) @@ -42,6 +59,7 @@ class FXNetworksIE(AdobePass): return { '_type': 'url_transparent', 'id': video_id, + 'title': title, 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), 'thumbnail': video_data.get('data-large-thumb'), 'age_limit': parse_age_limit(rating), From 818ac213eb80e18f472ecdf2406569bafd4cccaf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 15 Aug 2016 21:36:34 +0100 Subject: [PATCH 1375/3599] [adobepass] add IE suffix to the extractor and remove duplicate constant --- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/fxnetworks.py | 4 ++-- youtube_dl/extractor/nationalgeographic.py | 6 +++--- youtube_dl/extractor/syfy.py | 4 ++-- youtube_dl/extractor/theplatform.py | 5 ++--- youtube_dl/extractor/viceland.py | 4 ++-- 6 files changed, 12 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index cf3a15cbb..2c9f8817b 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -13,7 +13,7 @@ from ..utils import ( ) -class AdobePass(InfoExtractor): +class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) 
Gecko/20100101 Firefox/47.0' diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 940e7427c..3ec3b0b46 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -1,7 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals -from .adobepass import AdobePass +from .adobepass import AdobePassIE from ..utils import ( update_url_query, extract_attributes, @@ -10,7 +10,7 @@ from ..utils import ( ) -class FXNetworksIE(AdobePass): +class FXNetworksIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' _TEST = { 'url': 'http://www.fxnetworks.com/video/719841347694', diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 890e8d5bc..1dcf27afe 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .theplatform import ThePlatformIE +from .adobepass import AdobePassIE from ..utils import ( smuggle_url, url_basename, @@ -65,7 +65,7 @@ class NationalGeographicVideoIE(InfoExtractor): } -class NationalGeographicIE(ThePlatformIE): +class NationalGeographicIE(AdobePassIE): IE_NAME = 'natgeo' _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P[^/?]+)' @@ -131,7 +131,7 @@ class NationalGeographicIE(ThePlatformIE): } -class NationalGeographicEpisodeGuideIE(ThePlatformIE): +class NationalGeographicEpisodeGuideIE(InfoExtractor): IE_NAME = 'natgeo:episodeguide' _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P[^/]+)/episode-guide' _TESTS = [ diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 764287a64..cc81f6003 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -1,13 +1,13 @@ from __future__ import unicode_literals -from .theplatform import ThePlatformIE +from 
.adobepass import AdobePassIE from ..utils import ( update_url_query, smuggle_url, ) -class SyfyIE(ThePlatformIE): +class SyfyIE(AdobePassIE): _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 108ddd3a9..eda899497 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -9,7 +9,7 @@ import hashlib from .once import OnceIE -from .adobepass import AdobePass +from .adobepass import AdobePassIE from ..compat import ( compat_parse_qs, compat_urllib_parse_urlparse, @@ -93,7 +93,7 @@ class ThePlatformBaseIE(OnceIE): return self._parse_theplatform_metadata(info) -class ThePlatformIE(ThePlatformBaseIE, AdobePass): +class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? 
@@ -164,7 +164,6 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePass): 'url': 'http://player.theplatform.com/p/NnzsPC/onsite_universal/select/media/guid/2410887629/2928790?fwsitesection=nbc_the_blacklist_video_library&autoPlay=true&carouselID=137781', 'only_matching': True, }] - _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' @classmethod def _extract_urls(cls, webpage): diff --git a/youtube_dl/extractor/viceland.py b/youtube_dl/extractor/viceland.py index da766d8db..8742b607a 100644 --- a/youtube_dl/extractor/viceland.py +++ b/youtube_dl/extractor/viceland.py @@ -5,7 +5,7 @@ import time import hashlib import json -from .adobepass import AdobePass +from .adobepass import AdobePassIE from ..compat import compat_HTTPError from ..utils import ( int_or_none, @@ -17,7 +17,7 @@ from ..utils import ( ) -class VicelandIE(AdobePass): +class VicelandIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P[a-f0-9]+)' _TEST = { 'url': 'https://www.viceland.com/en_us/video/cyberwar-trailer/57608447973ee7705f6fbd4e', From 254e64a20aa37a033cb200bc6f1aa9daf57eead8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 04:36:23 +0700 Subject: [PATCH 1376/3599] [bbc:playlist] Add support for pagination (Closes #10349) --- youtube_dl/extractor/bbc.py | 48 ++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 83e6d024c..16a97a76d 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import itertools from .common import InfoExtractor from ..utils import ( @@ -17,6 +18,7 @@ from ..utils import ( from ..compat import ( compat_etree_fromstring, compat_HTTPError, + compat_urlparse, ) @@ -1056,19 +1058,35 @@ class BBCCoUkArticleIE(InfoExtractor): class BBCCoUkPlaylistBaseIE(InfoExtractor): + def _entries(self, 
webpage, url, playlist_id): + single_page = 'page' in compat_urlparse.parse_qs( + compat_urlparse.urlparse(url).query) + for page_num in itertools.count(2): + for video_id in re.findall( + self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): + yield self.url_result( + self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) + if single_page: + return + next_page = self._search_regex( + r']+class=(["\'])pagination_+next\1[^>]*>]+href=(["\'])(?P(?:(?!\2).)+)\2', + webpage, 'next page url', default=None, group='url') + if not next_page: + break + webpage = self._download_webpage( + compat_urlparse.urljoin(url, next_page), playlist_id, + 'Downloading page %d' % page_num, page_num) + def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - entries = [ - self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) - for video_id in re.findall( - self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)] - title, description = self._extract_title_and_description(webpage) - return self.playlist_result(entries, playlist_id, title, description) + return self.playlist_result( + self._entries(webpage, url, playlist_id), + playlist_id, title, description) class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): @@ -1094,6 +1112,24 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', }, 'playlist_mincount': 10, + }, { + # explicit page + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 24, + }, { + # all pages + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 142, }] def _extract_title_and_description(self, webpage): From 
4f640f28901be8a3ce57e77ead404d751e36d208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 04:43:10 +0700 Subject: [PATCH 1377/3599] [bbc:playlist] Fix tests --- youtube_dl/extractor/bbc.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 16a97a76d..deb9cc1c0 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1112,24 +1112,6 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', }, 'playlist_mincount': 10, - }, { - # explicit page - 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', - 'info_dict': { - 'id': 'b00mfl7n', - 'title': 'Bohemian Icons', - 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', - }, - 'playlist_mincount': 24, - }, { - # all pages - 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', - 'info_dict': { - 'id': 'b00mfl7n', - 'title': 'Bohemian Icons', - 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', - }, - 'playlist_mincount': 142, }] def _extract_title_and_description(self, webpage): @@ -1153,6 +1135,24 @@ class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): 'description': 'French thriller serial about a missing teenager.', }, 'playlist_mincount': 7, + }, { + # multipage playlist, explicit page + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Frozen Planet - Clips - BBC One', + 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', + }, + 'playlist_mincount': 24, + }, { + # multipage playlist, all pages + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Frozen Planet - Clips - BBC One', + 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', + }, + 'playlist_mincount': 142, }, { 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06', 
'only_matching': True, From fb64adcbd37a660da92687878831d08e82ae748c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 04:45:21 +0700 Subject: [PATCH 1378/3599] [adobepass] PEP 8 --- youtube_dl/extractor/adobepass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 2c9f8817b..9e3a3e362 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -131,4 +131,4 @@ class AdobePassIE(InfoExtractor): if ' Date: Tue, 16 Aug 2016 13:43:33 +0100 Subject: [PATCH 1379/3599] [amcnetworks] Add new extractor --- youtube_dl/extractor/amcnetworks.py | 72 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/amcnetworks.py diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py new file mode 100644 index 000000000..be9552541 --- /dev/null +++ b/youtube_dl/extractor/amcnetworks.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .theplatform import ThePlatformIE +from ..utils import ( + update_url_query, + parse_age_limit, +) + + +class AMCNetworksIE(ThePlatformIE): + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' + _TESTS = [{ + 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', + 'md5': '', + 'info_dict': { + 'id': 's3MX01Nl4vPH', + 'ext': 'mp4', + 'title': 'Step 1', + 'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. 
Starring Marc Maron and Constance Zimmer.', + 'age_limit': 17, + 'upload_date': '20160505', + 'timestamp': 1462468831, + 'uploader': 'AMCN', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', + 'only_matching': True, + }, { + 'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot', + 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal', + 'only_matching': True, + }, { + 'url': 'http://www.ifc.com/movies/chaos', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + media_url = self._search_regex(r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', webpage, 'media url') + theplatform_metadata = self._download_theplatform_metadata(self._search_regex( + r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), display_id) + info = self._parse_theplatform_metadata(theplatform_metadata) + video_id = theplatform_metadata['pid'] + title = theplatform_metadata['title'] + rating = theplatform_metadata['ratings'][0]['rating'] + auth_required = self._search_regex(r'window\.authRequired\s*=\s*(true|false);', webpage, 'auth required') + if auth_required == 'true': + requestor_id = self._search_regex(r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', webpage, 'requestor id') + resource = self._get_mvpd_resource(requestor_id, title, video_id, rating) + query['auth'] = self._extract_mvpd_auth(url, video_id, requestor_id, resource) + media_url = update_url_query(media_url, query) + formats, subtitles = self._extract_theplatform_smil(media_url, video_id) + self._sort_formats(formats) + info.update({ + 'id': video_id, + 'subtiles': subtitles, + 'formats': formats, + 'age_limit': 
parse_age_limit(parse_age_limit(rating)), + }) + return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 07928c530..a5e0805b2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -29,6 +29,7 @@ from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE +from .amcnetworks import AMCNetworksIE from .animeondemand import AnimeOnDemandIE from .anitube import AnitubeIE from .anysex import AnySexIE From 837e56c8eefa725ca72feca9431050cdda571c57 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 14:49:32 +0100 Subject: [PATCH 1380/3599] [amcnetworks] extract episode metadata --- youtube_dl/extractor/amcnetworks.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index be9552541..26f46acb5 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -5,6 +5,7 @@ from .theplatform import ThePlatformIE from ..utils import ( update_url_query, parse_age_limit, + int_or_none, ) @@ -16,7 +17,7 @@ class AMCNetworksIE(ThePlatformIE): 'info_dict': { 'id': 's3MX01Nl4vPH', 'ext': 'mp4', - 'title': 'Step 1', + 'title': 'Maron - Season 4 - Step 1', 'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. 
Starring Marc Maron and Constance Zimmer.', 'age_limit': 17, 'upload_date': '20160505', @@ -69,4 +70,22 @@ class AMCNetworksIE(ThePlatformIE): 'formats': formats, 'age_limit': parse_age_limit(parse_age_limit(rating)), }) + ns_keys = theplatform_metadata.get('$xmlns', {}).keys() + if ns_keys: + ns = list(ns_keys)[0] + series = theplatform_metadata.get(ns + '$show') + season_number = int_or_none(theplatform_metadata.get(ns + '$season')) + episode = theplatform_metadata.get(ns + '$episodeTitle') + episode_number = int_or_none(theplatform_metadata.get(ns + '$episode')) + if season_number: + title = 'Season %d - %s' % (season_number, title) + if series: + title = '%s - %s' % (series, title) + info.update({ + 'title': title, + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + }) return info From 70a2829fee4203ebeb399481304d289ff92adf29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 21:17:52 +0700 Subject: [PATCH 1381/3599] [xvideos] Fix HLS extraction (Closes #10356) --- youtube_dl/extractor/xvideos.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 1dfe031ca..b2ef15119 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -42,24 +42,24 @@ class XVideosIE(InfoExtractor): video_url = compat_urllib_parse_unquote(self._search_regex( r'flv_url=(.+?)&', webpage, 'video URL', default='')) if video_url: - formats.append({'url': video_url}) + formats.append({ + 'url': video_url, + 'format_id': 'flv', + }) - player_args = self._search_regex( - r'(?s)new\s+HTML5Player\((.+?)\)', webpage, ' html5 player', default=None) - if player_args: - for arg in player_args.split(','): - format_url = self._search_regex( - r'(["\'])(?Phttps?://.+?)\1', arg, 'url', - default=None, group='url') - if not format_url: - continue - ext = 
determine_ext(format_url) - if ext == 'mp4': - formats.append({'url': format_url}) - elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + for kind, _, format_url in re.findall( + r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage): + format_id = kind.lower() + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + elif format_id in ('urllow', 'urlhigh'): + formats.append({ + 'url': format_url, + 'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]), + 'quality': -2 if format_id.endswith('low') else None, + }) self._sort_formats(formats) From 98affc1a482ab41466c76cfded41949c4db58f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Aug 2016 21:20:15 +0700 Subject: [PATCH 1382/3599] [xvideos] Fix test --- youtube_dl/extractor/xvideos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index b2ef15119..30825daae 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -15,10 +15,10 @@ class XVideosIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?xvideos\.com/video(?P[0-9]+)(?:.*)' _TEST = { 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', - 'md5': '4b46ae6ea5e6e9086e714d883313c0c9', + 'md5': '14cea69fcb84db54293b1e971466c2e1', 'info_dict': { 'id': '4588838', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Biker Takes his Girl', 'age_limit': 18, } From 11f502fac145b4592f47c025ee8317fe44020db0 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 16:19:36 +0100 Subject: [PATCH 1383/3599] [theplatform] extract subtitles with multiple formats from the metadata --- youtube_dl/extractor/theplatform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index eda899497..23067e8c6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -73,10 +73,10 @@ class ThePlatformBaseIE(OnceIE): if isinstance(captions, list): for caption in captions: lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type') - subtitles[lang] = [{ + subtitles.setdefault(lang, []).append({ 'ext': mimetype2ext(mime), 'url': src, - }] + }) return { 'title': info['title'], From 2cabee2a7d4c94aa2f4f2e84a3c68eb97cdf9cce Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 16:20:07 +0100 Subject: [PATCH 1384/3599] [amcnetworks] fix typo --- youtube_dl/extractor/amcnetworks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index 26f46acb5..c739d2c99 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -66,7 +66,7 @@ class AMCNetworksIE(ThePlatformIE): self._sort_formats(formats) info.update({ 'id': video_id, - 'subtiles': subtitles, + 'subtitles': subtitles, 'formats': formats, 'age_limit': parse_age_limit(parse_age_limit(rating)), }) From 53fef319f14896ce497d309f661ceb586d7b4d90 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 16 Aug 2016 16:21:04 +0100 Subject: [PATCH 1385/3599] [fxnetworks] extend _VALID_URL to support simpsonsworld.com --- youtube_dl/extractor/fxnetworks.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py index 3ec3b0b46..629897317 100644 --- a/youtube_dl/extractor/fxnetworks.py +++ b/youtube_dl/extractor/fxnetworks.py @@ -11,8 +11,8 @@ from ..utils import ( class FXNetworksIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?fxnetworks\.com/video/(?P\d+)' - _TEST = { + _VALID_URL = 
r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P\d+)' + _TESTS = [{ 'url': 'http://www.fxnetworks.com/video/719841347694', 'md5': '1447d4722e42ebca19e5232ab93abb22', 'info_dict': { @@ -26,7 +26,10 @@ class FXNetworksIE(AdobePassIE): 'timestamp': 1467844741, }, 'add_ie': ['ThePlatform'], - } + }, { + 'url': 'http://www.simpsonsworld.com/video/716094019682', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +38,7 @@ class FXNetworksIE(AdobePassIE): self.raise_geo_restricted() video_data = extract_attributes(self._search_regex( r'()', webpage, 'video data')) - player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', fatal=False) + player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None) release_url = video_data['rel'] title = video_data['data-title'] rating = video_data.get('data-rating') From 6e7e4a6edf6c4ffd56d908ade7f0bfe2bff738b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:19:43 +0700 Subject: [PATCH 1386/3599] [mtg] Add support for viafree URLs (#10358) --- youtube_dl/extractor/tvplay.py | 41 ++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 150bde663..d82bf67b4 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -20,16 +20,25 @@ from ..utils import ( class TVPlayIE(InfoExtractor): - IE_DESC = 'TV3Play and related services' - _VALID_URL = r'''(?x)https?://(?:www\.)? 
- (?:tvplay(?:\.skaties)?\.lv/parraides| - (?:tv3play|play\.tv3)\.lt/programos| - tv3play(?:\.tv3)?\.ee/sisu| - tv(?:3|6|8|10)play\.se/program| - (?:(?:tv3play|viasat4play|tv6play)\.no|tv3play\.dk)/programmer| - play\.novatv\.bg/programi - )/[^/]+/(?P\d+) - ''' + IE_NAME = 'mtg' + IE_DESC = 'MTG services' + _VALID_URL = r'''(?x) + (?: + mtg:| + https?:// + (?:www\.)? + (?: + tvplay(?:\.skaties)?\.lv/parraides| + (?:tv3play|play\.tv3)\.lt/programos| + tv3play(?:\.tv3)?\.ee/sisu| + (?:tv(?:3|6|8|10)play|viafree)\.se/program| + (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer| + play\.novatv\.bg/programi + ) + /(?:[^/]+/)+ + ) + (?P\d+) + ''' _TESTS = [ { 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true', @@ -197,6 +206,14 @@ class TVPlayIE(InfoExtractor): { 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', 'only_matching': True, + }, + { + 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869', + 'only_matching': True, + }, + { + 'url': 'mtg:418113', + 'only_matching': True, } ] @@ -204,13 +221,13 @@ class TVPlayIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://playapi.mtgx.tv/v1/videos/%s' % video_id, video_id, 'Downloading video JSON') + 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON') title = video['title'] try: streams = self._download_json( - 'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, + 'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON') except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: From b35b0d73d853c52ca96ccf4488a4f8960a12e2ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:21:30 +0700 Subject: [PATCH 1387/3599] [viafree] Add extractor (Closes #10358) --- youtube_dl/extractor/extractors.py | 5 ++- 
youtube_dl/extractor/tvplay.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a5e0805b2..55c639158 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -898,7 +898,10 @@ from .tvp import ( TVPIE, TVPSeriesIE, ) -from .tvplay import TVPlayIE +from .tvplay import ( + TVPlayIE, + ViafreeIE, +) from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index d82bf67b4..c8ec2465c 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -311,3 +311,56 @@ class TVPlayIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class ViafreeIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + viafree\. + (?: + (?:dk|no)/programmer| + se/program + ) + /(?:[^/]+/)+(?P[^/?#&]+) + ''' + _TESTS = [{ + 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2', + 'info_dict': { + 'id': '395375', + 'ext': 'mp4', + 'title': 'Husräddarna S02E02', + 'description': 'md5:4db5c933e37db629b5a2f75dfb34829e', + 'series': 'Husräddarna', + 'season': 'Säsong 2', + 'season_number': 2, + 'duration': 2576, + 'timestamp': 1400596321, + 'upload_date': '20140520', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TVPlayIE.ie_key()], + }, { + 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', + 'only_matching': True, + }, { + 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, 
video_id) + + video_id = self._search_regex( + r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](?P\d{6,})', + webpage, 'video id') + + return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key()) From 502d87c5464f1894a8777873b9d11b76ba5a6375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:32:28 +0700 Subject: [PATCH 1388/3599] [mtg] Improve view count extraction --- youtube_dl/extractor/tvplay.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c8ec2465c..4186e82db 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -15,6 +15,7 @@ from ..utils import ( int_or_none, parse_iso8601, qualities, + try_get, update_url_query, ) @@ -203,6 +204,11 @@ class TVPlayIE(InfoExtractor): 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true', 'only_matching': True, }, + { + # views is null + 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183', + 'only_matching': True, + }, { 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', 'only_matching': True, @@ -306,7 +312,7 @@ class TVPlayIE(InfoExtractor): 'season_number': season_number, 'duration': int_or_none(video.get('duration')), 'timestamp': parse_iso8601(video.get('created_at')), - 'view_count': int_or_none(video.get('views', {}).get('total')), + 'view_count': try_get(video, lambda x: x['views']['total'], int), 'age_limit': int_or_none(video.get('age_limit', 0)), 'formats': formats, 'subtitles': subtitles, From 9c0fa60bf375959c7d8582f655b441c534865c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 00:42:02 +0700 Subject: [PATCH 1389/3599] [vbox7] Add support for embed URLs --- youtube_dl/extractor/vbox7.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 
fa7899e6d..8e6d7efe7 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -6,7 +6,7 @@ from ..utils import urlencode_postdata class Vbox7IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vbox7\.com/play:(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?vbox7\.com/(?:play:|emb/external\.php\?.*?\bvid=)(?P[\da-fA-F]+)' _TESTS = [{ 'url': 'http://vbox7.com/play:0946fff23c', 'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', @@ -24,15 +24,19 @@ class Vbox7IE(InfoExtractor): 'title': 'Смях! Чудо - чист за секунди - Скрита камера', }, 'skip': 'georestricted', + }, { + 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://vbox7.com/play:%s' % video_id, video_id) title = self._html_search_regex( - r'(.*)', webpage, 'title').split('/')[0].strip() + r'(.+?)', webpage, 'title').split('/')[0].strip() video_url = self._search_regex( r'src\s*:\s*(["\'])(?P.+?.mp4.*?)\1', From 2a1321a272c7b410db25654cdfdc33c3cd8bd440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 01:02:59 +0700 Subject: [PATCH 1390/3599] [vbox7:generic] Add support for vbox7 embeds --- youtube_dl/extractor/generic.py | 18 ++++++++++++++++++ youtube_dl/extractor/vbox7.py | 10 ++++++++++ 2 files changed, 28 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 50500ce0e..197ab9531 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -72,6 +72,7 @@ from .kaltura import KalturaIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE from .soundcloud import SoundcloudIE +from .vbox7 import Vbox7IE class GenericIE(InfoExtractor): @@ -1373,6 +1374,18 @@ class GenericIE(InfoExtractor): }, 'add_ie': [ArkenaIE.ie_key()], }, + { + 'url': 
'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/', + 'info_dict': { + 'id': '1c7141f46c', + 'ext': 'mp4', + 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [Vbox7IE.ie_key()], + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2239,6 +2252,11 @@ class GenericIE(InfoExtractor): 'uploader': video_uploader, } + # Look for VBOX7 embeds + vbox7_url = Vbox7IE._extract_url(webpage) + if vbox7_url: + return self.url_result(vbox7_url, Vbox7IE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index 8e6d7efe7..e17988573 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,6 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import urlencode_postdata @@ -29,6 +31,14 @@ class Vbox7IE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + ']+src=(?P["\'])(?P(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)', + webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From 8652770bd23ff5f46c5687d94f71cec08d2c5886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 05:44:46 +0700 Subject: [PATCH 1391/3599] [keezmovies] Improve and modernize --- youtube_dl/extractor/keezmovies.py | 138 
+++++++++++++++++++++-------- 1 file changed, 99 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 126ca13df..ad2f8a8c8 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -3,64 +3,124 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..aes import aes_decrypt_text +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, +) from ..utils import ( - sanitized_Request, - url_basename, + determine_ext, + ExtractorError, + int_or_none, + str_to_int, + strip_or_none, ) class KeezMoviesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P[0-9]+)(?:[/?&]|$)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/(?:(?P[^/]+)-)?(?P\d+)' + _TESTS = [{ 'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', 'md5': '1c1e75d22ffa53320f45eeb07bc4cdc0', 'info_dict': { 'id': '1214711', + 'display_id': 'petite-asian-lady-mai-playing-in-bathtub', 'ext': 'mp4', 'title': 'Petite Asian Lady Mai Playing In Bathtub', - 'age_limit': 18, 'thumbnail': 're:^https?://.*\.jpg$', + 'view_count': int, + 'age_limit': 18, } - } + }, { + 'url': 'http://www.keezmovies.com/video/1214711', + 'only_matching': True, + }] - def _real_extract(self, url): - video_id = self._match_id(url) + def _extract_info(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) - - # embedded video - mobj = re.search(r'href="([^"]+)">', webpage) - if mobj: - embedded_url = mobj.group(1) - return self.url_result(embedded_url) - - video_title = self._html_search_regex( - r'

]*>([^<]+)', webpage, 'title') - flashvars = self._parse_json(self._search_regex( - r'var\s+flashvars\s*=\s*([^;]+);', webpage, 'flashvars'), video_id) + webpage = self._download_webpage( + url, display_id, headers={'Cookie': 'age_verified=1'}) formats = [] - for height in (180, 240, 480): - if flashvars.get('quality_%dp' % height): - video_url = flashvars['quality_%dp' % height] - a_format = { - 'url': video_url, - 'height': height, - 'format_id': '%dp' % height, - } - filename_parts = url_basename(video_url).split('_') - if len(filename_parts) >= 2 and re.match(r'\d+[Kk]', filename_parts[1]): - a_format['tbr'] = int(filename_parts[1][:-1]) - formats.append(a_format) + format_urls = set() - age_limit = self._rta_search(webpage) + title = None + thumbnail = None + duration = None + encrypted = False - return { + def extract_format(format_url, height=None): + if not isinstance(format_url, compat_str) or not format_url.startswith('http'): + return + if format_url in format_urls: + return + format_urls.add(format_url) + tbr = int_or_none(self._search_regex( + r'[/_](\d+)[kK][/_]', format_url, 'tbr', default=None)) + if not height: + height = int_or_none(self._search_regex( + r'[/_](\d+)[pP][/_]', format_url, 'height', default=None)) + if encrypted: + format_url = aes_decrypt_text( + video_url, title, 32).decode('utf-8') + formats.append({ + 'url': format_url, + 'format_id': '%dp' % height if height else None, + 'height': height, + 'tbr': tbr, + }) + + flashvars = self._parse_json( + self._search_regex( + r'flashvars\s*=\s*({.+?});', webpage, + 'flashvars', default='{}'), + display_id, fatal=False) + + if flashvars: + title = flashvars.get('video_title') + thumbnail = flashvars.get('image_url') + duration = int_or_none(flashvars.get('video_duration')) + encrypted = flashvars.get('encrypted') is True + for key, value in flashvars.items(): + mobj = re.search(r'quality_(\d+)[pP]', key) + if mobj: + extract_format(value, int(mobj.group(1))) + video_url = 
flashvars.get('video_url') + if video_url and determine_ext(video_url, None): + extract_format(video_url) + + video_url = self._html_search_regex( + r'flashvars\.video_url\s*=\s*(["\'])(?Phttp.+?)\1', + webpage, 'video url', default=None, group='url') + if video_url: + extract_format(compat_urllib_parse_unquote(video_url)) + + if not formats: + if 'title="This video is no longer available"' in webpage: + raise ExtractorError( + 'Video %s is no longer available' % video_id, expected=True) + + self._sort_formats(formats) + + if not title: + title = self._html_search_regex( + r']*>([^<]+)', webpage, 'title') + + return webpage, { 'id': video_id, - 'title': video_title, + 'display_id': display_id, + 'title': strip_or_none(title), + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': 18, 'formats': formats, - 'age_limit': age_limit, - 'thumbnail': flashvars.get('image_url') } + + def _real_extract(self, url): + webpage, info = self._extract_info(url) + info['view_count'] = str_to_int(self._search_regex( + r'([\d,.]+) Views?', webpage, 'view count', fatal=False)) + return info From 6be17c08703ad8ec89c6fb62f31f280956694cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 05:45:49 +0700 Subject: [PATCH 1392/3599] [mofosex] Extract all formats and modernize (Closes #10335) --- youtube_dl/extractor/mofosex.py | 81 +++++++++++++++++---------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index e47c80119..e3bbe5aa8 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -1,53 +1,56 @@ from __future__ import unicode_literals -import os -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, +from ..utils import ( + int_or_none, + str_to_int, + unified_strdate, ) -from ..utils import sanitized_Request +from .keezmovies import 
KeezMoviesIE -class MofosexIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pmofosex\.com/videos/(?P[0-9]+)/.*?\.html)' - _TEST = { - 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', - 'md5': '1b2eb47ac33cc75d4a80e3026b613c5a', +class MofosexIE(KeezMoviesIE): + _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P\d+)/(?P[^/?#&.]+)\.html' + _TESTS = [{ + 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html', + 'md5': '39a15853632b7b2e5679f92f69b78e91', 'info_dict': { - 'id': '5018', + 'id': '318131', + 'display_id': 'amateur-teen-playing-and-masturbating-318131', 'ext': 'mp4', - 'title': 'Japanese Teen Music Video', + 'title': 'amateur teen playing and masturbating', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20121114', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, 'age_limit': 18, } - } + }, { + # This video is no longer available + 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - url = 'http://www.' + mobj.group('url') + webpage, info = self._extract_info(url) - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + view_count = str_to_int(self._search_regex( + r'VIEWS:\s*([\d,.]+)', webpage, 'view count', fatal=False)) + like_count = int_or_none(self._search_regex( + r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage, + 'like count', fatal=False)) + dislike_count = int_or_none(self._search_regex( + r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage, + 'like count', fatal=False)) + upload_date = unified_strdate(self._html_search_regex( + r'Added:([^<]+)', webpage, 'upload date', fatal=False)) - video_title = self._html_search_regex(r'

(.+?)<', webpage, 'title') - video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url')) - path = compat_urllib_parse_urlparse(video_url).path - extension = os.path.splitext(path)[1][1:] - format = path.split('/')[5].split('_')[:2] - format = '-'.join(format) + info.update({ + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'upload_date': upload_date, + 'thumbnail': self._og_search_thumbnail(webpage), + }) - age_limit = self._rta_search(webpage) - - return { - 'id': video_id, - 'title': video_title, - 'url': video_url, - 'ext': extension, - 'format': format, - 'format_id': format, - 'age_limit': age_limit, - } + return info From 8804f10e6b580db38df7301a174cb48ea374f9eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 05:46:45 +0700 Subject: [PATCH 1393/3599] [tube8] Modernize --- youtube_dl/extractor/tube8.py | 60 ++++++----------------------------- 1 file changed, 10 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 1d9271d1e..4053f6c21 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,18 +1,13 @@ from __future__ import unicode_literals -import re - -from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, - sanitized_Request, str_to_int, ) -from ..aes import aes_decrypt_text +from .keezmovies import KeezMoviesIE -class Tube8IE(InfoExtractor): +class Tube8IE(KeezMoviesIE): _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P[^/]+)/(?P\d+)' _TESTS = [{ 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', @@ -33,47 +28,17 @@ class Tube8IE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + webpage, info = self._extract_info(url) - req = sanitized_Request(url) - 
req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, display_id) + if not info['title']: + info['title'] = self._html_search_regex( + r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') - flashvars = self._parse_json( - self._search_regex( - r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars'), - video_id) - - formats = [] - for key, video_url in flashvars.items(): - if not isinstance(video_url, compat_str) or not video_url.startswith('http'): - continue - height = self._search_regex( - r'quality_(\d+)[pP]', key, 'height', default=None) - if not height: - continue - if flashvars.get('encrypted') is True: - video_url = aes_decrypt_text( - video_url, flashvars['video_title'], 32).decode('utf-8') - formats.append({ - 'url': video_url, - 'format_id': '%sp' % height, - 'height': int(height), - }) - self._sort_formats(formats) - - thumbnail = flashvars.get('image_url') - - title = self._html_search_regex( - r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( r'>Description:\s*(.+?)\s*<', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'\s*(.+?)\s*<', webpage, 'uploader', fatal=False) - duration = int_or_none(flashvars.get('video_duration')) like_count = int_or_none(self._search_regex( r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False)) @@ -86,18 +51,13 @@ class Tube8IE(InfoExtractor): r'(\d+)', webpage, 'comment count', fatal=False)) - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, + info.update({ 'description': description, - 'thumbnail': thumbnail, 'uploader': uploader, - 'duration': duration, 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, - 'age_limit': 18, - 'formats': formats, - } + }) + + return info From ab19b46b88bb54971b973176976d8d189222a6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 06:02:12 +0700 Subject: [PATCH 1394/3599] 
[extremetube] Modernize --- youtube_dl/extractor/extremetube.py | 73 +++++++---------------------- 1 file changed, 16 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 3403581fd..b4fd9334a 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -1,22 +1,17 @@ from __future__ import unicode_literals -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - sanitized_Request, - str_to_int, -) +from ..utils import str_to_int +from .keezmovies import KeezMoviesIE -class ExtremeTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P[^/#?&]+)' +class ExtremeTubeIE(KeezMoviesIE): + _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P[^/]+)-)(?P\d+)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', - 'md5': '344d0c6d50e2f16b06e49ca011d8ac69', + 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', 'info_dict': { - 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', + 'id': '652431', + 'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow', 'ext': 'mp4', 'title': 'Music Video 14 british euro brit european cumshots swallow', 'uploader': 'unknown', @@ -35,58 +30,22 @@ class ExtremeTubeIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + webpage, info = self._extract_info(url) - req = sanitized_Request(url) - req.add_header('Cookie', 'age_verified=1') - webpage = self._download_webpage(req, video_id) + if not info['title']: + info['title'] = self._search_regex( + r']+title="([^"]+)"[^>]*>', webpage, 'title') - video_title = self._html_search_regex( - r'

]*?title="([^"]+)"[^>]*>', webpage, 'title') uploader = self._html_search_regex( r'Uploaded by:\s*\s*(.+?)\s*', webpage, 'uploader', fatal=False) - view_count = str_to_int(self._html_search_regex( + view_count = str_to_int(self._search_regex( r'Views:\s*\s*([\d,\.]+)', webpage, 'view count', fatal=False)) - flash_vars = self._parse_json( - self._search_regex( - r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flash vars'), - video_id) - - formats = [] - for quality_key, video_url in flash_vars.items(): - height = int_or_none(self._search_regex( - r'quality_(\d+)[pP]$', quality_key, 'height', default=None)) - if not height: - continue - f = { - 'url': video_url, - } - mobj = re.search( - r'/(?P\d{3,4})[pP]_(?P\d+)[kK]_\d+', video_url) - if mobj: - height = int(mobj.group('height')) - bitrate = int(mobj.group('bitrate')) - f.update({ - 'format_id': '%dp-%dk' % (height, bitrate), - 'height': height, - 'tbr': bitrate, - }) - else: - f.update({ - 'format_id': '%dp' % height, - 'height': height, - }) - formats.append(f) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_title, - 'formats': formats, + info.update({ 'uploader': uploader, 'view_count': view_count, - 'age_limit': 18, - } + }) + + return info From a44694ab4e1ee6ac496ea09c3759923c03b9430c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 06:19:22 +0700 Subject: [PATCH 1395/3599] [ChangeLog] Actualize --- ChangeLog | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 32504dab5..8f27019c1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,28 @@ version +Core ++ Add _get_netrc_login_info + Extractors +* [mofosex] Extract all formats (#10335) ++ [generic] Add support for vbox7 embeds ++ [vbox7] Add support for embed URLs ++ [viafree] Add extractor (#10358) ++ [mtg] Add support for viafree URLs (#10358) +* [theplatform] Extract all subtitles per language ++ [xvideos] Fix HLS extraction (#10356) 
++ [amcnetworks] Add extractor ++ [bbc:playlist] Add support for pagination (#10349) ++ [fxnetworks] Add extractor (#9462) * [cbslocal] Fix extraction for SendtoNews-based videos * [sendtonews] Fix extraction -* [jwplatform] Now can parse video_id from JWPlayer data +* [jwplatform] Extract video id from JWPlayer data +- [zippcast] Remove extractor (#10332) ++ [viceland] Add extractor (#8799) ++ [adobepass] Add base extractor for Adobe Pass Authentication +* [life:embed] Improve extraction +* [vgtv] Detect geo restricted videos (#10348) ++ [uplynk] Add extractor * [xiami] Fix extraction (#10342) From b3d7dce42952cf23b8f9ea883c75736dadfee12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 06:21:21 +0700 Subject: [PATCH 1396/3599] release 2016.08.17 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++++-- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1e0d99b43..ae28d83d5 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.13*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.13** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.13 +[debug] youtube-dl version 2016.08.17 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 8f27019c1..354306a97 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.17 Core + Add _get_netrc_login_info diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 56fc41a40..189b9301d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -35,6 +35,7 @@ - **AlJazeera** - **Allocine** - **AlphaPorno** + - **AMCNetworks** - **AnimeOnDemand** - **anitube.se** - **AnySex** @@ -247,6 +248,7 @@ - **Funimation** - **FunnyOrDie** - **Fusion** + - **FXNetworks** - **GameInformer** - **GameOne** - **gameone:playlist** @@ -398,6 +400,7 @@ - **Moviezine** - **MPORA** - **MSN** + - **mtg**: MTG services - **MTV** - **mtv.de** - **mtvservices:embedded** @@ -731,7 +734,6 @@ - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** - - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** - **twitch:clips** @@ -748,6 +750,8 @@ - **UDNEmbed**: 聯合影音 - **Unistra** - **uol.com.br** + - **uplynk** + - **uplynk:preplay** - **Urort**: NRK P3 Urørt - **URPlay** - **USAToday** @@ -765,7 +769,9 @@ - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, 
Aftenposten and Aftonbladet - **vh1.com** + - **Viafree** - **Vice** + - **Viceland** - **ViceShow** - **Vidbit** - **Viddler** @@ -887,4 +893,3 @@ - **ZDFChannel** - **zingmp3:album**: mp3.zing.vn albums - **zingmp3:song**: mp3.zing.vn songs - - **ZippCast** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cc93d22aa..cf5950117 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.13' +__version__ = '2016.08.17' From 92cd9fd56574f22087a8f8df52192df1d4c11a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 07:01:32 +0700 Subject: [PATCH 1397/3599] [keezmovies] Make display_id optional --- youtube_dl/extractor/keezmovies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index ad2f8a8c8..b002c0dd1 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -39,7 +39,8 @@ class KeezMoviesIE(InfoExtractor): def _extract_info(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') or video_id + display_id = (mobj.group('display_id') if 'display_id' + in mobj.groupdict() else None) or mobj.group('id') webpage = self._download_webpage( url, display_id, headers={'Cookie': 'age_verified=1'}) From b505e98784b2c1cc07f734e9709702ee9d01287e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Aug 2016 07:02:13 +0700 Subject: [PATCH 1398/3599] [extremetube] Revert display_id --- youtube_dl/extractor/extremetube.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index b4fd9334a..445f9438d 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -5,13 +5,12 @@ from .keezmovies import KeezMoviesIE class 
ExtremeTubeIE(KeezMoviesIE): - _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P[^/]+)-)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?P[^/#?&]+)' _TESTS = [{ 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', 'info_dict': { - 'id': '652431', - 'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow', + 'id': 'music-video-14-british-euro-brit-european-cumshots-swallow-652431', 'ext': 'mp4', 'title': 'Music Video 14 british euro brit european cumshots swallow', 'uploader': 'unknown', From 7273e5849b27cb7d0f4d5f40e7801cab2da85ae3 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 17 Aug 2016 11:03:09 +0100 Subject: [PATCH 1399/3599] [discoverygo] extend _VALID_URL to support other networks --- youtube_dl/extractor/discoverygo.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index adb68b96c..cba709935 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -11,7 +11,17 @@ from ..utils import ( class DiscoveryGoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'''(?x)https?://(?:www\.)?(?: + discovery| + investigationdiscovery| + discoverylife| + animalplanet| + ahctv| + destinationamerica| + sciencechannel| + tlc| + velocitychannel + )go\.com/(?:[^/]+/)*(?P[^/?#&]+)''' _TEST = { 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', 'info_dict': { From 4e9fee101508fe90c5b103738d1b6458e40affd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 04:37:14 +0700 Subject: [PATCH 1400/3599] [hgtvcom:show] Add extractor (Closes #10365) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/hgtv.py | 31 
++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 55c639158..e61bb11c3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -324,7 +324,10 @@ from .heise import HeiseIE from .hellporno import HellPornoIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE -from .hgtv import HGTVIE +from .hgtv import ( + HGTVIE, + HGTVComShowIE, +) from .historicfilms import HistoricFilmsIE from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE diff --git a/youtube_dl/extractor/hgtv.py b/youtube_dl/extractor/hgtv.py index c3f0733cf..69543bff2 100644 --- a/youtube_dl/extractor/hgtv.py +++ b/youtube_dl/extractor/hgtv.py @@ -46,3 +46,34 @@ class HGTVIE(InfoExtractor): 'episode_number': int_or_none(embed_vars.get('episode')), 'ie_key': 'ThePlatform', } + + +class HGTVComShowIE(InfoExtractor): + IE_NAME = 'hgtv.com:show' + _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P[^/?#&]+)' + _TEST = { + 'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-videos', + 'info_dict': { + 'id': 'flip-or-flop-full-episodes-videos', + 'title': 'Flip or Flop Full Episodes', + }, + 'playlist_mincount': 15, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + config = self._parse_json( + self._search_regex( + r'(?s)data-module=["\']video["\'][^>]*>.*?]+type=["\']text/x-config["\'][^>]*>(.+?) 
Date: Thu, 18 Aug 2016 04:39:31 +0700 Subject: [PATCH 1401/3599] [keezmovies] PEP 8 --- youtube_dl/extractor/keezmovies.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index b002c0dd1..588a4d0ec 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -39,8 +39,9 @@ class KeezMoviesIE(InfoExtractor): def _extract_info(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = (mobj.group('display_id') if 'display_id' - in mobj.groupdict() else None) or mobj.group('id') + display_id = (mobj.group('display_id') + if 'display_id' in mobj.groupdict() + else None) or mobj.group('id') webpage = self._download_webpage( url, display_id, headers={'Cookie': 'age_verified=1'}) From 08a42f9c741aa37a599e6fe54ec8b9660df117e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 05:22:23 +0700 Subject: [PATCH 1402/3599] [vk] Fix authentication on python3 --- youtube_dl/extractor/vk.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 3ee66e23e..634d17d91 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -52,8 +52,9 @@ class VKBaseIE(InfoExtractor): # what actually happens. # We will workaround this VK issue by resetting the remixlhk cookie to # the first one manually. 
- cookies = url_handle.headers.get('Set-Cookie') - if cookies: + for header, cookies in url_handle.headers.items(): + if header.lower() != 'set-cookie': + continue if sys.version_info[0] >= 3: cookies = cookies.encode('iso-8859-1') cookies = cookies.decode('utf-8') @@ -61,6 +62,7 @@ class VKBaseIE(InfoExtractor): if remixlhk: value, domain = remixlhk.groups() self._set_cookie(domain, 'remixlhk', value) + break login_page = self._download_webpage( 'https://login.vk.com/?act=login', None, From 51815886a98503593524ec6ffa778ff19d840e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 06:14:05 +0700 Subject: [PATCH 1403/3599] [vk:wallpost] Fix audio extraction --- youtube_dl/extractor/vk.py | 66 ++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 634d17d91..cd22df25a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,6 +1,7 @@ # encoding: utf-8 from __future__ import unicode_literals +import collections import re import json import sys @@ -16,7 +17,6 @@ from ..utils import ( get_element_by_class, int_or_none, orderedSet, - parse_duration, remove_start, str_to_int, unescapeHTML, @@ -447,6 +447,9 @@ class VKWallPostIE(VKBaseIE): 'skip_download': True, }, }], + 'params': { + 'usenetrc': True, + }, 'skip': 'Requires vk account credentials', }, { # single YouTube embed, no leading - @@ -456,6 +459,9 @@ class VKWallPostIE(VKBaseIE): 'title': 'Sergey Gorbunov - Wall post 85155021_6319', }, 'playlist_count': 1, + 'params': { + 'usenetrc': True, + }, 'skip': 'Requires vk account credentials', }, { # wall page URL @@ -483,37 +489,41 @@ class VKWallPostIE(VKBaseIE): raise ExtractorError('VK said: %s' % error, expected=True) description = clean_html(get_element_by_class('wall_post_text', webpage)) - uploader = clean_html(get_element_by_class( - 'fw_post_author', webpage)) or 
self._og_search_description(webpage) + uploader = clean_html(get_element_by_class('author', webpage)) thumbnail = self._og_search_thumbnail(webpage) entries = [] - for audio in re.finditer(r'''(?sx) - ]+ - id=(?P["\'])audio_info(?P\d+_\d+).*?(?P=q1)[^>]+ - value=(?P["\'])(?Phttp.+?)(?P=q2) - .+? - ''', webpage): - audio_html = audio.group(0) - audio_id = audio.group('id') - duration = parse_duration(get_element_by_class('duration', audio_html)) - track = self._html_search_regex( - r']+id=["\']title%s[^>]*>([^<]+)' % audio_id, - audio_html, 'title', default=None) - artist = self._html_search_regex( - r'>([^<]+)\s*&ndash', audio_html, - 'artist', default=None) - entries.append({ - 'id': audio_id, - 'url': audio.group('url'), - 'title': '%s - %s' % (artist, track) if artist and track else audio_id, - 'thumbnail': thumbnail, - 'duration': duration, - 'uploader': uploader, - 'artist': artist, - 'track': track, - }) + audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage) + if audio_ids: + al_audio = self._download_webpage( + 'https://vk.com/al_audio.php', post_id, + note='Downloading audio info', fatal=False, + data=urlencode_postdata({ + 'act': 'reload_audio', + 'al': '1', + 'ids': ','.join(audio_ids) + })) + if al_audio: + Audio = collections.namedtuple( + 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration']) + audios = self._parse_json( + self._search_regex( + r'(.+?)', al_audio, 'audios', default='[]'), + post_id, fatal=False, transform_source=unescapeHTML) + if isinstance(audios, list): + for audio in audios: + a = Audio._make(audio[:6]) + entries.append({ + 'id': '%s_%s' % (a.user_id, a.id), + 'url': a.url, + 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id, + 'thumbnail': thumbnail, + 'duration': a.duration, + 'uploader': uploader, + 'artist': a.artist, + 'track': a.track, + }) for video in re.finditer( r']+href=(["\'])(?P/video(?:-?[\d_]+).*?)\1', webpage): From b0c8f2e9c8946f8aab4be0d1435e504aac0d317f Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 17 Aug 2016 12:45:24 +0200 Subject: [PATCH 1404/3599] [DBTV:generic] Add support for embeds --- youtube_dl/extractor/dbtv.py | 6 ++++++ youtube_dl/extractor/generic.py | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index caff8842e..73dba5e2a 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -38,6 +38,12 @@ class DBTVIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r']+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/lazyplayer/\d+.*?)\1', + webpage)] + def _real_extract(self, url): video_id, display_id = re.match(self._VALID_URL, url).groups() diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 197ab9531..1b71f7ac8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -73,6 +73,7 @@ from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE from .soundcloud import SoundcloudIE from .vbox7 import Vbox7IE +from .dbtv import DBTVIE class GenericIE(InfoExtractor): @@ -1386,6 +1387,11 @@ class GenericIE(InfoExtractor): }, 'add_ie': [Vbox7IE.ie_key()], }, + { + # DBTV embeds + 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/', + 'playlist_mincount': 3, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2257,6 +2263,11 @@ class GenericIE(InfoExtractor): if vbox7_url: return self.url_result(vbox7_url, Vbox7IE.ie_key()) + # Look for DBTV embeds + dbtv_urls = DBTVIE._extract_urls(webpage) + if dbtv_urls: + return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') From b0d578ff7b54c521776cf8d1e050dc198bbc26e6 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 21:30:55 +0700 Subject: [PATCH 1405/3599] [dbtv] Relax embed regex --- youtube_dl/extractor/dbtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py index 73dba5e2a..6d880d43d 100644 --- a/youtube_dl/extractor/dbtv.py +++ b/youtube_dl/extractor/dbtv.py @@ -41,7 +41,7 @@ class DBTVIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return [url for _, url in re.findall( - r']+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/lazyplayer/\d+.*?)\1', + r']+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1', webpage)] def _real_extract(self, url): From fd3ec986a4217319d0cc345c5e2eb910d90be6f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 21:35:41 +0700 Subject: [PATCH 1406/3599] [generic] Fix dbtv test (Closes #10364) --- youtube_dl/extractor/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1b71f7ac8..506892b11 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1390,6 +1390,10 @@ class GenericIE(InfoExtractor): { # DBTV embeds 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/', + 'info_dict': { + 'id': '43254897', + 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans', + }, 'playlist_mincount': 3, }, # { From 13585d7682ef6351bfcd463cf1802bc8fbadaf43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 23:32:00 +0700 Subject: [PATCH 1407/3599] [utils] Recognize lowercase units in parse_filesize --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 74fcf91c0..cb578cd53 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -817,7 +817,9 @@ class 
TestUtil(unittest.TestCase): self.assertEqual(parse_filesize('2 MiB'), 2097152) self.assertEqual(parse_filesize('5 GB'), 5000000000) self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) + self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) + self.assertEqual(parse_filesize('1,24 kb'), 1240) def test_parse_count(self): self.assertEqual(parse_count(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b3b687a31..35362e767 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1508,34 +1508,42 @@ def parse_filesize(s): 'KB': 1000, 'kB': 1024, 'Kb': 1000, + 'kb': 1000, 'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, + 'mb': 1000 ** 2, 'GiB': 1024 ** 3, 'GB': 1000 ** 3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, + 'gb': 1000 ** 3, 'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, + 'tb': 1000 ** 4, 'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, + 'pb': 1000 ** 5, 'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, + 'eb': 1000 ** 6, 'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, + 'zb': 1000 ** 7, 'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, + 'yb': 1000 ** 8, } return lookup_unit_table(_UNIT_TABLE, s) From 850837b67ada7cf0a139117a7335aa40990cd0d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 23:52:41 +0700 Subject: [PATCH 1408/3599] [porncom] Add extractor (Closes #2251, closes #10251) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/porncom.py | 89 ++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 youtube_dl/extractor/porncom.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e61bb11c3..6c5d46015 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -642,6 +642,7 @@ from .podomatic import PodomaticIE 
from .pokemon import PokemonIE from .polskieradio import PolskieRadioIE from .porn91 import Porn91IE +from .porncom import PornComIE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py new file mode 100644 index 000000000..4baf79688 --- /dev/null +++ b/youtube_dl/extractor/porncom.py @@ -0,0 +1,89 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + js_to_json, + parse_filesize, + str_to_int, +) + + +class PornComIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P[^/]+)-)?(?P\d+)' + _TESTS = [{ + 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339', + 'md5': '3f30ce76267533cd12ba999263156de7', + 'info_dict': { + 'id': '2603339', + 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec', + 'ext': 'mp4', + 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 551, + 'view_count': int, + 'age_limit': 18, + }, + }, { + 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + config = self._parse_json( + self._search_regex( + r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=', + webpage, 'config', default='{}'), + display_id, transform_source=js_to_json, fatal=False) + + if config: + title = config['title'] + formats = [{ + 'url': stream['url'], + 'format_id': stream.get('id'), + 'height': int_or_none(self._search_regex( + r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None)) + } for stream in config['streams'] if 
stream.get('url')] + thumbnail = (compat_urlparse.urljoin( + config['thumbCDN'], config['poster']) + if config.get('thumbCDN') and config.get('poster') else None) + duration = int_or_none(config.get('length')) + else: + title = self._search_regex( + (r'([^<]+)', r']*>([^<]+)

'), + webpage, 'title') + formats = [{ + 'url': compat_urlparse.urljoin(url, format_url), + 'format_id': '%sp' % height, + 'height': int(height), + 'filesize_approx': parse_filesize(filesize), + } for format_url, height, filesize in re.findall( + r']+href="(/download/[^"]+)">MPEG4 (\d+)p]*>(\d+\s+[a-zA-Z]+)<', + webpage)] + thumbnail = None + duration = None + + self._sort_formats(formats) + + view_count = str_to_int(self._search_regex( + r'class=["\']views["\'][^>]*>

([\d,.]+)', webpage, 'view count')) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + 'age_limit': 18, + } From 8b2dc4c3287e5e90f339af687f3a272818c94fea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 18 Aug 2016 23:59:13 +0700 Subject: [PATCH 1409/3599] [options] Remove output template description from --help Same reasons as for --format --- youtube_dl/options.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d32a9e32c..5d62deef4 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -628,22 +628,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-o', '--output', dest='outtmpl', metavar='TEMPLATE', - help=('Output filename template. Use %(title)s to get the title, ' - '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' - '%(autonumber)s to get an automatically incremented number, ' - '%(ext)s for the filename extension, ' - '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' - '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), ' - '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (youtube, metacafe, etc), ' - '%(id)s for the video id, ' - '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, ' - '%(playlist_index)s for the position in the playlist. ' - '%(height)s and %(width)s for the width and height of the video format. ' - '%(resolution)s for a textual description of the resolution of the video format. ' - '%% for a literal percent. ' - 'Use - to output to stdout. 
Can also be used to download to a different directory, ' - 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) + help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info')) filesystem.add_option( '--autonumber-size', dest='autonumber_size', metavar='NUMBER', From 93a63b36f1c52a9981050e393d1876d6162abb49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 19 Aug 2016 00:13:24 +0700 Subject: [PATCH 1410/3599] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 354306a97..7e8bb834d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +version + +Core +- Remove output template description from --help +* Recognize lowercase units in parse_filesize + +Extractors ++ [porncom] Add extractor for porn.com (#2251, #10251) ++ [generic] Add support for DBTV embeds +* [vk:wallpost] Fix audio extraction for new site layout +* [vk] Fix authentication ++ [hgtvcom:show] Add extractor for hgtv.com shows (#10365) ++ [discoverygo] Add support for another GO network sites + + version 2016.08.17 Core From bd1bcd3ea079889cfd7cd44c0ea750ac9d432e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 19 Aug 2016 00:15:12 +0700 Subject: [PATCH 1411/3599] release 2016.08.19 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 28 ++-------------------------- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 5 files changed, 9 insertions(+), 31 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index ae28d83d5..7af3c7099 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.19** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.17 +[debug] youtube-dl version 2016.08.19 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7e8bb834d..e99ffcec6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.08.19 Core - Remove output template description from --help diff --git a/README.md b/README.md index cabbbef76..952db7abb 100644 --- a/README.md +++ b/README.md @@ -201,32 +201,8 @@ which means you can modify it, redistribute it or use it however you like. -a, --batch-file FILE File containing URLs to download ('-' for stdin) --id Use only video ID in file name - -o, --output TEMPLATE Output filename template. 
Use %(title)s to - get the title, %(uploader)s for the - uploader name, %(uploader_id)s for the - uploader nickname if different, - %(autonumber)s to get an automatically - incremented number, %(ext)s for the - filename extension, %(format)s for the - format description (like "22 - 1280x720" or - "HD"), %(format_id)s for the unique id of - the format (like YouTube's itags: "137"), - %(upload_date)s for the upload date - (YYYYMMDD), %(extractor)s for the provider - (youtube, metacafe, etc), %(id)s for the - video id, %(playlist_title)s, - %(playlist_id)s, or %(playlist)s (=title if - present, ID otherwise) for the playlist the - video is in, %(playlist_index)s for the - position in the playlist. %(height)s and - %(width)s for the width and height of the - video format. %(resolution)s for a textual - description of the resolution of the video - format. %% for a literal percent. Use - to - output to stdout. Can also be used to - download to a different directory, for - example with -o '/my/downloads/%(uploader)s - /%(title)s-%(id)s.%(ext)s' . 
+ -o, --output TEMPLATE Output filename template, see the "OUTPUT + TEMPLATE" for all the info --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 189b9301d..edf192138 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -279,6 +279,7 @@ - **Helsinki**: helsinki.fi - **HentaiStigma** - **HGTV** + - **hgtv.com:show** - **HistoricFilms** - **history:topic**: History.com Topic - **hitbox** @@ -523,6 +524,7 @@ - **podomatic** - **Pokemon** - **PolskieRadio** + - **PornCom** - **PornHd** - **PornHub**: PornHub and Thumbzilla - **PornHubPlaylist** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cf5950117..691f2c591 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.17' +__version__ = '2016.08.19' From 9e5751b9fe72f7425e4cb3f22a56b6a95b59e41d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 19 Aug 2016 01:13:45 +0700 Subject: [PATCH 1412/3599] [globo:article] Relax _VALID_URL and video id regex (Closes #10379) --- youtube_dl/extractor/globo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 3de8356f6..dbacbfc61 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -396,12 +396,12 @@ class GloboIE(InfoExtractor): class GloboArticleIE(InfoExtractor): - _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/]+)\.html' + _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P[^/]+)(?:\.html)?' 
_VIDEOID_REGEXES = [ r'\bdata-video-id=["\'](\d{7,})', r'\bdata-player-videosids=["\'](\d{7,})', - r'\bvideosIDs\s*:\s*["\'](\d{7,})', + r'\bvideosIDs\s*:\s*["\']?(\d{7,})', r'\bdata-id=["\'](\d{7,})', r']+\bid=["\'](\d{7,})', ] @@ -423,6 +423,9 @@ class GloboArticleIE(InfoExtractor): }, { 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html', 'only_matching': True, + }, { + 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271', + 'only_matching': True, }] @classmethod From e4659b45474acb563db0ab4284abdfc80837307e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 20:37:17 +0800 Subject: [PATCH 1413/3599] [utils] Correct octal/hexadecimal number detection in js_to_json --- ChangeLog | 6 ++++++ test/test_utils.py | 3 +++ youtube_dl/utils.py | 6 +++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e99ffcec6..98a3dbca3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Fix js_to_json(): correct octal or hexadecimal number detection + + version 2016.08.19 Core diff --git a/test/test_utils.py b/test/test_utils.py index cb578cd53..b83da93b4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -712,6 +712,9 @@ class TestUtil(unittest.TestCase): inp = '''{"foo":101}''' self.assertEqual(js_to_json(inp), '''{"foo":101}''') + inp = '''{"duration": "00:01:07"}''' + self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') + def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 35362e767..0c36c1b80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2038,14 +2038,14 @@ def js_to_json(code): }.get(m.group(0), m.group(0)), v[1:-1]) INTEGER_TABLE = ( - (r'^0[xX][0-9a-fA-F]+', 16), 
- (r'^0+[0-7]+', 8), + (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16), + (r'^(0+[0-7]+)\s*:?$', 8), ) for regex, base in INTEGER_TABLE: im = re.match(regex, v) if im: - i = int(im.group(0), base) + i = int(im.group(1), base) return '"%d":' % i if v.endswith(':') else '%d' % i return '"%s"' % v From b82232036a019e340b715779108c3f4caea8a78d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 20:39:28 +0800 Subject: [PATCH 1414/3599] [n-tv.de] Fix extraction (closes #10331) --- ChangeLog | 3 +++ youtube_dl/extractor/ntvde.py | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 98a3dbca3..6281fe325 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * Fix js_to_json(): correct octal or hexadecimal number detection +Extractors +* [n-tv.de] Fix extraction (#10331) + version 2016.08.19 diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index a83e85cb8..d28a81542 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -1,6 +1,8 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( @@ -40,8 +42,8 @@ class NTVDeIE(InfoExtractor): timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp')) vdata = self._parse_json(self._search_regex( r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);', - webpage, 'player data'), - video_id, transform_source=js_to_json) + webpage, 'player data'), video_id, + transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s))) duration = parse_duration(vdata.get('duration')) formats = [] From 55af45fcab4295a92d56180cdbebe7b47e094bc3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 23:12:30 +0800 Subject: [PATCH 1415/3599] [radiobremen] Update _TEST (closes #10337) --- youtube_dl/extractor/radiobremen.py | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 0cbb15f08..19a751da0 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -13,15 +13,15 @@ class RadioBremenIE(InfoExtractor): IE_NAME = 'radiobremen' _TEST = { - 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720', + 'url': 'http://www.radiobremen.de/mediathek/?id=141876', 'info_dict': { - 'id': '114720', + 'id': '141876', 'ext': 'mp4', - 'duration': 1685, + 'duration': 178, 'width': 512, - 'title': 'buten un binnen vom 22. Dezember', + 'title': 'Druck auf Patrick Öztürk', 'thumbnail': 're:https?://.*\.jpg$', - 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++', + 'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. 
Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.', }, } From 520251c093f5e0fe6af5e57203a0452aef0682ac Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 23:53:47 +0800 Subject: [PATCH 1416/3599] [extractor/common] Recognize m3u8 manifests in HTML5 multimedia tags --- ChangeLog | 1 + youtube_dl/extractor/common.py | 36 +++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6281fe325..450351231 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* Support m3u8 manifests in HTML5 multimedia tags * Fix js_to_json(): correct octal or hexadecimal number detection Extractors diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9427ff449..07d58afe7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1695,7 +1695,7 @@ class InfoExtractor(object): self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats - def _parse_html5_media_entries(self, base_url, webpage): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -1710,6 +1710,21 @@ class InfoExtractor(object): return f return {} + def _media_formats(src, cur_media_type): + full_url = absolute_url(src) + if determine_ext(full_url) == 'm3u8': + is_plain_url = False + formats = self._extract_m3u8_formats( + full_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=m3u8_id) + else: + is_plain_url = True + formats = [{ + 'url': full_url, + 'vcodec': 'none' if cur_media_type == 'audio' else None, + }] + return is_plain_url, formats + entries = [] for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): media_info = { @@ -1719,10 +1734,8 @@ class InfoExtractor(object): media_attributes = 
extract_attributes(media_tag) src = media_attributes.get('src') if src: - media_info['formats'].append({ - 'url': absolute_url(src), - 'vcodec': 'none' if media_type == 'audio' else None, - }) + _, formats = _media_formats(src) + media_info['formats'].extend(formats) media_info['thumbnail'] = media_attributes.get('poster') if media_content: for source_tag in re.findall(r']+>', media_content): @@ -1730,12 +1743,13 @@ class InfoExtractor(object): src = source_attributes.get('src') if not src: continue - f = parse_content_type(source_attributes.get('type')) - f.update({ - 'url': absolute_url(src), - 'vcodec': 'none' if media_type == 'audio' else None, - }) - media_info['formats'].append(f) + is_plain_url, formats = _media_formats(src, media_type) + if is_plain_url: + f = parse_content_type(source_attributes.get('type')) + f.update(formats[0]) + media_info['formats'].append(f) + else: + media_info['formats'].extend(formats) for track_tag in re.findall(r']+>', media_content): track_attributes = extract_attributes(track_tag) kind = track_attributes.get('kind') From ecc90093f9c3793439832f4c9d279605da3489a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 19 Aug 2016 23:56:09 +0800 Subject: [PATCH 1417/3599] [vuclip] Adapt to the new API and update _TEST --- youtube_dl/extractor/vuclip.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index b73da5cd0..55e087bdb 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -17,12 +17,12 @@ class VuClipIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?vuclip\.com/w\?.*?cid=(?P[0-9]+)' _TEST = { - 'url': 'http://m.vuclip.com/w?cid=922692425&fid=70295&z=1010&nvar&frm=index.html', + 'url': 'http://m.vuclip.com/w?cid=1129900602&bu=8589892792&frm=w&z=34801&op=0&oc=843169247§ion=recommend', 'info_dict': { - 'id': '922692425', + 'id': '1129900602', 'ext': '3gp', - 'title': 'The Toy Soldiers - Hollywood Movie 
Trailer', - 'duration': 177, + 'title': 'Top 10 TV Convicts', + 'duration': 733, } } @@ -54,7 +54,7 @@ class VuClipIE(InfoExtractor): 'url': video_url, }] else: - formats = self._parse_html5_media_entries(url, webpage)[0]['formats'] + formats = self._parse_html5_media_entries(url, webpage, video_id)[0]['formats'] title = remove_end(self._html_search_regex( r'(.*?)-\s*Vuclip', webpage, 'title').strip(), ' - Video') From a9a3b4a081a6793f0dd0b40be8429a2aa3c1c36d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:08:23 +0800 Subject: [PATCH 1418/3599] [miomio] Adapt to the new API and update _TESTS The test case is from #9680 --- youtube_dl/extractor/miomio.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 937ba0f28..ec1b4c4fe 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -25,10 +25,7 @@ class MioMioIE(InfoExtractor): 'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕', 'duration': 5923, }, - 'params': { - # The server provides broken file - 'skip_download': True, - } + 'skip': 'Unable to load videos', }, { 'url': 'http://www.miomio.tv/watch/cc184024/', 'info_dict': { @@ -47,16 +44,12 @@ class MioMioIE(InfoExtractor): 'skip': 'Unable to load videos', }, { # new 'h5' player - 'url': 'http://www.miomio.tv/watch/cc273295/', - 'md5': '', + 'url': 'http://www.miomio.tv/watch/cc273997/', + 'md5': '0b27a4b4495055d826813f8c3a6b2070', 'info_dict': { - 'id': '273295', + 'id': '273997', 'ext': 'mp4', - 'title': 'アウト×デラックス 20160526', - }, - 'params': { - # intermittent HTTP 500 - 'skip_download': True, + 'title': 'マツコの知らない世界【劇的進化SP!ビニール傘&冷凍食品2016】 1_2 - 16 05 31', }, }] @@ -116,7 +109,7 @@ class MioMioIE(InfoExtractor): player_webpage = self._download_webpage( player_url, video_id, note='Downloading player webpage', headers={'Referer': url}) - entries = self._parse_html5_media_entries(player_url, player_webpage) + 
entries = self._parse_html5_media_entries(player_url, player_webpage, video_id) http_headers = {'Referer': player_url} else: http_headers = {'Referer': 'http://www.miomio.tv%s' % mioplayer_path} From 70852b47ca101f0b4acc76eb3213b763a14b3602 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:12:32 +0800 Subject: [PATCH 1419/3599] [utils] Recognize units with full names in parse_filename Reference: https://en.wikipedia.org/wiki/Template:Quantities_of_bytes --- ChangeLog | 4 +++- test/test_utils.py | 1 + youtube_dl/utils.py | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 450351231..b36e4438c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,9 @@ version Core -* Support m3u8 manifests in HTML5 multimedia tags ++ Recognize file size strings with full unit names (for example "8.5 + megabytes") ++ Support m3u8 manifests in HTML5 multimedia tags * Fix js_to_json(): correct octal or hexadecimal number detection Extractors diff --git a/test/test_utils.py b/test/test_utils.py index b83da93b4..d16ea7f77 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -823,6 +823,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) self.assertEqual(parse_filesize('1,24 kb'), 1240) + self.assertEqual(parse_filesize('8.5 megabytes'), 8500000) def test_parse_count(self): self.assertEqual(parse_count(None), None) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0c36c1b80..41ca562f1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1504,46 +1504,63 @@ def parse_filesize(s): _UNIT_TABLE = { 'B': 1, 'b': 1, + 'bytes': 1, 'KiB': 1024, 'KB': 1000, 'kB': 1024, 'Kb': 1000, 'kb': 1000, + 'kilobytes': 1000, + 'kibibytes': 1024, 'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, 'mb': 1000 ** 2, + 'megabytes': 1000 ** 2, + 'mebibytes': 1024 ** 2, 'GiB': 1024 ** 3, 'GB': 1000 ** 
3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, 'gb': 1000 ** 3, + 'gigabytes': 1000 ** 3, + 'gibibytes': 1024 ** 3, 'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, 'tb': 1000 ** 4, + 'terabytes': 1000 ** 4, + 'tebibytes': 1024 ** 4, 'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, 'pb': 1000 ** 5, + 'petabytes': 1000 ** 5, + 'pebibytes': 1024 ** 5, 'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, 'eb': 1000 ** 6, + 'exabytes': 1000 ** 6, + 'exbibytes': 1024 ** 6, 'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, 'zb': 1000 ** 7, + 'zettabytes': 1000 ** 7, + 'zebibytes': 1024 ** 7, 'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, 'yb': 1000 ** 8, + 'yottabytes': 1000 ** 8, + 'yobibytes': 1024 ** 8, } return lookup_unit_table(_UNIT_TABLE, s) From 19f35402c5296e93213d56034d85698087ce3fe1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:18:22 +0800 Subject: [PATCH 1420/3599] [snotr] Fix extraction (closes #10338) --- ChangeLog | 1 + youtube_dl/extractor/snotr.py | 38 +++++++++++++++++++---------------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index b36e4438c..13c3d3ffc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Core * Fix js_to_json(): correct octal or hexadecimal number detection Extractors +* [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index 0d1ab07f8..3bb78cb84 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -5,9 +5,9 @@ import re from .common import InfoExtractor from ..utils import ( - float_or_none, - str_to_int, parse_duration, + parse_filesize, + str_to_int, ) @@ -17,21 +17,24 @@ class SnotrIE(InfoExtractor): 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', 'info_dict': { 'id': '13708', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Drone flying 
through fireworks!', - 'duration': 247, - 'filesize_approx': 98566144, + 'duration': 248, + 'filesize_approx': 40700000, 'description': 'A drone flying through Fourth of July Fireworks', - } + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['description'], }, { 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', 'info_dict': { 'id': '530', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'David Letteman - George W. Bush Top 10', 'duration': 126, - 'filesize_approx': 8912896, + 'filesize_approx': 8500000, 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', + 'thumbnail': 're:^https?://.*\.jpg$', } }] @@ -43,26 +46,27 @@ class SnotrIE(InfoExtractor): title = self._og_search_title(webpage) description = self._og_search_description(webpage) - video_url = 'http://cdn.videos.snotr.com/%s.flv' % video_id + info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] view_count = str_to_int(self._html_search_regex( - r'

\nViews:\n([\d,\.]+)

', + r']*>\s*]*>Views:\s*]*>([\d,\.]+)', webpage, 'view count', fatal=False)) duration = parse_duration(self._html_search_regex( - r'

\nLength:\n\s*([0-9:]+).*?

', + r']*>\s*]*>Length:\s*]*>([\d:]+)', webpage, 'duration', fatal=False)) - filesize_approx = float_or_none(self._html_search_regex( - r'

\nFilesize:\n\s*([0-9.]+)\s*megabyte

', - webpage, 'filesize', fatal=False), invscale=1024 * 1024) + filesize_approx = parse_filesize(self._html_search_regex( + r']*>\s*]*>Filesize:\s*]*>([^<]+)', + webpage, 'filesize', fatal=False)) - return { + info_dict.update({ 'id': video_id, 'description': description, 'title': title, - 'url': video_url, 'view_count': view_count, 'duration': duration, 'filesize_approx': filesize_approx, - } + }) + + return info_dict From 39e1c4f08c4cfca81943e73523bd66b890f5aff2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 20 Aug 2016 00:52:37 +0800 Subject: [PATCH 1421/3599] [litv] Support 'promo' URLs (closes #10385) --- ChangeLog | 1 + youtube_dl/extractor/litv.py | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 13c3d3ffc..a8d8d05a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Core * Fix js_to_json(): correct octal or hexadecimal number detection Extractors ++ [litv] Support 'promo' URLs (#10385) * [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index 3356d015d..05c6579f1 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -14,7 +14,7 @@ from ..utils import ( class LiTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.litv\.tv/vod/[^/]+/content\.do\?.*?\bid=(?P[^&]+)' + _VALID_URL = r'https?://www\.litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P[^&]+)' _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' @@ -27,6 +27,7 @@ class LiTVIE(InfoExtractor): 'playlist_count': 50, }, { 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'md5': '969e343d9244778cb29acec608e53640', 'info_dict': { 'id': 'VOD00041610', 'ext': 'mp4', @@ -37,7 +38,16 @@ class LiTVIE(InfoExtractor): }, 'params': { 'noplaylist': True, - 'skip_download': True, # m3u8 download + }, + 'skip': 'Georestricted to 
Taiwan', + }, { + 'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&', + 'md5': '88322ea132f848d6e3e18b32a832b918', + 'info_dict': { + 'id': 'VOD00044841', + 'ext': 'mp4', + 'title': '芈月傳第1集 霸星芈月降世楚國', + 'description': '楚威王二年,太史令唐昧夜觀星象,發現霸星即將現世。王后得知霸星的預言後,想盡辦法不讓孩子順利出生,幸得莒姬相護化解危機。沒想到眾人期待下出生的霸星卻是位公主,楚威王對此失望至極。楚王后命人將女嬰丟棄河中,居然奇蹟似的被少司命像攔下,楚威王認為此女非同凡響,為她取名芈月。', }, 'skip': 'Georestricted to Taiwan', }] @@ -92,13 +102,18 @@ class LiTVIE(InfoExtractor): # endpoint gives the same result as the data embedded in the webpage. # If georestricted, there are no embedded data, so an extra request is # necessary to get the error code + if 'assetId' not in view_data: + view_data = self._download_json( + 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, + query={'contentId': video_id}, + headers={'Accept': 'application/json'}) video_data = self._parse_json(self._search_regex( r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', webpage, 'video data', default='{}'), video_id) if not video_data: payload = { 'assetId': view_data['assetId'], - 'watchDevices': vod_data['watchDevices'], + 'watchDevices': view_data['watchDevices'], 'contentType': view_data['contentType'], } video_data = self._download_json( @@ -115,7 +130,8 @@ class LiTVIE(InfoExtractor): raise ExtractorError('Unexpected result from %s' % self.IE_NAME) formats = self._extract_m3u8_formats( - video_data['fullpath'], video_id, ext='mp4', m3u8_id='hls') + video_data['fullpath'], video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls') for a_format in formats: # LiTV HLS segments doesn't like compressions a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True From 5b9d187cc6545c0c5209a4db5525b1023ca8ea41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 19 Aug 2016 22:59:26 +0200 Subject: [PATCH 1422/3599] [imdb] Improve title extraction and make thumbnail non-fatal --- youtube_dl/extractor/imdb.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 
deletions(-) diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 0acce9f4c..3a6a6f5ad 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( mimetype2ext, qualities, + remove_end, ) @@ -19,7 +20,7 @@ class ImdbIE(InfoExtractor): 'info_dict': { 'id': '2524815897', 'ext': 'mp4', - 'title': 'Ice Age: Continental Drift Trailer (No. 2) - IMDb', + 'title': 'Ice Age: Continental Drift Trailer (No. 2)', 'description': 'md5:9061c2219254e5d14e03c25c98e96a81', } }, { @@ -83,10 +84,10 @@ class ImdbIE(InfoExtractor): return { 'id': video_id, - 'title': self._og_search_title(webpage), + 'title': remove_end(self._og_search_title(webpage), ' - IMDb'), 'formats': formats, 'description': descr, - 'thumbnail': format_info['slate'], + 'thumbnail': format_info.get('slate'), } From 4245f55880c42e670cebd5a8a2b10929be834682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 20 Aug 2016 06:18:20 +0700 Subject: [PATCH 1423/3599] [dotsub] Replace test (Closes #10386) --- youtube_dl/extractor/dotsub.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py index e9ca236d4..fd64d1a7f 100644 --- a/youtube_dl/extractor/dotsub.py +++ b/youtube_dl/extractor/dotsub.py @@ -10,18 +10,18 @@ from ..utils import ( class DotsubIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P[^/]+)' _TEST = { - 'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', - 'md5': '0914d4d69605090f623b7ac329fea66e', + 'url': 'https://dotsub.com/view/9c63db2a-fa95-4838-8e6e-13deafe47f09', + 'md5': '21c7ff600f545358134fea762a6d42b6', 'info_dict': { - 'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', + 'id': '9c63db2a-fa95-4838-8e6e-13deafe47f09', 'ext': 'flv', - 'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence 
documentary', - 'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074', - 'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', - 'duration': 3169, - 'uploader': '4v4l0n42', - 'timestamp': 1292248482.625, - 'upload_date': '20101213', + 'title': 'MOTIVATION - "It\'s Possible" Best Inspirational Video Ever', + 'description': 'md5:41af1e273edbbdfe4e216a78b9d34ac6', + 'thumbnail': 're:^https?://dotsub.com/media/9c63db2a-fa95-4838-8e6e-13deafe47f09/p', + 'duration': 198, + 'uploader': 'liuxt', + 'timestamp': 1385778501.104, + 'upload_date': '20131130', 'view_count': int, } } From dabe15701b3c12ef7e6af1f3333e1d3e39149592 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 20 Aug 2016 13:25:32 +0100 Subject: [PATCH 1424/3599] [cbs, cbsnews] fix extraction(fixes #10393) --- youtube_dl/extractor/cbs.py | 47 ++++++++++++++++++++----------- youtube_dl/extractor/cbsnews.py | 9 +++--- youtube_dl/extractor/cbssports.py | 3 ++ 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index a23173d6f..c72ed2dbb 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,6 +4,7 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( int_or_none, find_xpath_attr, + ExtractorError, ) @@ -17,19 +18,6 @@ class CBSBaseIE(ThePlatformFeedIE): }] } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] - def _extract_video_info(self, filter_query, video_id): - return self._extract_feed_info( - 'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { - 'series': entry.get('cbs$SeriesTitle'), - 'season_number': int_or_none(entry.get('cbs$SeasonNumber')), - 'episode': entry.get('cbs$EpisodeTitle'), - 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), - }, { - 'StreamPack': { - 'manifest': 'm3u', - } - }) - class CBSIE(CBSBaseIE): _VALID_URL = 
r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P[\w-]+)' @@ -38,7 +26,6 @@ class CBSIE(CBSBaseIE): 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', 'info_dict': { 'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_', - 'display_id': 'connect-chat-feat-garth-brooks', 'ext': 'mp4', 'title': 'Connect Chat feat. Garth Brooks', 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', @@ -47,7 +34,10 @@ class CBSIE(CBSBaseIE): 'upload_date': '20131127', 'uploader': 'CBSI-NEW', }, - 'expected_warnings': ['Failed to download m3u8 information'], + 'params': { + # m3u8 download + 'skip_download': True, + }, '_skip': 'Blocked outside the US', }, { 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', @@ -56,8 +46,31 @@ class CBSIE(CBSBaseIE): 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', 'only_matching': True, }] - TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' + + def _extract_video_info(self, guid): + path = 'dJ5BDC/media/guid/2198311517/' + guid + smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path + formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) + for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): + try: + tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) + formats.extend(tp_formats) + except ExtractorError: + continue + self._sort_formats(formats) + metadata = self._download_theplatform_metadata(path, guid) + info = self._parse_theplatform_metadata(metadata) + info.update({ + 'id': guid, + 'formats': formats, + 'subtitles': subtitles, + 'series': 
metadata.get('cbs$SeriesTitle'), + 'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), + 'episode': metadata.get('cbs$EpisodeTitle'), + 'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')), + }) + return info def _real_extract(self, url): content_id = self._match_id(url) - return self._extract_video_info('byGuid=%s' % content_id, content_id) + return self._extract_video_info(content_id) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 9d3b75526..4aa6917a0 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .cbs import CBSBaseIE +from .cbs import CBSIE from ..utils import ( parse_duration, ) -class CBSNewsIE(CBSBaseIE): +class CBSNewsIE(CBSIE): IE_DESC = 'CBS News' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P[\da-z_-]+)' @@ -35,7 +35,8 @@ class CBSNewsIE(CBSBaseIE): 'ext': 'mp4', 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', - 'upload_date': '19700101', + 'upload_date': '20140404', + 'timestamp': 1396650660, 'uploader': 'CBSI-NEW', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 205, @@ -63,7 +64,7 @@ class CBSNewsIE(CBSBaseIE): item = video_info['item'] if 'item' in video_info else video_info guid = item['mpxRefId'] - return self._extract_video_info('byGuid=%s' % guid, guid) + return self._extract_video_info(guid) class CBSNewsLiveVideoIE(InfoExtractor): diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index 78ca44b02..bf7915626 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -23,6 +23,9 @@ class CBSSportsIE(CBSBaseIE): } }] + def _extract_video_info(self, filter_query, video_id): + return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) + def _real_extract(self, url): 
video_id = self._match_id(url) return self._extract_video_info('byId=%s' % video_id, video_id) From 292a2301bf0b99be81640c4511d78ebc3c622dad Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 20 Aug 2016 19:00:25 +0100 Subject: [PATCH 1425/3599] [cnn] add support for money.cnn.com videos(closes #2797) --- youtube_dl/extractor/cnn.py | 41 ++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 53489a14e..220bb55e8 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -11,7 +11,7 @@ from ..utils import ( class CNNIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ + _VALID_URL = r'''(?x)https?://(?:(?Pedition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ (?P.+?/(?P[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' _TESTS = [{ @@ -45,19 +45,46 @@ class CNNIE(InfoExtractor): 'description': 'md5:e7223a503315c9f150acac52e76de086', 'upload_date': '20141222', } + }, { + 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', + 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', + 'info_dict': { + 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', + 'ext': 'mp4', + 'title': '5 stunning stats about Netflix', + 'description': 'Did you know that Netflix has more than 80 million members? 
Here are five facts about the online video distributor that you probably didn\'t know.', + 'upload_date': '20160819', + } }, { 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'only_matching': True, }, { 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', 'only_matching': True, + }, { + 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', + 'only_matching': True, }] + _CONFIG = { + # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml + 'edition': { + 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml', + 'media_src': 'http://pmd.cdn.turner.com/cnn/big', + }, + # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml + 'money': { + 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml', + 'media_src': 'http://ht3.cdn.turner.com/money/big', + }, + } + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - path = mobj.group('path') - page_title = mobj.group('title') - info_url = 'http://edition.cnn.com/video/data/3.0/%s/index.xml' % path + sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() + if sub_domain not in ('money', 'edition'): + sub_domain = 'edition' + config = self._CONFIG[sub_domain] + info_url = config['data_src'] % path info = self._download_xml(info_url, page_title) formats = [] @@ -66,7 +93,7 @@ class CNNIE(InfoExtractor): (?:_(?P<bitrate>[0-9]+)k)? 
''') for f in info.findall('files/file'): - video_url = 'http://ht.cdn.turner.com/cnn/big%s' % (f.text.strip()) + video_url = config['media_src'] + f.text.strip() fdct = { 'format_id': f.attrib['bitrate'], 'url': video_url, @@ -146,7 +173,7 @@ class CNNBlogsIE(InfoExtractor): class CNNArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!video/)' + _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)' _TEST = { 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', From e25586e47163c83e519ae0af9aa6d8fbc3d58ef4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 20 Aug 2016 20:02:49 +0100 Subject: [PATCH 1426/3599] [cultureunplugged] fix extraction(closes #10330) --- youtube_dl/extractor/cultureunplugged.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dl/extractor/cultureunplugged.py index 9c764fe68..9f26fa587 100644 --- a/youtube_dl/extractor/cultureunplugged.py +++ b/youtube_dl/extractor/cultureunplugged.py @@ -1,9 +1,13 @@ from __future__ import unicode_literals import re +import time from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + HEADRequest, +) class CultureUnpluggedIE(InfoExtractor): @@ -32,6 +36,9 @@ class CultureUnpluggedIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id + # request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request + self._request_webpage(HEADRequest( + 'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id) movie_data = self._download_json( 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id) From 5b1d85754ee2f1a8b94c979bc5122b4130ef8cc7 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:07:26 +0700 Subject: [PATCH 1427/3599] [YoutubeDL] Autocalculate ext when ext is None --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e844dc98a..0b3e3da82 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1299,7 +1299,7 @@ class YoutubeDL(object): for subtitle_format in subtitle: if subtitle_format.get('url'): subtitle_format['url'] = sanitize_url(subtitle_format['url']) - if 'ext' not in subtitle_format: + if subtitle_format.get('ext') is None: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() if self.params.get('listsubtitles', False): @@ -1354,7 +1354,7 @@ class YoutubeDL(object): note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '', ) # Automatically determine file extension if missing - if 'ext' not in format: + if format.get('ext') is None: format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) From d8f30a7e6606d2300dfffb8fc0aaf8d6a0c79b0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:10:49 +0700 Subject: [PATCH 1428/3599] [kaltura] Remove unused code --- youtube_dl/extractor/kaltura.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index ddf1165ff..66c7b36bc 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -122,18 +122,6 @@ class KalturaIE(InfoExtractor): return data - def _get_kaltura_signature(self, video_id, partner_id, service_url=None): - actions = [{ - 'apiVersion': '3.1', - 'expiry': 86400, - 'format': 1, - 'service': 'session', - 'action': 'startWidgetSession', - 'widgetId': '_%s' % partner_id, - }] - return 
self._kaltura_api_call( - video_id, actions, service_url, note='Downloading Kaltura signature')['ks'] - def _get_video_info(self, video_id, partner_id, service_url=None): actions = [ { From a80944675010617cc0124c57ab597f9d9004c0d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:26:45 +0700 Subject: [PATCH 1429/3599] [kaltura] Add subtitles support when entry_id is unknown beforehand (Closes #10279) --- youtube_dl/extractor/kaltura.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 66c7b36bc..a8ce6dda2 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -196,6 +196,17 @@ class KalturaIE(InfoExtractor): reference_id)['entryResult'] info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets'] entry_id = info['id'] + # Unfortunately, data returned in kalturaIframePackageData lacks + # captions so we will try requesting the complete data using + # regular approach since we now know the entry_id + try: + _, info, flavor_assets, captions = self._get_video_info( + entry_id, partner_id) + except ExtractorError: + # Regular scenario failed but we already have everything + # extracted apart from captions and can process at least + # with this + pass else: raise ExtractorError('Invalid URL', expected=True) ks = params.get('flashvars[ks]', [None])[0] From fddaa76a599a7df00dc94dd5663d43c881f8fee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:27:53 +0700 Subject: [PATCH 1430/3599] [kaltura] Assume ttml to be default subtitles' extension --- youtube_dl/extractor/kaltura.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index a8ce6dda2..15f2fe24f 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -266,7 
+266,7 @@ class KalturaIE(InfoExtractor): continue subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), - 'ext': caption.get('fileExt'), + 'ext': caption.get('fileExt', 'ttml'), }) return { From 2c6acdfd2d31b7ce9500e9efe411620c61059b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 08:37:01 +0700 Subject: [PATCH 1431/3599] [kaltura] Add test for #10279 --- youtube_dl/extractor/kaltura.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 15f2fe24f..e0f7366c2 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -67,6 +67,27 @@ class KalturaIE(InfoExtractor): # video with subtitles 'url': 'kaltura:111032:1_cw786r8q', 'only_matching': True, + }, + { + # video with ttml subtitles (no fileExt) + 'url': 'kaltura:1926081:0_l5ye1133', + 'info_dict': { + 'id': '0_l5ye1133', + 'ext': 'mp4', + 'title': 'What Can You Do With Python?', + 'upload_date': '20160221', + 'uploader_id': 'stork', + 'thumbnail': 're:^https?://.*/thumbnail/.*', + 'timestamp': int, + 'subtitles': { + 'en': [{ + 'ext': 'ttml', + }], + }, + }, + 'params': { + 'skip_download': True, + }, } ] From db29af6d36b3d16614355dac70f22c4f2d8410d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <trox1972@users.noreply.github.com> Date: Fri, 19 Aug 2016 12:53:34 +0200 Subject: [PATCH 1432/3599] [charlierose] Add new extractor --- youtube_dl/extractor/charlierose.py | 45 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 46 insertions(+) create mode 100644 youtube_dl/extractor/charlierose.py diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py new file mode 100644 index 000000000..ba1d1b833 --- /dev/null 
+++ b/youtube_dl/extractor/charlierose.py @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class CharlieRoseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)' + _TEST = { + 'url': 'https://charlierose.com/videos/27996', + 'info_dict': { + 'id': '27996', + 'ext': 'mp4', + 'title': 'Remembering Zaha Hadid', + 'thumbnail': 're:^https?://.*\.jpg\?\d+', + 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + _PLAYER_BASE = 'https://charlierose.com/video/player/%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id) + + title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') + + entries = self._parse_html5_media_entries(self._PLAYER_BASE % video_id, webpage, video_id)[0] + formats = entries['formats'] + + self._sort_formats(formats) + self._remove_duplicate_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'subtitles': entries.get('subtitles'), + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6c5d46015..d4d90c1f8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -134,6 +134,7 @@ from .ccc import CCCIE from .cda import CDAIE from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE +from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE from .chilloutzone import ChilloutzoneIE from .chirbit import ( From d164a0d41bdc95caa2b1458b9f51381de7d6a5a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 21 Aug 2016 
20:00:48 +0800 Subject: [PATCH 1433/3599] [README.md] Add a format selection example using comma Ref: #10399 --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 952db7abb..a10aaf35c 100644 --- a/README.md +++ b/README.md @@ -645,7 +645,11 @@ $ youtube-dl -f 'best[filesize<50M]' # Download best format available via direct link over HTTP/HTTPS protocol $ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]' + +# Download the best video format and the best audio format without merging them +$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s' ``` +Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name. # VIDEO SELECTION From 3d47ee0a9eb37b2c91dfae80c7f22fda0242dd61 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 21 Aug 2016 14:09:18 +0100 Subject: [PATCH 1434/3599] [zingmp3] fix extraction and add support for video clips(closes #10041) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/zingmp3.py | 127 +++++++++++++++++------------ 2 files changed, 76 insertions(+), 56 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6c5d46015..20fb23527 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1114,7 +1114,4 @@ from .youtube import ( ) from .zapiks import ZapiksIE from .zdf import ZDFIE, ZDFChannelIE -from .zingmp3 import ( - ZingMp3SongIE, - ZingMp3AlbumIE, -) +from .zingmp3 import ZingMp3IE diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index 437eecb67..bd708b42c 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -4,13 +4,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, + update_url_query, +) class 
ZingMp3BaseInfoExtractor(InfoExtractor): - def _extract_item(self, item, fatal=True): - error_message = item.find('./errormessage').text + def _extract_item(self, item, page_type, fatal=True): + error_message = item.get('msg') if error_message: if not fatal: return @@ -18,25 +22,48 @@ class ZingMp3BaseInfoExtractor(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) - title = item.find('./title').text.strip() - source = item.find('./source').text - extension = item.attrib['type'] - thumbnail = item.find('./backimage').text + formats = [] + for quality, source_url in zip(item.get('qualities') or item.get('quality', []), item.get('source_list') or item.get('source', [])): + if not source_url or source_url == 'require vip': + continue + if not re.match(r'https?://', source_url): + source_url = '//' + source_url + source_url = self._proto_relative_url(source_url, 'http:') + quality_num = int_or_none(quality) + f = { + 'format_id': quality, + 'url': source_url, + } + if page_type == 'video': + f.update({ + 'height': quality_num, + 'ext': 'mp4', + }) + else: + f.update({ + 'abr': quality_num, + 'ext': 'mp3', + }) + formats.append(f) + + cover = item.get('cover') return { - 'title': title, - 'url': source, - 'ext': extension, - 'thumbnail': thumbnail, + 'title': (item.get('name') or item.get('title')).strip(), + 'formats': formats, + 'thumbnail': 'http:/' + cover if cover else None, + 'artist': item.get('artist'), } - def _extract_player_xml(self, player_xml_url, id, playlist_title=None): - player_xml = self._download_xml(player_xml_url, id, 'Downloading Player XML') - items = player_xml.findall('./item') + def _extract_player_json(self, player_json_url, id, page_type, playlist_title=None): + player_json = self._download_json(player_json_url, id, 'Downloading Player JSON') + items = player_json['data'] + if 'item' in items: + items = items['item'] if len(items) == 1: # one single song - data = self._extract_item(items[0]) + data = 
self._extract_item(items[0], page_type) data['id'] = id return data @@ -45,7 +72,7 @@ class ZingMp3BaseInfoExtractor(InfoExtractor): entries = [] for i, item in enumerate(items, 1): - entry = self._extract_item(item, fatal=False) + entry = self._extract_item(item, page_type, fatal=False) if not entry: continue entry['id'] = '%s-%d' % (id, i) @@ -59,8 +86,8 @@ class ZingMp3BaseInfoExtractor(InfoExtractor): } -class ZingMp3SongIE(ZingMp3BaseInfoExtractor): - _VALID_URL = r'https?://mp3\.zing\.vn/bai-hat/(?P<slug>[^/]+)/(?P<song_id>\w+)\.html' +class ZingMp3IE(ZingMp3BaseInfoExtractor): + _VALID_URL = r'https?://mp3\.zing\.vn/(?:bai-hat|album|playlist|video-clip)/[^/]+/(?P<id>\w+)\.html' _TESTS = [{ 'url': 'http://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'md5': 'ead7ae13693b3205cbc89536a077daed', @@ -70,51 +97,47 @@ class ZingMp3SongIE(ZingMp3BaseInfoExtractor): 'ext': 'mp3', 'thumbnail': 're:^https?://.*\.jpg$', }, - }] - IE_NAME = 'zingmp3:song' - IE_DESC = 'mp3.zing.vn songs' - - def _real_extract(self, url): - matched = re.match(self._VALID_URL, url) - slug = matched.group('slug') - song_id = matched.group('song_id') - - webpage = self._download_webpage( - 'http://mp3.zing.vn/bai-hat/%s/%s.html' % (slug, song_id), song_id) - - player_xml_url = self._search_regex( - r'&xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url') - - return self._extract_player_xml(player_xml_url, song_id) - - -class ZingMp3AlbumIE(ZingMp3BaseInfoExtractor): - _VALID_URL = r'https?://mp3\.zing\.vn/(?:album|playlist)/(?P<slug>[^/]+)/(?P<album_id>\w+)\.html' - _TESTS = [{ + }, { + 'url': 'http://mp3.zing.vn/video-clip/Let-It-Go-Frozen-OST-Sungha-Jung/ZW6BAEA0.html', + 'md5': '870295a9cd8045c0e15663565902618d', + 'info_dict': { + 'id': 'ZW6BAEA0', + 'title': 'Let It Go (Frozen OST)', + 'ext': 'mp4', + }, + }, { 'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html', 'info_dict': { '_type': 'playlist', 'id': 'ZWZBWDAF', - 'title': 'Lâu Đài Tình Ái - 
Bằng Kiều ft. Minh Tuyết | Album 320 lossless', + 'title': 'Lâu Đài Tình Ái - Bằng Kiều,Minh Tuyết | Album 320 lossless', }, 'playlist_count': 10, + 'skip': 'removed at the request of the owner', }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, }] - IE_NAME = 'zingmp3:album' - IE_DESC = 'mp3.zing.vn albums' + IE_NAME = 'zingmp3' + IE_DESC = 'mp3.zing.vn' def _real_extract(self, url): - matched = re.match(self._VALID_URL, url) - slug = matched.group('slug') - album_id = matched.group('album_id') + page_id = self._match_id(url) - webpage = self._download_webpage( - 'http://mp3.zing.vn/album/%s/%s.html' % (slug, album_id), album_id) - player_xml_url = self._search_regex( - r'&xmlURL=(?P<xml_url>[^&]+)&', webpage, 'player xml url') + webpage = self._download_webpage(url, page_id) - return self._extract_player_xml( - player_xml_url, album_id, - playlist_title=self._og_search_title(webpage)) + player_json_url = self._search_regex([ + r'data-xml="([^"]+)', + r'&xmlURL=([^&]+)&' + ], webpage, 'player xml url') + + playlist_title = None + page_type = self._search_regex(r'/(?:html5)?xml/([^/-]+)', player_json_url, 'page type') + if page_type == 'video': + player_json_url = update_url_query(player_json_url, {'format': 'json'}) + else: + player_json_url = player_json_url.replace('/xml/', '/html5xml/') + if page_type == 'album': + playlist_title = self._og_search_title(webpage) + + return self._extract_player_json(player_json_url, page_id, page_type, playlist_title) From 92d4cfa358bacff0e79da30ffb0908c7096e82f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 21:01:01 +0700 Subject: [PATCH 1435/3599] [kaltura] Fallback ext calculation on caption's format --- youtube_dl/extractor/kaltura.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index e0f7366c2..6a8464998 100644 
--- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -36,6 +36,12 @@ class KalturaIE(InfoExtractor): ''' _SERVICE_URL = 'http://cdnapi.kaltura.com' _SERVICE_BASE = '/api_v3/index.php' + # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php + _CAPTION_TYPES = { + 1: 'srt', + 2: 'ttml', + 3: 'vtt', + } _TESTS = [ { 'url': 'kaltura:269692:1_1jc2y3e4', @@ -285,9 +291,12 @@ class KalturaIE(InfoExtractor): # Continue if caption is not ready if f.get('status') != 2: continue + if not caption.get('id'): + continue + caption_format = int_or_none(caption.get('format')) subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), - 'ext': caption.get('fileExt', 'ttml'), + 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml', }) return { From b1e676fde81d33116f6739006d9aa0b68eebc072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 21:28:02 +0700 Subject: [PATCH 1436/3599] [twitch] Modernize --- youtube_dl/extractor/twitch.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 890f55180..4b5b2030c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -20,7 +20,6 @@ from ..utils import ( orderedSet, parse_duration, parse_iso8601, - sanitized_Request, urlencode_postdata, ) @@ -50,8 +49,8 @@ class TwitchBaseIE(InfoExtractor): for cookie in self._downloader.cookiejar: if cookie.name == 'api_token': headers['Twitch-Api-Token'] = cookie.value - request = sanitized_Request(url, headers=headers) - response = super(TwitchBaseIE, self)._download_json(request, video_id, note) + response = super(TwitchBaseIE, self)._download_json( + url, video_id, note, 
headers=headers) self._handle_error(response) return response @@ -82,11 +81,10 @@ class TwitchBaseIE(InfoExtractor): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(redirect_url, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(login_form)) - request.add_header('Referer', redirect_url) response = self._download_webpage( - request, None, 'Logging in as %s' % username) + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), + headers={'Referer': redirect_url}) error_message = self._search_regex( r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>', From e3f6b569096ba6faa8de230333849817c8b31a2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 22:09:29 +0700 Subject: [PATCH 1437/3599] [twitch] Refactor API calls --- youtube_dl/extractor/twitch.py | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4b5b2030c..f0a9370c8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -41,7 +41,7 @@ class TwitchBaseIE(InfoExtractor): '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')), expected=True) - def _download_json(self, url, video_id, note='Downloading JSON metadata'): + def _call_api(self, path, item_id, note): headers = { 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', 'X-Requested-With': 'XMLHttpRequest', @@ -49,8 +49,8 @@ class TwitchBaseIE(InfoExtractor): for cookie in self._downloader.cookiejar: if cookie.name == 'api_token': headers['Twitch-Api-Token'] = cookie.value - response = super(TwitchBaseIE, self)._download_json( - url, video_id, note, headers=headers) + response = self._download_json( + '%s/%s' % (self._API_BASE, path), item_id, note) self._handle_error(response) return response @@ -107,14 +107,14 @@ class TwitchBaseIE(InfoExtractor): 
class TwitchItemBaseIE(TwitchBaseIE): def _download_info(self, item, item_id): - return self._extract_info(self._download_json( - '%s/kraken/videos/%s%s' % (self._API_BASE, item, item_id), item_id, + return self._extract_info(self._call_api( + 'kraken/videos/%s%s' % (item, item_id), item_id, 'Downloading %s info JSON' % self._ITEM_TYPE)) def _extract_media(self, item_id): info = self._download_info(self._ITEM_SHORTCUT, item_id) - response = self._download_json( - '%s/api/videos/%s%s' % (self._API_BASE, self._ITEM_SHORTCUT, item_id), item_id, + response = self._call_api( + 'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id, 'Downloading %s playlist JSON' % self._ITEM_TYPE) entries = [] chunks = response['chunks'] @@ -244,8 +244,8 @@ class TwitchVodIE(TwitchItemBaseIE): item_id = self._match_id(url) info = self._download_info(self._ITEM_SHORTCUT, item_id) - access_token = self._download_json( - '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id, + access_token = self._call_api( + 'api/vods/%s/access_token' % item_id, item_id, 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( @@ -273,12 +273,12 @@ class TwitchVodIE(TwitchItemBaseIE): class TwitchPlaylistBaseIE(TwitchBaseIE): - _PLAYLIST_URL = '%s/kraken/channels/%%s/videos/?offset=%%d&limit=%%d' % TwitchBaseIE._API_BASE + _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d' _PAGE_LIMIT = 100 def _extract_playlist(self, channel_id): - info = self._download_json( - '%s/kraken/channels/%s' % (self._API_BASE, channel_id), + info = self._call_api( + 'kraken/channels/%s' % channel_id, channel_id, 'Downloading channel info JSON') channel_name = info.get('display_name') or info.get('name') entries = [] @@ -287,8 +287,8 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): broken_paging_detected = False counter_override = None for counter in itertools.count(1): - response = self._download_json( - self._PLAYLIST_URL % (channel_id, offset, limit), + response = 
self._call_api( + self._PLAYLIST_PATH % (channel_id, offset, limit), channel_id, 'Downloading %s videos JSON page %s' % (self._PLAYLIST_TYPE, counter_override or counter)) @@ -343,7 +343,7 @@ class TwitchProfileIE(TwitchPlaylistBaseIE): class TwitchPastBroadcastsIE(TwitchPlaylistBaseIE): IE_NAME = 'twitch:past_broadcasts' _VALID_URL = r'%s/(?P<id>[^/]+)/profile/past_broadcasts/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE - _PLAYLIST_URL = TwitchPlaylistBaseIE._PLAYLIST_URL + '&broadcasts=true' + _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcasts=true' _PLAYLIST_TYPE = 'past broadcasts' _TEST = { @@ -387,8 +387,8 @@ class TwitchStreamIE(TwitchBaseIE): def _real_extract(self, url): channel_id = self._match_id(url) - stream = self._download_json( - '%s/kraken/streams/%s' % (self._API_BASE, channel_id), channel_id, + stream = self._call_api( + 'kraken/streams/%s' % channel_id, channel_id, 'Downloading stream JSON').get('stream') # Fallback on profile extraction if stream is offline @@ -403,8 +403,8 @@ class TwitchStreamIE(TwitchBaseIE): # JSON and fallback to lowercase if it's not available. 
channel_id = stream.get('channel', {}).get('name') or channel_id.lower() - access_token = self._download_json( - '%s/api/channels/%s/access_token' % (self._API_BASE, channel_id), channel_id, + access_token = self._call_api( + 'api/channels/%s/access_token' % channel_id, channel_id, 'Downloading channel access token') query = { From efe470e2614d8a50a5cc2d14551e9bc4fc41cc8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 21 Aug 2016 22:45:50 +0700 Subject: [PATCH 1438/3599] [twitch] Renew authentication --- youtube_dl/extractor/twitch.py | 38 +++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index f0a9370c8..359a8859c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -7,6 +7,7 @@ import random from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_parse_qs, compat_str, compat_urllib_parse_urlencode, @@ -14,6 +15,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + clean_html, ExtractorError, int_or_none, js_to_json, @@ -62,9 +64,17 @@ class TwitchBaseIE(InfoExtractor): if username is None: return + def fail(message): + raise ExtractorError( + 'Unable to login. 
Twitch said: %s' % message, expected=True) + login_page, handle = self._download_webpage_handle( self._LOGIN_URL, None, 'Downloading login page') + # Some TOR nodes and public proxies are blocked completely + if 'blacklist_message' in login_page: + fail(clean_html(login_page)) + login_form = self._hidden_inputs(login_page) login_form.update({ @@ -81,20 +91,24 @@ class TwitchBaseIE(InfoExtractor): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(redirect_url, post_url) - response = self._download_webpage( - post_url, None, 'Logging in as %s' % username, - data=urlencode_postdata(login_form), - headers={'Referer': redirect_url}) + headers = {'Referer': redirect_url} - error_message = self._search_regex( - r'<div[^>]+class="subwindow_notice"[^>]*>([^<]+)</div>', - response, 'error message', default=None) - if error_message: - raise ExtractorError( - 'Unable to login. Twitch said: %s' % error_message, expected=True) + try: + response = self._download_json( + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), + headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + response = self._parse_json( + e.cause.read().decode('utf-8'), None) + fail(response['message']) + raise - if '>Reset your password<' in response: - self.report_warning('Twitch asks you to reset your password, go to https://secure.twitch.tv/reset/submit') + if response.get('redirect'): + self._download_webpage( + response['redirect'], None, 'Downloading login redirect page', + headers=headers) def _prefer_source(self, formats): try: From 9b8c554ea70ee970009de2628bafe7fd7390bf9e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 21 Aug 2016 17:55:47 +0100 Subject: [PATCH 1439/3599] [firsttv] fix extraction(closes #9249) --- youtube_dl/extractor/firsttv.py | 133 ++++++++++---------------------- 1 file changed, 39 insertions(+), 94 deletions(-) diff --git 
a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 88bca1007..af7de10b7 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -2,130 +2,75 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_xpath +from ..compat import compat_urlparse from ..utils import ( int_or_none, qualities, unified_strdate, - xpath_attr, - xpath_element, - xpath_text, - xpath_with_ns, ) class FirstTVIE(InfoExtractor): IE_NAME = '1tv' IE_DESC = 'Первый канал' - _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+p?(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ - # single format via video_materials.json API - 'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', - 'md5': '82a2777648acae812d58b3f5bd42882b', + 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', + 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', 'info_dict': { - 'id': '35930', + 'id': '40049', 'ext': 'mp4', 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'description': 'md5:357933adeede13b202c7c21f91b871b2', + 'description': 'md5:36a39c1d19618fec57d12efe212a8370', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20150212', 'duration': 2694, }, }, { - # multiple formats via video_materials.json API - 'url': 'http://www.1tv.ru/video_archive/projects/dobroeutro/p113641', - 'info_dict': { - 'id': '113641', - 'ext': 'mp4', - 'title': 'Весенняя аллергия. Доброе утро. 
Фрагмент выпуска от 07.04.2016', - 'description': 'md5:8dcebb3dded0ff20fade39087fd1fee2', - 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', - 'upload_date': '20160407', - 'duration': 179, - 'formats': 'mincount:3', - }, - 'params': { - 'skip_download': True, - }, - }, { - # single format only available via ONE_ONLINE_VIDEOS.archive_single_xml API - 'url': 'http://www.1tv.ru/video_archive/series/f7552/p47038', - 'md5': '519d306c5b5669761fd8906c39dbee23', - 'info_dict': { - 'id': '47038', - 'ext': 'mp4', - 'title': '"Побег". Второй сезон. 3 серия', - 'description': 'md5:3abf8f6b9bce88201c33e9a3d794a00b', - 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', - 'upload_date': '20120516', - 'duration': 3080, - }, - }, { - 'url': 'http://www.1tv.ru/videoarchive/9967', - 'only_matching': True, + 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', + 'only_matching': 'true', }] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - # Videos with multiple formats only available via this API - video = self._download_json( - 'http://www.1tv.ru/video_materials.json?legacy_id=%s' % video_id, - video_id, fatal=False) - - description, thumbnail, upload_date, duration = [None] * 4 - - if video: - item = video[0] - title = item['title'] - quality = qualities(('ld', 'sd', 'hd', )) - formats = [{ - 'url': f['src'], - 'format_id': f.get('name'), - 'quality': quality(f.get('name')), - } for f in item['mbr'] if f.get('src')] - thumbnail = item.get('poster') - else: - # Some videos are not available via video_materials.json - video = self._download_xml( - 'http://www.1tv.ru/owa/win/ONE_ONLINE_VIDEOS.archive_single_xml?pid=%s' % video_id, - video_id) - - NS_MAP = { - 'media': 'http://search.yahoo.com/mrss/', - } - - item = xpath_element(video, './channel/item', fatal=True) - title = xpath_text(item, './title', fatal=True) - formats = [{ - 'url': content.attrib['url'], - } for content in 
item.findall( - compat_xpath(xpath_with_ns('./media:content', NS_MAP))) if content.attrib.get('url')] - thumbnail = xpath_attr( - item, xpath_with_ns('./media:thumbnail', NS_MAP), 'url') + webpage = self._download_webpage(url, display_id) + playlist_url = compat_urlparse.urljoin(url, self._search_regex( + r'data-playlist-url="([^"]+)', webpage, 'playlist url')) + item = self._download_json(playlist_url, display_id)[0] + video_id = item['id'] + quality = qualities(('ld', 'sd', 'hd', )) + formats = [] + for f in item.get('mbr', []): + src = f.get('src') + if not src: + continue + fname = f.get('name') + formats.append({ + 'url': src, + 'format_id': fname, + 'quality': quality(fname), + }) self._sort_formats(formats) - webpage = self._download_webpage(url, video_id, 'Downloading page', fatal=False) - if webpage: - title = self._html_search_regex( - (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', - r"'title'\s*:\s*'([^']+)'"), - webpage, 'title', default=None) or title - description = self._html_search_regex( - r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', - webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description') - thumbnail = thumbnail or self._og_search_thumbnail(webpage) - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'video duration', fatal=False)) - upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) + title = self._html_search_regex( + (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', + r"'title'\s*:\s*'([^']+)'"), + webpage, 'title', default=None) or item['title'] + description = self._html_search_regex( + r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', + webpage, 'description', default=None) or self._html_search_meta( + 'description', webpage, 'description') + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'video duration', 
fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) return { 'id': video_id, - 'thumbnail': thumbnail, + 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage), 'title': title, 'description': description, 'upload_date': upload_date, From 526656726b13f47a33c36e56821136b90d6decf1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 22 Aug 2016 02:06:47 +0800 Subject: [PATCH 1440/3599] [charlierose] Simplify and improve --- youtube_dl/extractor/charlierose.py | 33 +++++++++++++++++------------ 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py index ba1d1b833..817f7128f 100644 --- a/youtube_dl/extractor/charlierose.py +++ b/youtube_dl/extractor/charlierose.py @@ -6,20 +6,25 @@ from ..utils import remove_end class CharlieRoseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?charlierose\.com/video(?:s|/player)/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://charlierose.com/videos/27996', + 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', 'info_dict': { 'id': '27996', 'ext': 'mp4', 'title': 'Remembering Zaha Hadid', 'thumbnail': 're:^https?://.*\.jpg\?\d+', 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', + 'subtitles': { + 'en': [{ + 'ext': 'vtt', + }], + }, }, - 'params': { - # m3u8 download - 'skip_download': True, - } - } + }, { + 'url': 'https://charlierose.com/videos/27996', + 'only_matching': True, + }] _PLAYER_BASE = 'https://charlierose.com/video/player/%s' @@ -29,17 +34,17 @@ class CharlieRoseIE(InfoExtractor): title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') - entries = self._parse_html5_media_entries(self._PLAYER_BASE % video_id, webpage, video_id)[0] - formats = entries['formats'] + info_dict = self._parse_html5_media_entries( + self._PLAYER_BASE % video_id, webpage, 
video_id)[0] - self._sort_formats(formats) - self._remove_duplicate_formats(formats) + self._sort_formats(info_dict['formats']) + self._remove_duplicate_formats(info_dict['formats']) - return { + info_dict.update({ 'id': video_id, 'title': title, - 'formats': formats, 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), - 'subtitles': entries.get('subtitles'), - } + }) + + return info_dict From d0fa172e5fc1d676834252dcd395ec495b20b0bc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 21 Aug 2016 19:11:51 +0100 Subject: [PATCH 1441/3599] [firsttv] keep a test videos with multiple formats --- youtube_dl/extractor/firsttv.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index af7de10b7..332d12020 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -16,6 +16,7 @@ class FirstTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ + # single format 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', 'info_dict': { @@ -28,8 +29,21 @@ class FirstTVIE(InfoExtractor): 'duration': 2694, }, }, { + # multiple formats 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', - 'only_matching': 'true', + 'info_dict': { + 'id': '364746', + 'ext': 'mp4', + 'title': 'Весенняя аллергия. Доброе утро. 
Фрагмент выпуска от 07.04.2016', + 'description': 'md5:a242eea0031fd180a4497d52640a9572', + 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', + 'upload_date': '20160407', + 'duration': 179, + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): From ad120ae1c57fe3ff0c7f5559d280cb8230a2b38c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 22 Aug 2016 02:18:46 +0800 Subject: [PATCH 1442/3599] [extractor/common] Change the default m3u8 protocol in HTML5 Helper functions should have consistent default values --- ChangeLog | 1 + youtube_dl/extractor/charlierose.py | 3 ++- youtube_dl/extractor/common.py | 6 +++--- youtube_dl/extractor/snotr.py | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index a8d8d05a3..383ff59ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Core * Fix js_to_json(): correct octal or hexadecimal number detection Extractors ++ [charlierose] Add new extractor (#10382) + [litv] Support 'promo' URLs (#10385) * [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py index 817f7128f..4bf2cf7b0 100644 --- a/youtube_dl/extractor/charlierose.py +++ b/youtube_dl/extractor/charlierose.py @@ -35,7 +35,8 @@ class CharlieRoseIE(InfoExtractor): title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') info_dict = self._parse_html5_media_entries( - self._PLAYER_BASE % video_id, webpage, video_id)[0] + self._PLAYER_BASE % video_id, webpage, video_id, + m3u8_entry_protocol='m3u8_native')[0] self._sort_formats(info_dict['formats']) self._remove_duplicate_formats(info_dict['formats']) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 07d58afe7..ba4c03d3d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1695,7 +1695,7 @@ class InfoExtractor(object): 
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'): def absolute_url(video_url): return compat_urlparse.urljoin(base_url, video_url) @@ -1715,8 +1715,8 @@ class InfoExtractor(object): if determine_ext(full_url) == 'm3u8': is_plain_url = False formats = self._extract_m3u8_formats( - full_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=m3u8_id) + full_url, video_id, ext='mp4', + entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id) else: is_plain_url = True formats = [{ diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index 3bb78cb84..4819fe5b4 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -46,7 +46,8 @@ class SnotrIE(InfoExtractor): title = self._og_search_title(webpage) description = self._og_search_description(webpage) - info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + info_dict = self._parse_html5_media_entries( + url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0] view_count = str_to_int(self._html_search_regex( r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)', From cf143c4d977915c993f4aa467b491a6c284bb569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 03:31:33 +0700 Subject: [PATCH 1443/3599] [ivi] Add support for 720p and 1080p --- youtube_dl/extractor/ivi.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 472d72b4c..f5ab5f4af 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -8,7 +8,7 @@ from .common 
import InfoExtractor from ..utils import ( ExtractorError, int_or_none, - sanitized_Request, + qualities, ) @@ -49,11 +49,27 @@ class IviIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', + }, + { + # with MP4-HD720 format + 'url': 'http://www.ivi.ru/watch/146500', + 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e', + 'info_dict': { + 'id': '146500', + 'ext': 'mp4', + 'title': 'Кукла', + 'description': 'md5:ffca9372399976a2d260a407cc74cce6', + 'duration': 5599, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'skip': 'Only works from Russia', } ] # Sorted by quality - _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] + _KNOWN_FORMATS = ( + 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', + 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080') def _real_extract(self, url): video_id = self._match_id(url) @@ -69,10 +85,9 @@ class IviIE(InfoExtractor): ] } - request = sanitized_Request( - 'http://api.digitalaccess.ru/api/json/', json.dumps(data)) video_json = self._download_json( - request, video_id, 'Downloading video JSON') + 'http://api.digitalaccess.ru/api/json/', video_id, + 'Downloading video JSON', data=json.dumps(data)) if 'error' in video_json: error = video_json['error'] @@ -84,11 +99,13 @@ class IviIE(InfoExtractor): result = video_json['result'] + quality = qualities(self._KNOWN_FORMATS) + formats = [{ 'url': x['url'], - 'format_id': x['content_format'], - 'preference': self._KNOWN_FORMATS.index(x['content_format']), - } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS] + 'format_id': x.get('content_format'), + 'quality': quality(x.get('content_format')), + } for x in result['files'] if x.get('url')] self._sort_formats(formats) From 3d897cc791781430f371da98f2f3a05a0b856c5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 03:34:27 +0700 Subject: [PATCH 1444/3599] [ivi] Fix episode number 
extraction --- youtube_dl/extractor/ivi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index f5ab5f4af..7c8cb21c2 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -132,7 +132,7 @@ class IviIE(InfoExtractor): webpage, 'season number', default=None)) episode_number = int_or_none(self._search_regex( - r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)', + r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)', webpage, 'episode number', default=None)) description = self._og_search_description(webpage, default=None) or self._html_search_meta( From afbab5688e837d9b1617119b1ac26b4a4e343bed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 04:15:46 +0700 Subject: [PATCH 1445/3599] [ChangeLog] Actualize --- ChangeLog | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 383ff59ea..ee9b9500f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,16 +1,28 @@ version <unreleased> Core -+ Recognize file size strings with full unit names (for example "8.5 - megabytes") -+ Support m3u8 manifests in HTML5 multimedia tags -* Fix js_to_json(): correct octal or hexadecimal number detection +* Improve formats and subtitles extension auto calculation ++ Recognize full unit names in parse_filesize ++ Add support for m3u8 manifests in HTML5 multimedia tags +* Fix octal/hexadecimal number detection in js_to_json Extractors ++ [ivi] Add support for 720p and 1080p + [charlierose] Add new extractor (#10382) +* [1tv] Fix extraction (#9249) +* [twitch] Renew authentication +* [kaltura] Improve subtitles extension calculation ++ [zingmp3] Add support for video clips +* [zingmp3] Fix extraction (#10041) +* [kaltura] Improve subtitles extraction (#10279) +* [cultureunplugged] Fix extraction (#10330) ++ [cnn] 
Add support for money.cnn.com (#2797) +* [cbsnews] Fix extraction (#10362) +* [cbs] Fix extraction (#10393) + [litv] Support 'promo' URLs (#10385) * [snotr] Fix extraction (#10338) * [n-tv.de] Fix extraction (#10331) +* [globo:article] Relax URL and video id regular expressions (#10379) version 2016.08.19 From 6d2679ee26eb6ad0587d01e40ca7a17a6edd6e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 22 Aug 2016 04:17:34 +0700 Subject: [PATCH 1446/3599] release 2016.08.22 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7af3c7099..7dcca18a1 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.19** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.22** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.19 +[debug] youtube-dl version 2016.08.22 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index ee9b9500f..a8202d3de 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.22 Core * Improve formats and subtitles extension auto calculation diff --git a/docs/supportedsites.md b/docs/supportedsites.md index edf192138..ca96d2b07 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -121,6 +121,7 @@ - **CDA** - **CeskaTelevize** - **channel9**: Channel 9 + - **CharlieRose** - **Chaturbate** - **Chilloutzone** - **chirbit** @@ -893,5 +894,4 @@ - **Zapiks** - **ZDF** - **ZDFChannel** - - **zingmp3:album**: mp3.zing.vn albums - - **zingmp3:song**: mp3.zing.vn songs + - **zingmp3**: mp3.zing.vn diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 691f2c591..e33d32e97 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.19' +__version__ = '2016.08.22' From 55d119e2a10ccbfadc12b9af30c495f46874c2a3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 00:06:39 +0100 Subject: [PATCH 1447/3599] [abc:iview] Add new 
extractor(closes #6148) --- youtube_dl/extractor/abc.py | 63 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 5 ++- 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index b584277be..879ded88d 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -7,6 +7,8 @@ from ..utils import ( ExtractorError, js_to_json, int_or_none, + update_url_query, + parse_iso8601, ) @@ -93,3 +95,64 @@ class ABCIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } + + +class ABCIViewIE(InfoExtractor): + IE_NAME = 'abc.net.au:iview' + _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)' + + _TESTS = [{ + 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', + 'md5': '979d10b2939101f0d27a06b79edad536', + 'info_dict': { + 'id': 'FA1505V024S00', + 'ext': 'mp4', + 'title': 'Series 27 Ep 24', + 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', + 'upload_date': '20160820', + 'uploader_id': 'abc1', + 'timestamp': 1471719600, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_params = self._parse_json(self._search_regex( + r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) + title = video_params['title'] + stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') + + formats = [] + f4m_url = stream.get('hds-unmetered') or stream['hds-metered'] + formats.extend(self._extract_f4m_formats( + update_url_query(f4m_url, {'hdcore': '3.7.0'}), + video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_m3u8_formats(f4m_url.replace( + 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + + subtitles = {} + src_vtt = 
stream.get('captions', {}).get('src-vtt') + if src_vtt: + subtitles['en'] = [{ + 'url': src_vtt, + 'ext': 'vtt', + }] + + return { + 'id': video_id, + 'title': title, + 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage), + 'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage), + 'duration': int_or_none(video_params.get('eventDuration')), + 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), + 'series': video_params.get('seriesTitle'), + 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], + 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), + 'episode': self._html_search_meta('episode_title', webpage), + 'uploader_id': video_params.get('channel'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b0644be11..8e405ad72 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1,7 +1,10 @@ # flake8: noqa from __future__ import unicode_literals -from .abc import ABCIE +from .abc import ( + ABCIE, + ABCIViewIE, +) from .abc7news import Abc7NewsIE from .abcnews import ( AbcNewsIE, From 96229e5f95a5be622a694b464085bdea59134ccf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 22 Aug 2016 13:56:09 +0800 Subject: [PATCH 1448/3599] [mtvservices:embedded] Update config URL All starts from #10363. The test case in mtvservices:embedded uses config.xml, while the video from #10363 and the test case in generic.py is broken. Both uses index.html for fetching the feed URL. 
--- youtube_dl/extractor/mtv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 2f455680e..200f340de 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -257,8 +257,8 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): def _get_feed_url(self, uri): video_id = self._id_from_uri(uri) site_id = uri.replace(video_id, '') - config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/' - 'context4/context5/config.xml'.format(site_id)) + config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/' + 'context52/config.xml'.format(site_id)) config_doc = self._download_xml(config_url, video_id) feed_node = config_doc.find('.//feed') feed_url = feed_node.text.strip().split('?')[0] From c7c43a93ba4abbd2175ab0891b63def7e25aa385 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 07:47:25 +0100 Subject: [PATCH 1449/3599] [common] add helper method to extract akamai m3u8 and f4m formats --- youtube_dl/extractor/common.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ba4c03d3d..8ed16deee 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1765,6 +1765,18 @@ class InfoExtractor(object): entries.append(media_info) return entries + def _extract_akamai_formats(self, manifest_url, video_id): + formats = [] + f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') + formats.extend(self._extract_f4m_formats( + update_url_query(f4m_url, {'hdcore': '3.7.0'}), + video_id, f4m_id='hds', fatal=False)) + m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + return formats + def 
_live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From ad316425840315b40405a55243635fcfbcae5f19 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 07:48:40 +0100 Subject: [PATCH 1450/3599] [nrk,abc:iview] use _extract_akamai_formats --- youtube_dl/extractor/abc.py | 10 +--------- youtube_dl/extractor/nrk.py | 14 ++------------ 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 879ded88d..c7b6df7d0 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -7,7 +7,6 @@ from ..utils import ( ExtractorError, js_to_json, int_or_none, - update_url_query, parse_iso8601, ) @@ -123,14 +122,7 @@ class ABCIViewIE(InfoExtractor): title = video_params['title'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') - formats = [] - f4m_url = stream.get('hds-unmetered') or stream['hds-metered'] - formats.extend(self._extract_f4m_formats( - update_url_query(f4m_url, {'hdcore': '3.7.0'}), - video_id, f4m_id='hds', fatal=False)) - formats.extend(self._extract_m3u8_formats(f4m_url.replace( - 'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), - video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) self._sort_formats(formats) subtitles = {} diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 6ded5bd45..ed42eb301 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -14,16 +14,6 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): - def _extract_formats(self, manifest_url, video_id, fatal=True): - formats = [] - formats.extend(self._extract_f4m_formats( - manifest_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', - video_id, f4m_id='hds', fatal=fatal)) - formats.extend(self._extract_m3u8_formats(manifest_url.replace( - 
'akamaihd.net/z/', 'akamaihd.net/i/').replace('/manifest.f4m', '/master.m3u8'), - video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=fatal)) - return formats - def _real_extract(self, url): video_id = self._match_id(url) @@ -45,7 +35,7 @@ class NRKBaseIE(InfoExtractor): asset_url = asset.get('url') if not asset_url: continue - formats = self._extract_formats(asset_url, video_id, fatal=False) + formats = self._extract_akamai_formats(asset_url, video_id) if not formats: continue self._sort_formats(formats) @@ -69,7 +59,7 @@ class NRKBaseIE(InfoExtractor): if not entries: media_url = data.get('mediaUrl') if media_url: - formats = self._extract_formats(media_url, video_id) + formats = self._extract_akamai_formats(media_url, video_id) self._sort_formats(formats) duration = parse_duration(data.get('duration')) entries = [{ From 7367bdef23a3db4691ba99f01613b7759340f05e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 22 Aug 2016 23:10:06 +0100 Subject: [PATCH 1451/3599] [awaan] fix extraction, modernize, rename the extractors and add test for live stream --- youtube_dl/extractor/{dcn.py => awaan.py} | 89 +++++++++++------------ youtube_dl/extractor/extractors.py | 12 +-- 2 files changed, 50 insertions(+), 51 deletions(-) rename youtube_dl/extractor/{dcn.py => awaan.py} (75%) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/awaan.py similarity index 75% rename from youtube_dl/extractor/dcn.py rename to youtube_dl/extractor/awaan.py index b8542820a..bdf23c6a9 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/awaan.py @@ -12,46 +12,41 @@ from ..compat import ( from ..utils import ( int_or_none, parse_iso8601, - sanitized_Request, smuggle_url, unsmuggle_url, urlencode_postdata, ) -class DCNIE(InfoExtractor): +class AWAANIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' 
def _real_extract(self, url): show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() if video_id and int(video_id) > 0: return self.url_result( - 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo') + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') elif season_id and int(season_id) > 0: return self.url_result(smuggle_url( - 'http://www.dcndigital.ae/program/season/%s' % season_id, - {'show_id': show_id}), 'DCNSeason') + 'http://awaan.ae/program/season/%s' % season_id, + {'show_id': show_id}), 'AWAANSeason') else: return self.url_result( - 'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason') + 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') -class DCNBaseIE(InfoExtractor): - def _extract_video_info(self, video_data, video_id, is_live): +class AWAANBaseIE(InfoExtractor): + def _parse_video_data(self, video_data, video_id, is_live): title = video_data.get('title_en') or video_data['title_ar'] img = video_data.get('img') - thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None - duration = int_or_none(video_data.get('duration')) - description = video_data.get('description_en') or video_data.get('description_ar') - timestamp = parse_iso8601(video_data.get('create_time'), ' ') return { 'id': video_id, 'title': self._live_title(title) if is_live else title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, + 'description': video_data.get('description_en') or video_data.get('description_ar'), + 'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None, + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': parse_iso8601(video_data.get('create_time'), ' '), 'is_live': is_live, } @@ -75,11 +70,12 @@ class DCNBaseIE(InfoExtractor): return formats -class DCNVideoIE(DCNBaseIE): - IE_NAME = 'dcn:video' +class AWAANVideoIE(AWAANBaseIE): + IE_NAME = 'awaan:video' _VALID_URL = 
r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', + 'md5': '5f61c33bfc7794315c671a62d43116aa', 'info_dict': { 'id': '17375', @@ -90,10 +86,6 @@ class DCNVideoIE(DCNBaseIE): 'timestamp': 1227504126, 'upload_date': '20081124', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', 'only_matching': True, @@ -102,11 +94,10 @@ class DCNVideoIE(DCNBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - request = sanitized_Request( + video_data = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - video_data = self._download_json(request, video_id) - info = self._extract_video_info(video_data, video_id, False) + video_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(video_data, video_id, False) webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' 
+ @@ -121,19 +112,31 @@ class DCNVideoIE(DCNBaseIE): return info -class DCNLiveIE(DCNBaseIE): - IE_NAME = 'dcn:live' +class AWAANLiveIE(AWAANBaseIE): + IE_NAME = 'awaan:live' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)' + _TEST = { + 'url': 'http://awaan.ae/live/6/dubai-tv', + 'info_dict': { + 'id': '6', + 'ext': 'mp4', + 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'upload_date': '20150107', + 'timestamp': 1420588800, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } def _real_extract(self, url): channel_id = self._match_id(url) - request = sanitized_Request( + channel_data = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - - channel_data = self._download_json(request, channel_id) - info = self._extract_video_info(channel_data, channel_id, True) + channel_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(channel_data, channel_id, True) webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ @@ -148,8 +151,8 @@ class DCNLiveIE(DCNBaseIE): return info -class DCNSeasonIE(InfoExtractor): - IE_NAME = 'dcn:season' +class AWAANSeasonIE(InfoExtractor): + IE_NAME = 'awaan:season' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' _TEST = { 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', @@ -170,21 +173,17 @@ class DCNSeasonIE(InfoExtractor): data['season'] = season_id show_id = smuggled_data.get('show_id') if show_id is None: - request = sanitized_Request( + season = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, - headers={'Origin': 'http://www.dcndigital.ae'}) - season = self._download_json(request, season_id) + season_id, headers={'Origin': 'http://awaan.ae'}) show_id = season['id'] data['show_id'] = show_id - request = sanitized_Request( + show = self._download_json( 'http://admin.mangomolo.com/analytics/index.php/plus/show', - urlencode_postdata(data), - { - 'Origin': 'http://www.dcndigital.ae', + show_id, data=urlencode_postdata(data), headers={ + 'Origin': 'http://awaan.ae', 'Content-Type': 'application/x-www-form-urlencoded' }) - - show = self._download_json(request, show_id) if not season_id: season_id = show['default_season'] for season in show['seasons']: @@ -195,6 +194,6 @@ class DCNSeasonIE(InfoExtractor): for video in show['videos']: video_id = compat_str(video['id']) entries.append(self.url_result( - 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id)) + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) return self.playlist_result(entries, season_id, title) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8e405ad72..04cd23bdb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -71,6 
+71,12 @@ from .atttechchannel import ATTTechChannelIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE from .audiomack import AudiomackIE, AudiomackAlbumIE +from .awaan import ( + AWAANIE, + AWAANVideoIE, + AWAANLiveIE, + AWAANSeasonIE, +) from .azubu import AzubuIE, AzubuLiveIE from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE @@ -200,12 +206,6 @@ from .daum import ( DaumUserIE, ) from .dbtv import DBTVIE -from .dcn import ( - DCNIE, - DCNVideoIE, - DCNLiveIE, - DCNSeasonIE, -) from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .democracynow import DemocracynowIE From 3083e4dc070d6378456f9b20ebd5cbf9ee9d92af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 23 Aug 2016 07:22:14 +0700 Subject: [PATCH 1452/3599] [eagleplatform] Improve detection of embedded videos (Closes #10409) --- youtube_dl/extractor/eagleplatform.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 12d28d3b9..d4dfda8cd 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -52,11 +52,24 @@ class EaglePlatformIE(InfoExtractor): @staticmethod def _extract_url(webpage): + # Regular iframe embedding mobj = re.search( r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', webpage) if mobj is not None: return mobj.group('url') + # Basic usage embedding (see http://dultonmedia.github.io/eplayer/) + mobj = re.search( + r'''(?xs) + <script[^>]+ + src=(?P<q1>["\'])(?:https?:)?//(?P<host>.+?\.media\.eagleplatform\.com)/player/player\.js(?P=q1) + .+? 
+ <div[^>]+ + class=(?P<q2>["\'])eagleplayer(?P=q2)[^>]+ + data-id=["\'](?P<id>\d+) + ''', webpage) + if mobj is not None: + return 'eagleplatform:%(host)s:%(id)s' % mobj.groupdict() @staticmethod def _handle_error(response): From fb009b7f534e600e98b93e062198ade5826b5800 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 23 Aug 2016 10:28:28 +0100 Subject: [PATCH 1453/3599] [bravotv] correct clip info extraction and add support for adobe pass auth(closes #10407) --- youtube_dl/extractor/bravotv.py | 81 +++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py index 541c76944..31763b4c6 100644 --- a/youtube_dl/extractor/bravotv.py +++ b/youtube_dl/extractor/bravotv.py @@ -1,31 +1,74 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import smuggle_url +from .adobepass import AdobePassIE +from ..utils import ( + smuggle_url, + update_url_query, + int_or_none, +) -class BravoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)' - _TEST = { +class BravoTVIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' + _TESTS = [{ 'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale', - 'md5': 'd60cdf68904e854fac669bd26cccf801', + 'md5': '9086d0b7ef0ea2aabc4781d75f4e5863', 'info_dict': { - 'id': 'LitrBdX64qLn', + 'id': 'zHyk1_HU_mPy', 'ext': 'mp4', - 'title': 'Last Chance Kitchen Returns', - 'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13', - 'timestamp': 1448926740, - 'upload_date': '20151130', + 'title': 'LCK Ep 12: Fishy Finale', + 'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.', 'uploader': 'NBCU-BRAV', + 'upload_date': '20160302', + 'timestamp': 1456945320, } - } + }, { + 'url': 
'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid') - release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid') - return self.url_result(smuggle_url( - 'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid), - {'force_smil_url': True}), 'ThePlatform', release_pid) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + settings = self._parse_json(self._search_regex( + r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + display_id) + info = {} + query = { + 'mbr': 'true', + } + account_pid, release_pid = [None] * 2 + tve = settings.get('sharedTVE') + if tve: + query['manifest'] = 'm3u' + account_pid = 'HNK2IC' + release_pid = tve['release_pid'] + if tve.get('entitlement') == 'auth': + adobe_pass = settings.get('adobePass', {}) + resource = self._get_mvpd_resource( + adobe_pass.get('adobePassResourceId', 'bravo'), + tve['title'], release_pid, tve.get('rating')) + query['auth'] = self._extract_mvpd_auth( + url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) + else: + shared_playlist = settings['shared_playlist'] + account_pid = shared_playlist['account_pid'] + metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] + release_pid = metadata['release_pid'] + info.update({ + 'title': metadata['title'], + 'description': metadata.get('description'), + 'season_number': int_or_none(metadata.get('season_num')), + 'episode_number': int_or_none(metadata.get('episode_num')), + }) + query['switch'] = 'progressive' + info.update({ + '_type': 'url_transparent', + 'id': release_pid, + 'url': smuggle_url(update_url_query( + 'http://link.theplatform.com/s/%s/%s' % 
(account_pid, release_pid), + query), {'force_smil_url': True}), + 'ie_key': 'ThePlatform', + }) + return info From 18b6216150fa39d5e3cdbf353339e1c010bcee8d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 23 Aug 2016 21:55:58 +0800 Subject: [PATCH 1454/3599] [openload] Fix extraction (closes #10408) Thanks @yokrysty for the algorithm --- ChangeLog | 6 +++ youtube_dl/extractor/openload.py | 81 +++++++------------------------- 2 files changed, 24 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index a8202d3de..651d4d5d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [openload] Fix extraction (#10408) + + version 2016.08.22 Core diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 4e80ca9ff..e181d0b3a 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,12 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals, division -import math - from .common import InfoExtractor -from ..compat import compat_chr +from ..compat import ( + compat_chr, + compat_ord, +) from ..utils import ( - decode_png, determine_ext, ExtractorError, ) @@ -42,71 +42,26 @@ class OpenloadIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) - if 'File not found' in webpage: + if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True) - # The following extraction logic is proposed by @Belderak and @gdkchan - # and declared to be used freely in youtube-dl - # See https://github.com/rg3/youtube-dl/issues/9706 + # The following decryption algorithm is written by @yokrysty and + # declared to be freely used in youtube-dl + # See https://github.com/rg3/youtube-dl/issues/10408 + enc_data = 
self._html_search_regex( + r'<span[^>]+id="hiddenurl"[^>]*>([^<]+)</span>', webpage, 'encrypted data') - numbers_js = self._download_webpage( - 'https://openload.co/assets/js/obfuscator/n.js', video_id, - note='Downloading signature numbers') - signums = self._search_regex( - r'window\.signatureNumbers\s*=\s*[\'"](?P<data>[a-z]+)[\'"]', - numbers_js, 'signature numbers', group='data') + video_url_chars = [] - linkimg_uri = self._search_regex( - r'<img[^>]+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image') - linkimg = self._request_webpage( - linkimg_uri, video_id, note=False).read() + for c in enc_data: + j = compat_ord(c) + if j >= 33 and j <= 126: + j = ((j + 14) % 94) + 33 + video_url_chars += compat_chr(j) - width, height, pixels = decode_png(linkimg) - - output = '' - for y in range(height): - for x in range(width): - r, g, b = pixels[y][3 * x:3 * x + 3] - if r == 0 and g == 0 and b == 0: - break - else: - output += compat_chr(r) - output += compat_chr(g) - output += compat_chr(b) - - img_str_length = len(output) // 200 - img_str = [[0 for x in range(img_str_length)] for y in range(10)] - - sig_str_length = len(signums) // 260 - sig_str = [[0 for x in range(sig_str_length)] for y in range(10)] - - for i in range(10): - for j in range(img_str_length): - begin = i * img_str_length * 20 + j * 20 - img_str[i][j] = output[begin:begin + 20] - for j in range(sig_str_length): - begin = i * sig_str_length * 26 + j * 26 - sig_str[i][j] = signums[begin:begin + 26] - - parts = [] - # TODO: find better names for str_, chr_ and sum_ - str_ = '' - for i in [2, 3, 5, 7]: - str_ = '' - sum_ = float(99) - for j in range(len(sig_str[i])): - for chr_idx in range(len(img_str[i][j])): - if sum_ > float(122): - sum_ = float(98) - chr_ = compat_chr(int(math.floor(sum_))) - if sig_str[i][j][chr_idx] == chr_ and j >= len(str_): - sum_ += float(2.5) - str_ += img_str[i][j][chr_idx] - parts.append(str_.replace(',', '')) - - video_url = 'https://openload.co/stream/%s~%s~%s~%s' % 
(parts[3], parts[1], parts[2], parts[0]) + video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) title = self._og_search_title(webpage, default=None) or self._search_regex( r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, From ccb6570e9e625ff5e9adf88729e745acadcaff0e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 23 Aug 2016 17:31:08 +0100 Subject: [PATCH 1455/3599] [syfy,bravotv] restrict drupal settings regex --- youtube_dl/extractor/bravotv.py | 2 +- youtube_dl/extractor/syfy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py index 31763b4c6..a25d500e4 100644 --- a/youtube_dl/extractor/bravotv.py +++ b/youtube_dl/extractor/bravotv.py @@ -32,7 +32,7 @@ class BravoTVIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) settings = self._parse_json(self._search_regex( - r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id) info = {} query = { diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index cc81f6003..ab8bab5cd 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -31,7 +31,7 @@ class SyfyIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) syfy_mpx = list(self._parse_json(self._search_regex( - r'jQuery\.extend\([^,]+,\s*({.+})\);', webpage, 'drupal settings'), + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id)['syfy']['syfy_mpx'].values())[0] video_id = syfy_mpx['mpxGUID'] title = syfy_mpx['episodeTitle'] From 1212e9972fce69df6bd871a5c301294427299cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 00:25:21 +0700 Subject: [PATCH 1456/3599] [youtube] Fix 
authentication (#10392) --- youtube_dl/extractor/youtube.py | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 268080ba6..38556d86e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -91,36 +91,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if login_page is False: return - galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', - login_page, 'Login GALX parameter') + login_form = self._hidden_inputs(login_page) - # Log in - login_form_strs = { - 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', + login_form.update({ 'Email': username, - 'GALX': galx, 'Passwd': password, - - 'PersistentCookie': 'yes', - '_utf8': '霱', - 'bgresponse': 'js_disabled', - 'checkConnection': '', - 'checkedDomains': 'youtube', - 'dnConn': '', - 'pstMsg': '0', - 'rmShown': '1', - 'secTok': '', - 'signIn': 'Sign in', - 'timeStmp': '', - 'service': 'youtube', - 'uilel': '3', - 'hl': 'en_US', - } + }) login_results = self._download_webpage( self._PASSWORD_CHALLENGE_URL, None, note='Logging in', errnote='unable to log in', fatal=False, - data=urlencode_postdata(login_form_strs)) + data=urlencode_postdata(login_form)) if login_results is False: return False From 05bddcc512cd5058f1af1d5985979b70bdcf4711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 01:29:50 +0700 Subject: [PATCH 1457/3599] [youtube] Fix authentication (2) (Closes #10392) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 38556d86e..d5d5b7334 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -94,6 +94,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): login_form = self._hidden_inputs(login_page) 
login_form.update({ + 'checkConnection': 'youtube', 'Email': username, 'Passwd': password, }) From 6e52bbb41320e1b6f4b7a16a5e651d945ac14611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 01:36:27 +0700 Subject: [PATCH 1458/3599] [ChangeLog] Actualize --- ChangeLog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 651d4d5d7..07ab5867f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,14 @@ version <unreleased> Extractors +* [youtube] Fix authentication (#10392) * [openload] Fix extraction (#10408) ++ [bravotv] Add support for Adobe Pass (#10407) +* [bravotv] Fix clip info extraction (#10407) +* [eagleplatform] Improve embedded videos detection (#10409) +* [awaan] Fix extraction +* [mtvservices:embedded] Update config URL ++ [abc:iview] Add extractor (#6148) version 2016.08.22 From c86f51ee38b2063ad4eec2f0bb6e3d3551be0855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 01:38:46 +0700 Subject: [PATCH 1459/3599] release 2016.08.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7dcca18a1..00f593783 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.22*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.22** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.22 +[debug] youtube-dl version 2016.08.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 07ab5867f..b4f6dbe08 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.24 Extractors * [youtube] Fix authentication (#10392) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ca96d2b07..08db56fa9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -16,6 +16,7 @@ - **9gag** - **9now.com.au** - **abc.net.au** + - **abc.net.au:iview** - **Abc7News** - **abcnews** - **abcnews:video** @@ -66,6 +67,10 @@ - **audiomack** - **audiomack:album** - **auroravid**: AuroraVid + - **AWAAN** + - **awaan:live** + - **awaan:season** + - **awaan:video** - **Azubu** - **AzubuLive** - **BaiduVideo**: 百度视频 @@ -172,10 +177,6 @@ - **daum.net:playlist** - **daum.net:user** - **DBTV** - - **DCN** - - **dcn:live** - - **dcn:season** - - **dcn:video** - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 
e33d32e97..c1194124e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.22' +__version__ = '2016.08.24' From 8c3e35dd441ceed682da885368f5cd97afb1816e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 08:41:52 +0700 Subject: [PATCH 1460/3599] [pluralsight] Add support for subtitles (Closes #9681) --- youtube_dl/extractor/pluralsight.py | 74 ++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 9aab77645..afd3217d9 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals -import re -import json -import random import collections +import json +import os +import random +import re from .common import InfoExtractor from ..compat import ( @@ -12,10 +13,12 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + float_or_none, int_or_none, parse_duration, qualities, sanitized_Request, + srt_subtitles_timecode, urlencode_postdata, ) @@ -91,6 +94,51 @@ class PluralsightIE(PluralsightBaseIE): if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')): raise ExtractorError('Unable to log in') + def _get_subtitles(self, author, clip_id, lang, name, duration, video_id): + captions_post = { + 'a': author, + 'cn': clip_id, + 'lc': lang, + 'm': name, + } + captions = self._download_json( + '%s/training/Player/Captions' % self._API_BASE, video_id, + 'Downloading captions JSON', 'Unable to download captions JSON', + fatal=False, data=json.dumps(captions_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) + if captions: + return { + lang: [{ + 'ext': 'json', + 'data': json.dumps(captions), + }, { + 'ext': 'srt', + 'data': self._convert_subtitles(duration, captions), + }] + } + 
+ @staticmethod + def _convert_subtitles(duration, subs): + srt = '' + for num, current in enumerate(subs): + current = subs[num] + start, text = float_or_none( + current.get('DisplayTimeOffset')), current.get('Text') + if start is None or text is None: + continue + end = duration if num == len(subs) - 1 else float_or_none( + subs[num + 1].get('DisplayTimeOffset')) + srt += os.linesep.join( + ( + '%d' % num, + '%s --> %s' % ( + srt_subtitles_timecode(start), + srt_subtitles_timecode(end)), + text, + os.linesep, + )) + return srt + def _real_extract(self, url): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) @@ -138,6 +186,8 @@ class PluralsightIE(PluralsightBaseIE): if not clip: raise ExtractorError('Unable to resolve clip') + title = '%s - %s' % (module['title'], clip['title']) + QUALITIES = { 'low': {'width': 640, 'height': 480}, 'medium': {'width': 848, 'height': 640}, @@ -225,18 +275,20 @@ class PluralsightIE(PluralsightBaseIE): formats.append(f) self._sort_formats(formats) - # TODO: captions - # http://www.pluralsight.com/training/Player/ViewClip + cap = true - # or - # http://www.pluralsight.com/training/Player/Captions - # { a = author, cn = clip_id, lc = end, m = name } + duration = int_or_none( + clip.get('duration')) or parse_duration(clip.get('formattedDuration')) + + # TODO: other languages? 
+ subtitles = self.extract_subtitles( + author, clip_id, 'en', name, duration, display_id) return { 'id': clip.get('clipName') or clip['name'], - 'title': '%s - %s' % (module['title'], clip['title']), - 'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')), + 'title': title, + 'duration': duration, 'creator': author, - 'formats': formats + 'formats': formats, + 'subtitles': subtitles, } From 30317f4887178082809706ce8ac9cb989014c8fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 08:52:12 +0700 Subject: [PATCH 1461/3599] [pluralsight] Modernize and make more robust --- youtube_dl/extractor/pluralsight.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index afd3217d9..ea5caefa9 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -17,7 +17,6 @@ from ..utils import ( int_or_none, parse_duration, qualities, - sanitized_Request, srt_subtitles_timecode, urlencode_postdata, ) @@ -78,12 +77,10 @@ class PluralsightIE(PluralsightBaseIE): if not post_url.startswith('http'): post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - response = self._download_webpage( - request, None, 'Logging in as %s' % username) + post_url, None, 'Logging in as %s' % username, + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) error = self._search_regex( r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', @@ -128,6 +125,8 @@ class PluralsightIE(PluralsightBaseIE): continue end = duration if num == len(subs) - 1 else float_or_none( subs[num + 1].get('DisplayTimeOffset')) + if end is None: + continue srt += os.linesep.join( ( 
'%d' % num, @@ -246,13 +245,12 @@ class PluralsightIE(PluralsightBaseIE): 'mt': ext, 'q': '%dx%d' % (f['width'], f['height']), } - request = sanitized_Request( - '%s/training/Player/ViewClip' % self._API_BASE, - json.dumps(clip_post).encode('utf-8')) - request.add_header('Content-Type', 'application/json;charset=utf-8') format_id = '%s-%s' % (ext, quality) clip_url = self._download_webpage( - request, display_id, 'Downloading %s URL' % format_id, fatal=False) + '%s/training/Player/ViewClip' % self._API_BASE, display_id, + 'Downloading %s URL' % format_id, fatal=False, + data=json.dumps(clip_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) # Pluralsight tracks multiple sequential calls to ViewClip API and start # to return 429 HTTP errors after some time (see From 6d94cbd2f43548575b32907724f48331df1693ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 10:07:06 +0700 Subject: [PATCH 1462/3599] [ChangeLog] Actualize --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index b4f6dbe08..b63f49ae1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors ++ [pluralsight] Add support for subtitles (#9681) + + version 2016.08.24 Extractors From d38b27dd9b108a7518dd291c5c231a53abd3f2df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 24 Aug 2016 10:11:04 +0700 Subject: [PATCH 1463/3599] release 2016.08.24.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 00f593783..15acc025a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.24 +[debug] youtube-dl version 2016.08.24.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index b63f49ae1..4f3f1265f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.24.1 Extractors + [pluralsight] Add support for subtitles (#9681) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c1194124e..7447d3d7e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.24' +__version__ = '2016.08.24.1' From 97653f81b2565c752f2c107fc44167a93c3eef42 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> 
Date: Wed, 24 Aug 2016 21:18:56 +0800 Subject: [PATCH 1464/3599] [bilibili] Mark as broken Bilibili now uses emscripten, which is very difficult for reverse engineering. I don't expect it to be fixed in near future, so I mark it as broken. Ref: #10375 --- youtube_dl/extractor/bilibili.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index d8eb71821..d87c38a02 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -21,6 +21,8 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' _TESTS = [{ From 0c75abbb7bb9135d145805e86c87a5a43b69ac15 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 24 Aug 2016 23:58:22 +0800 Subject: [PATCH 1465/3599] [mtvservices:embedded] Use another endpoint to get feed URL Closes #10363 In the original mtvservices:embedded test case, config.xml is still used to get the feed URL. Some other examples, including test_Generic_40 (http://www.vulture.com/2016/06/new-key-peele-sketches-released.html), and the video mentioned in #10363, use another endpoint to get the feed URL. The 'index.html' approach works for the original test case, too. So I didn't keep the old approach. 
--- ChangeLog | 6 ++++++ youtube_dl/extractor/bet.py | 5 ++--- youtube_dl/extractor/mtv.py | 27 +++++++++++++-------------- youtube_dl/extractor/nick.py | 5 ++--- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4f3f1265f..c3cc8f38f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) + + version 2016.08.24.1 Extractors diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index bd3ee2e2e..1f8ef0303 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor from ..utils import unified_strdate -from ..compat import compat_urllib_parse_urlencode class BetIE(MTVServicesInfoExtractor): @@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor): _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" def _get_feed_query(self, uri): - return compat_urllib_parse_urlencode({ + return { 'uuid': uri, - }) + } def _extract_mgid(self, webpage): return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 200f340de..bdda68819 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -4,7 +4,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_str, compat_xpath, ) @@ -14,12 +13,13 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, + RegexNotFoundError, sanitized_Request, strip_or_none, timeconvert, unescapeHTML, + update_url_query, url_basename, - RegexNotFoundError, xpath_text, ) @@ -36,6 +36,11 @@ class MTVServicesInfoExtractor(InfoExtractor): def _id_from_uri(uri): return uri.split(':')[-1] + @staticmethod + def _remove_template_parameter(url): + # Remove the templates, like &device={device} + return 
re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) + # This was originally implemented for ComedyCentral, but it also works here @classmethod def _transform_rtmp_url(cls, rtmp_video_url): @@ -117,9 +122,7 @@ class MTVServicesInfoExtractor(InfoExtractor): video_id = self._id_from_uri(uri) self.report_extraction(video_id) content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))) - mediagen_url = content_el.attrib['url'] - # Remove the templates, like &device={device} - mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url) + mediagen_url = self._remove_template_parameter(content_el.attrib['url']) if 'acceptMethods' not in mediagen_url: mediagen_url += '&' if '?' in mediagen_url else '?' mediagen_url += 'acceptMethods=fms' @@ -178,12 +181,12 @@ class MTVServicesInfoExtractor(InfoExtractor): data = {'uri': uri} if self._LANG: data['lang'] = self._LANG - return compat_urllib_parse_urlencode(data) + return data def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) - info_url = feed_url + '?' 
+ self._get_feed_query(uri) + info_url = update_url_query(feed_url, self._get_feed_query(uri)) return self._get_videos_info_from_url(info_url, video_id) def _get_videos_info_from_url(self, url, video_id): @@ -256,13 +259,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): def _get_feed_url(self, uri): video_id = self._id_from_uri(uri) - site_id = uri.replace(video_id, '') - config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/' - 'context52/config.xml'.format(site_id)) - config_doc = self._download_xml(config_url, video_id) - feed_node = config_doc.find('.//feed') - feed_url = feed_node.text.strip().split('?')[0] - return feed_url + config = self._download_json( + 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id) + return self._remove_template_parameter(config['feedWithQueryParams']) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 9c54846e1..64730a624 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import update_url_query @@ -59,10 +58,10 @@ class NickIE(MTVServicesInfoExtractor): }] def _get_feed_query(self, uri): - return compat_urllib_parse_urlencode({ + return { 'feed': 'nick_arc_player_prime', 'mgid': uri, - }) + } def _extract_mgid(self, webpage): return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') From 08773689f37341f8c70c3fd298f5910235b8c151 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 01:29:32 +0800 Subject: [PATCH 1466/3599] [kickstarter] Silent the warning for og:description Closes #10415 --- youtube_dl/extractor/kickstarter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/kickstarter.py 
b/youtube_dl/extractor/kickstarter.py index 9f1ade2e4..c61e78622 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -37,7 +37,6 @@ class KickStarterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Power Drive 2000', }, - 'expected_warnings': ['OpenGraph description'], }] def _real_extract(self, url): @@ -67,6 +66,6 @@ class KickStarterIE(InfoExtractor): 'id': video_id, 'url': video_url, 'title': title, - 'description': self._og_search_description(webpage), + 'description': self._og_search_description(webpage, default=None), 'thumbnail': thumbnail, } From 0c6422cdd649c6f39cb2d8680e29f91da18d8c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 07:34:55 +0700 Subject: [PATCH 1467/3599] [README.md] Add FAQ entry for streaming to player --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a10aaf35c..52e53803e 100644 --- a/README.md +++ b/README.md @@ -730,7 +730,7 @@ Videos or video formats streamed via RTMP protocol can only be downloaded when [ ### I have downloaded a video but how can I play it? -Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). +Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](http://www.videolan.org/) or [mplayer](http://www.mplayerhq.hu/). ### I extracted a video URL with `-g`, but it does not play on another machine / in my webbrowser. @@ -816,6 +816,12 @@ Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. N Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. 
YouTube, CloudFlare). +### How do I stream directly to media player? + +You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with: + + youtube-dl -o - http://www.youtube.com/watch?v=BaW_jenozKcj | vlc - + ### Can you add support for this anime video site, or site which shows current movies for free? As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. From 073ac1225f6fe28905e11f29f2d23f4b4db50f9c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:33:16 +0100 Subject: [PATCH 1468/3599] [utils] add ac-3 to the list of audio codecs in parse_codecs --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 41ca562f1..1091f17f3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2183,7 +2183,7 @@ def parse_codecs(codecs_str): if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'): if not vcodec: vcodec = full_codec - elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac'): + elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'): if not acodec: acodec = full_codec else: From 07ea9c9b05359aef14472dfa66a6578d21c88e96 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:37:41 +0100 Subject: [PATCH 1469/3599] [downloader/hls] fill IV with zeros for IVs shorter than 16-octet --- 
youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8d7971e5d..8dd1b898e 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -120,7 +120,7 @@ class HlsFD(FragmentFD): decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': if 'IV' in decrypt_info: - decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) + decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = compat_urlparse.urljoin( man_url, decrypt_info['URI']) From f39ffc5877e4e9f112fa26ff21079f179b4aec46 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:39:23 +0100 Subject: [PATCH 1470/3599] [common] extract formats from #EXT-X-MEDIA tags --- youtube_dl/extractor/common.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8ed16deee..da0af29ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1202,30 +1202,45 @@ class InfoExtractor(object): 'preference': preference, }] last_info = None - last_media = None for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) elif line.startswith('#EXT-X-MEDIA:'): - last_media = parse_m3u8_attributes(line) + media = parse_m3u8_attributes(line) + media_type = media.get('TYPE') + if media_type in ('VIDEO', 'AUDIO'): + media_url = media.get('URI') + if media_url: + format_id = [] + for v in (media.get('GROUP-ID'), media.get('NAME')): + if v: + format_id.append(v) + formats.append({ + 'format_id': '-'.join(format_id), + 'url': format_url(media_url), + 'language': media.get('LANGUAGE'), + 'vcodec': 'none' if media_type == 'AUDIO' else None, + 'ext': ext, + 
'protocol': entry_protocol, + 'preference': preference, + }) elif line.startswith('#') or not line.strip(): continue else: if last_info is None: formats.append({'url': format_url(line)}) continue - tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000) + tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000) format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None - # Despite specification does not mention NAME attribute for - # EXT-X-STREAM-INF it still sometimes may be present - stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. if not live: + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), @@ -1252,9 +1267,6 @@ class InfoExtractor(object): 'abr': abr, }) f.update(parse_codecs(last_info.get('CODECS'))) - if last_media is not None: - f['m3u8_media'] = last_media - last_media = None formats.append(f) last_info = {} return formats From 75fa990dc669563b51f22eeddd2f33acc41c8599 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:46:54 +0100 Subject: [PATCH 1471/3599] [YoutubeDL] add fallback value for thumbnails values in thumbnails sorting --- youtube_dl/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0b3e3da82..c499c1da4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1256,8 +1256,8 @@ class YoutubeDL(object): info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if 
thumbnails: thumbnails.sort(key=lambda t: ( - t.get('preference'), t.get('width'), t.get('height'), - t.get('id'), t.get('url'))) + t.get('preference') or -1, t.get('width') or -1, t.get('height') or -1, + t.get('id') or '', t.get('url'))) for i, t in enumerate(thumbnails): t['url'] = sanitize_url(t['url']) if t.get('width') and t.get('height'): From 30afe4aeb25576225d3f3ca486983b5ad9258aa0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 08:49:15 +0100 Subject: [PATCH 1472/3599] [cbc] Add support for watch.cbc.ca --- youtube_dl/extractor/cbc.py | 172 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 2 + 2 files changed, 174 insertions(+) diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py index a87e97140..d71fddf58 100644 --- a/youtube_dl/extractor/cbc.py +++ b/youtube_dl/extractor/cbc.py @@ -9,10 +9,19 @@ from ..utils import ( js_to_json, smuggle_url, try_get, + xpath_text, + xpath_element, + xpath_with_ns, + find_xpath_attr, + parse_iso8601, + parse_age_limit, + int_or_none, + ExtractorError, ) class CBCIE(InfoExtractor): + IE_NAME = 'cbc.ca' _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ # with mediaId @@ -114,6 +123,7 @@ class CBCIE(InfoExtractor): class CBCPlayerIE(InfoExtractor): + IE_NAME = 'cbc.ca:player' _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', @@ -167,3 +177,165 @@ class CBCPlayerIE(InfoExtractor): }), 'id': video_id, } + + +class CBCWatchBaseIE(InfoExtractor): + _device_id = None + _device_token = None + _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' + _NS_MAP = { + 'media': 'http://search.yahoo.com/mrss/', + 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', + } + + def _call_api(self, path, video_id): + url = path if path.startswith('http') else 
self._API_BASE_URL + path + result = self._download_xml(url, video_id, headers={ + 'X-Clearleap-DeviceId': self._device_id, + 'X-Clearleap-DeviceToken': self._device_token, + }) + error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage') + if error_message: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message)) + return result + + def _real_initialize(self): + if not self._device_id or not self._device_token: + device = self._downloader.cache.load('cbcwatch', 'device') or {} + self._device_id, self._device_token = device.get('id'), device.get('token') + if not self._device_id or not self._device_token: + result = self._download_xml( + self._API_BASE_URL + 'device/register', + None, data=b'<device><type>web</type></device>') + self._device_id = xpath_text(result, 'deviceId', fatal=True) + self._device_token = xpath_text(result, 'deviceToken', fatal=True) + self._downloader.cache.store( + 'cbcwatch', 'device', { + 'id': self._device_id, + 'token': self._device_token, + }) + + def _parse_rss_feed(self, rss): + channel = xpath_element(rss, 'channel', fatal=True) + + def _add_ns(path): + return xpath_with_ns(path, self._NS_MAP) + + entries = [] + for item in channel.findall('item'): + guid = xpath_text(item, 'guid', fatal=True) + title = xpath_text(item, 'title', fatal=True) + + media_group = xpath_element(item, _add_ns('media:group'), fatal=True) + content = xpath_element(media_group, _add_ns('media:content'), fatal=True) + content_url = content.attrib['url'] + + thumbnails = [] + for thumbnail in media_group.findall(_add_ns('media:thumbnail')): + thumbnail_url = thumbnail.get('url') + if not thumbnail_url: + continue + thumbnails.append({ + 'id': thumbnail.get('profile'), + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + timestamp = None + release_date = find_xpath_attr( + item, _add_ns('media:credit'), 'role', 'releaseDate') + if release_date 
is not None: + timestamp = parse_iso8601(release_date.text) + + entries.append({ + '_type': 'url_transparent', + 'url': content_url, + 'id': guid, + 'title': title, + 'description': xpath_text(item, 'description'), + 'timestamp': timestamp, + 'duration': int_or_none(content.get('duration')), + 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))), + 'episode': xpath_text(item, _add_ns('clearleap:episode')), + 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))), + 'series': xpath_text(item, _add_ns('clearleap:series')), + 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))), + 'thumbnails': thumbnails, + 'ie_key': 'CBCWatchVideo', + }) + + return self.playlist_result( + entries, xpath_text(channel, 'guid'), + xpath_text(channel, 'title'), + xpath_text(channel, 'description')) + + +class CBCWatchVideoIE(CBCWatchBaseIE): + IE_NAME = 'cbc.ca:watch:video' + _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + + def _real_extract(self, url): + video_id = self._match_id(url) + result = self._call_api(url, video_id) + + m3u8_url = xpath_text(result, 'url', fatal=True) + formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) + if len(formats) < 2: + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + # Despite metadata in m3u8 all video+audio formats are + # actually video-only (no audio) + for f in formats: + if f.get('acodec') != 'none' and f.get('vcodec') != 'none': + f['acodec'] = 'none' + self._sort_formats(formats) + + info = { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + rss = xpath_element(result, 'rss') + if rss: + info.update(self._parse_rss_feed(rss)['entries'][0]) + del info['url'] + del info['_type'] + del info['ie_key'] + return info + + +class 
CBCWatchIE(CBCWatchBaseIE): + IE_NAME = 'cbc.ca:watch' + _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)' + _TESTS = [{ + 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', + 'info_dict': { + 'id': '38e815a-009e3ab12e4', + 'ext': 'mp4', + 'title': 'Customer (Dis)Service', + 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87', + 'upload_date': '20160219', + 'timestamp': 1455840000, + }, + 'params': { + # m3u8 download + 'skip_download': True, + 'format': 'bestvideo', + }, + 'skip': 'Geo-restricted to Canada', + }, { + 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057', + 'info_dict': { + 'id': '1ed4b385-cd84-49cf-95f0-80f004680057', + 'title': 'Arthur', + 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', + }, + 'playlist_mincount': 30, + 'skip': 'Geo-restricted to Canada', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + rss = self._call_api('web/browse/' + video_id, video_id) + return self._parse_rss_feed(rss) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 04cd23bdb..a58145e3e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -130,6 +130,8 @@ from .carambatv import ( from .cbc import ( CBCIE, CBCPlayerIE, + CBCWatchVideoIE, + CBCWatchIE, ) from .cbs import CBSIE from .cbslocal import CBSLocalIE From f70e9229e623eb041ad514605ceca484b176b850 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 09:11:23 +0100 Subject: [PATCH 1473/3599] [discoverygo] detect when video needs authentication(closes #10425) --- youtube_dl/extractor/discoverygo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index cba709935..e86d16d36 100644 --- 
a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -7,6 +7,7 @@ from ..utils import ( int_or_none, parse_age_limit, unescapeHTML, + ExtractorError, ) @@ -53,7 +54,13 @@ class DiscoveryGoIE(InfoExtractor): title = video['name'] - stream = video['stream'] + stream = video.get('stream') + if not stream: + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported. You may want to use --cookies.' + if video.get('authenticated') is True else 'Unable to find stream', + expected=True) STREAM_URL_SUFFIX = 'streamUrl' formats = [] for stream_kind in ('', 'hds'): From 5c13c285660c2811206c5bb29acf43b114ab31e3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 09:55:23 +0100 Subject: [PATCH 1474/3599] raise unexpected error when no stream found --- youtube_dl/extractor/adultswim.py | 11 ++++++----- youtube_dl/extractor/discoverygo.py | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 3f7f8c036..96599048f 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -162,11 +162,12 @@ class AdultSwimIE(InfoExtractor): elif video_info.get('videoPlaybackID'): segment_ids = [video_info['videoPlaybackID']] else: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. You may want to use --cookies.' - if video_info.get('auth') is True else 'Unable to find stream or clips', - expected=True) + if video_info.get('auth') is True: + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported. 
You may want to use --cookies.', expected=True) + else: + raise ExtractorError('Unable to find stream or clips') episode_id = video_info['id'] episode_title = video_info['title'] diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index e86d16d36..c4e83b2c3 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -56,11 +56,12 @@ class DiscoveryGoIE(InfoExtractor): stream = video.get('stream') if not stream: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. You may want to use --cookies.' - if video.get('authenticated') is True else 'Unable to find stream', - expected=True) + if video.get('authenticated') is True: + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported. You may want to use --cookies.', expected=True) + else: + raise ExtractorError('Unable to find stream') STREAM_URL_SUFFIX = 'streamUrl' formats = [] for stream_kind in ('', 'hds'): From d37708fc861b3534c522f2892b5cd2ee716e1035 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 11:53:47 +0100 Subject: [PATCH 1475/3599] [YoutubeDL] check only for None Value in thumbnails sorting --- youtube_dl/YoutubeDL.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c499c1da4..805733fb7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1256,8 +1256,10 @@ class YoutubeDL(object): info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if thumbnails: thumbnails.sort(key=lambda t: ( - t.get('preference') or -1, t.get('width') or -1, t.get('height') or -1, - t.get('id') or '', t.get('url'))) + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') 
is not None else -1, + t.get('id') if t.get('id') is not None else '', t.get('url'))) for i, t in enumerate(thumbnails): t['url'] = sanitize_url(t['url']) if t.get('width') and t.get('height'): From 6a76b53355947eef2a534d8f2505ed683db8754f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:05:01 +0700 Subject: [PATCH 1476/3599] [README.md] Quote URL in streaming to player FAQ entry --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 52e53803e..1aa267752 100644 --- a/README.md +++ b/README.md @@ -820,7 +820,7 @@ Passing cookies to youtube-dl is a good way to workaround login when a particula You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](http://www.videolan.org/) can be achieved with: - youtube-dl -o - http://www.youtube.com/watch?v=BaW_jenozKcj | vlc - + youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - ### Can you add support for this anime video site, or site which shows current movies for free? From ea01cdbf61c9a689e7914dd2d06371f3ef73b490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:17:45 +0700 Subject: [PATCH 1477/3599] [README.md] Clarify how to export cookies from browser for cookies FAQ entry --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1aa267752..0bb7b791f 100644 --- a/README.md +++ b/README.md @@ -812,7 +812,11 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt ### How do I pass cookies to youtube-dl? -Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. 
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. +Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. + +In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/ru/firefox/addon/export-cookies/) (for Firefox). + +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). 
From f26a298247fe19bc8114d6f7a280140dfabee984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:19:41 +0700 Subject: [PATCH 1478/3599] [README.md] Use en-US URL in cookies FAQ entry --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0bb7b791f..04f423c17 100644 --- a/README.md +++ b/README.md @@ -814,7 +814,7 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. -In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/ru/firefox/addon/export-cookies/) (for Firefox). +In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox). Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. 
From 4c8f9c2577da2f4ba7300d44613599e96cde5c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:27:15 +0700 Subject: [PATCH 1479/3599] [README.md] Add comments in sample configuration for clarity --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 04f423c17..e01b71cff 100644 --- a/README.md +++ b/README.md @@ -412,11 +412,19 @@ You can configure youtube-dl by placing any supported command line option to a c For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory: ``` --x ---no-mtime ---proxy 127.0.0.1:3128 --o ~/Movies/%(title)s.%(ext)s # Lines starting with # are comments + +# Always extract audio +-x + +# Do not copy the mtime +--no-mtime + +# Use this proxy +--proxy 127.0.0.1:3128 + +# Save all videos under Movies directory in your home directory +-o ~/Movies/%(title)s.%(ext)s ``` Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. From 5a3efcd27c1262cc7132f7e1a092524b580788ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 25 Aug 2016 18:57:31 +0700 Subject: [PATCH 1480/3599] [README.md] Add FAQ entry for download archive --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index e01b71cff..1b9e2a989 100644 --- a/README.md +++ b/README.md @@ -834,6 +834,18 @@ You will first need to tell youtube-dl to stream media to stdout with `-o -`, an youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - +### How do I download only new videos from playlist? + +Use the download archive feature. 
With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special *download archive file*. Each subsequent run with the same `--download-archive` will download only new videos that are not yet in download archive (if any) and also record them in download archive. Note that only successful downloads are recorded in download archive. + +For example, first run will download complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create download archive `archive.txt`: + + youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" + +Each subsequent run will only download new videos if any: + + youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" + ### Can you add support for this anime video site, or site which shows current movies for free? As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. 
From c1f62dd338e0965507ee0976bc88885fdb0fa780 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 25 Aug 2016 14:45:01 +0200 Subject: [PATCH 1481/3599] [README] Clean up grammar in --download-archive paragraph --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1b9e2a989..20241307f 100644 --- a/README.md +++ b/README.md @@ -834,15 +834,15 @@ You will first need to tell youtube-dl to stream media to stdout with `-o -`, an youtube-dl -o - "http://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - -### How do I download only new videos from playlist? +### How do I download only new videos from a playlist? -Use the download archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special *download archive file*. Each subsequent run with the same `--download-archive` will download only new videos that are not yet in download archive (if any) and also record them in download archive. Note that only successful downloads are recorded in download archive. +Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
-For example, first run will download complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create download archive `archive.txt`: +For example, at first, youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" -Each subsequent run will only download new videos if any: +will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any: youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" From dc2c37f3162da534281f5f3758231e4c2cb8d1b2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 20:45:57 +0800 Subject: [PATCH 1482/3599] [spankbang] Fix description and uploader (closes #10339) --- ChangeLog | 1 + youtube_dl/extractor/spankbang.py | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index c3cc8f38f..5fb596e33 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [spankbang] Fix description and uploader (#10339) * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 50433d0f6..186d22b7d 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -14,7 +14,7 @@ class SpankBangIE(InfoExtractor): 'id': '3vvn', 'ext': 'mp4', 'title': 'fantasy solo', - 'description': 'dillion harper masturbates on a bed', + 'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'silly2587', 'age_limit': 18, @@ -44,12 +44,10 @@ class SpankBangIE(InfoExtractor): title = self._html_search_regex( r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title') - description = self._search_regex( - 
r'class="desc"[^>]*>([^<]+)', - webpage, 'description', default=None) + description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) uploader = self._search_regex( - r'class="user"[^>]*>([^<]+)', + r'class="user"[^>]*><img[^>]+>([^<]+)', webpage, 'uploader', fatal=False) age_limit = self._rta_search(webpage) From b54a2da4333556baa3b34fc595060223181320d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 22:22:31 +0800 Subject: [PATCH 1483/3599] [crackle] Fix extraction and update _TESTS (closes #10333) --- ChangeLog | 1 + youtube_dl/extractor/crackle.py | 58 ++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5fb596e33..0789549c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [crackle] Fix extraction (#10333) * [spankbang] Fix description and uploader (#10339) * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 79238cce7..21f94d33c 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -1,5 +1,7 @@ # coding: utf-8 -from __future__ import unicode_literals +from __future__ import unicode_literals, division + +import re from .common import InfoExtractor from ..utils import int_or_none @@ -8,12 +10,22 @@ from ..utils import int_or_none class CrackleIE(InfoExtractor): _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _TEST = { - 'url': 'http://www.crackle.com/the-art-of-more/2496419', + 'url': 'http://www.crackle.com/comedians-in-cars-getting-coffee/2498934', 'info_dict': { - 'id': '2496419', + 'id': '2498934', 'ext': 'mp4', - 'title': 'Heavy Lies the Head', - 'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca', + 'title': 'Everybody Respects A Bloody Nose', + 'description': 'Jerry is 
kaffeeklatsching in L.A. with funnyman J.B. Smoove (Saturday Night Live, Real Husbands of Hollywood). They’re headed for brew at 10 Speed Coffee in a 1964 Studebaker Avanti.', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 906, + 'series': 'Comedians In Cars Getting Coffee', + 'season_number': 8, + 'episode_number': 4, + 'subtitles': { + 'en-US': [{ + 'ext': 'ttml', + }] + }, }, 'params': { # m3u8 download @@ -21,11 +33,6 @@ class CrackleIE(InfoExtractor): } } - # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx - _SUBTITLE_SERVER = 'http://web-us-az.crackle.com' - _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b' - _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' - # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx _MEDIA_FILE_SLOTS = { 'c544.flv': { @@ -48,19 +55,22 @@ class CrackleIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + + config_doc = self._download_xml( + 'http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx?site=16', + video_id, 'Downloading config') + item = self._download_xml( 'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, video_id).find('i') title = item.attrib['t'] - thumbnail = None subtitles = {} formats = self._extract_m3u8_formats( - 'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id), + 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), video_id, 'mp4', m3u8_id='hls', fatal=None) path = item.attrib.get('p') if path: - thumbnail = self._THUMBNAIL_TEMPLATE % path http_base_url = 'http://ahttp.crackle.com/' + path for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): formats.append({ @@ -76,20 +86,36 @@ class CrackleIE(InfoExtractor): if locale not in subtitles: subtitles[locale] = [] subtitles[locale] = [{ - 'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, 
locale, v), + 'url': '%s/%s%s_%s.xml' % (config_doc.attrib['strSubtitleServer'], path, locale, v), 'ext': 'ttml', }] self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) + media_details = self._download_json( + 'https://web-api-us.crackle.com/Service.svc/details/media/%s/TW?format=json' % video_id, + video_id, fatal=False) + thumbnails = [] + if media_details: + for key, value in media_details.items(): + mobj = re.match('^Thumbnail_(\d+)x(\d+)$', key) + if mobj: + width, height = list(map(int, mobj.groups())) + thumbnails.append({ + 'id': '%dp' % height, + 'url': value, + 'width': width, + 'height': height, + }) + return { 'id': video_id, 'title': title, 'description': item.attrib.get('d'), - 'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None, + 'duration': int(item.attrib.get('r'), 16) / 1000 if item.attrib.get('r') else None, 'series': item.attrib.get('sn'), 'season_number': int_or_none(item.attrib.get('se')), 'episode_number': int_or_none(item.attrib.get('ep')), - 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'subtitles': subtitles, 'formats': formats, } From 20bad91d765284e06f8a8c600a122857d23efeea Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 25 Aug 2016 22:38:06 +0800 Subject: [PATCH 1484/3599] [downloader/external] Clarify that ffmpeg doesn't support SOCKS Ref: #10304 --- youtube_dl/downloader/external.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index cf4556221..17f12e970 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -220,6 +220,11 @@ class FFmpegFD(ExternalFD): if proxy: if not re.match(r'^[\da-zA-Z]+://', proxy): proxy = 'http://%s' % proxy + + if proxy.startswith('socks'): + self.report_warning( + '%s does not support SOCKS proxies. Downloading may fail.' 
% self.get_basename()) + # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) # We could switch to the following code if we are able to detect version properly From a0f071a50dc611a66a5fc8ceceb0b455a88f1cb0 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 25 Aug 2016 19:40:56 +0100 Subject: [PATCH 1485/3599] [usanetwork] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/usanetwork.py | 76 ++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 youtube_dl/extractor/usanetwork.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a58145e3e..74d916e64 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -947,6 +947,7 @@ from .uplynk import ( ) from .urort import UrortIE from .urplay import URPlayIE +from .usanetwork import USANetworkIE from .usatoday import USATodayIE from .ustream import UstreamIE, UstreamChannelIE from .ustudio import ( diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dl/extractor/usanetwork.py new file mode 100644 index 000000000..823340776 --- /dev/null +++ b/youtube_dl/extractor/usanetwork.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .adobepass import AdobePassIE +from ..utils import ( + extract_attributes, + smuggle_url, + update_url_query, +) + + +class USANetworkIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)' + _TEST = { + 'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity', + 'md5': '33c0d2ba381571b414024440d08d57fd', + 'info_dict': { + 'id': '3086229', + 'ext': 'mp4', + 'title': 'HPE Cybersecurity', + 'description': 'The more we digitize our world, the more vulnerable we are.', + 'upload_date': '20160818', + 'timestamp': 1471535460, + 
'uploader': 'NBCU-USA', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + player_params = extract_attributes(self._search_regex( + r'(<div[^>]+data-usa-tve-player-container[^>]*>)', webpage, 'player params')) + video_id = player_params['data-mpx-guid'] + title = player_params['data-episode-title'] + + account_pid, path = re.search( + r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)', + webpage).groups() + + query = { + 'mbr': 'true', + } + if player_params.get('data-is-full-episode') == '1': + query['manifest'] = 'm3u' + + if player_params.get('data-entitlement') == 'auth': + adobe_pass = {} + drupal_settings = self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings', fatal=False) + if drupal_settings: + drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False) + if drupal_settings: + adobe_pass = drupal_settings.get('adobePass', {}) + resource = self._get_mvpd_resource( + adobe_pass.get('adobePassResourceId', 'usa'), + title, video_id, player_params.get('data-episode-rating', 'TV-14')) + query['auth'] = self._extract_mvpd_auth( + url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource) + + info = self._search_json_ld(webpage, video_id, default={}) + info.update({ + '_type': 'url_transparent', + 'url': smuggle_url(update_url_query( + 'http://link.theplatform.com/s/%s/%s' % (account_pid, path), + query), {'force_smil_url': True}), + 'id': video_id, + 'title': title, + 'series': player_params.get('data-show-title'), + 'episode': title, + 'ie_key': 'ThePlatform', + }) + return info From e3faecde30d85f54c1a341350cba609d3f5b6691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 03:43:13 +0700 Subject: [PATCH 1486/3599] [trutube] Remove extractor (Closes #10438) --- youtube_dl/extractor/extractors.py | 1 - 
youtube_dl/extractor/trutube.py | 26 -------------------------- 2 files changed, 27 deletions(-) delete mode 100644 youtube_dl/extractor/trutube.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 74d916e64..717ba9375 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -873,7 +873,6 @@ from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE from .trollvids import TrollvidsIE -from .trutube import TruTubeIE from .tube8 import Tube8IE from .tubitv import TubiTvIE from .tudou import ( diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py deleted file mode 100644 index d55e0c563..000000000 --- a/youtube_dl/extractor/trutube.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import unicode_literals - -from .nuevo import NuevoBaseIE - - -class TruTubeIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', - 'md5': 'c5b6e301b0a2040b074746cbeaa26ca1', - 'info_dict': { - 'id': '14880', - 'ext': 'flv', - 'title': 'Ramses II - Proven To Be A Red Headed Caucasoid', - 'thumbnail': 're:^http:.*\.jpg$', - } - }, { - 'url': 'https://trutube.tv/nuevo/player/embed.php?v=14880', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - return self._extract_nuevo( - 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, - video_id) From 298a120ab76008c900e30de50dc738dd63e79fb4 Mon Sep 17 00:00:00 2001 From: Aleksander Nitecki <ixendr@itogi.re> Date: Thu, 25 Aug 2016 20:21:06 +0200 Subject: [PATCH 1487/3599] [nhk] Add extractor for VoD. 
--- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nhk.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/nhk.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 717ba9375..8d88d6cb4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -541,6 +541,7 @@ from .nextmedia import ( ) from .nfb import NFBIE from .nfl import NFLIE +from .nhk import NhkVodIE from .nhl import ( NHLVideocenterIE, NHLNewsIE, diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py new file mode 100644 index 000000000..90e935351 --- /dev/null +++ b/youtube_dl/extractor/nhk.py @@ -0,0 +1,29 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class NhkVodIE(InfoExtractor): + _VALID_URL = r'http://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+)\.html' + _TESTS = [{ + 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', + 'info_dict': { + 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', + 'ext': 'flv', + 'title': '[nhkworld]VOD;2009-251-2016;TOKYO FASHION EXPRESS;The Kimono as Global Fashion;en', + }, + 'params': { + 'skip_download': True # Videos available only for a limited period of time. 
+ }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + embed_code = self._search_regex( + r'''nw_vod_ooplayer\('movie-area', '([^']+)'\);''', + webpage, + 'ooyala embed code') + + return self.url_result('ooyala:' + embed_code, 'Ooyala') From f9b373afda2a936c4f8303671f3160c532ccae67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 04:48:40 +0700 Subject: [PATCH 1488/3599] [nhk:vod] Improve extraction (Closes #10424) --- youtube_dl/extractor/nhk.py | 43 +++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 90e935351..691bdfa4e 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -4,26 +4,47 @@ from .common import InfoExtractor class NhkVodIE(InfoExtractor): - _VALID_URL = r'http://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+)\.html' - _TESTS = [{ + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>.+?)\.html' + _TEST = { + # Videos available only for a limited period of time. Visit + # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', 'info_dict': { 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'ext': 'flv', - 'title': '[nhkworld]VOD;2009-251-2016;TOKYO FASHION EXPRESS;The Kimono as Global Fashion;en', + 'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion', + 'description': 'md5:db338ee6ce8204f415b754782f819824', + 'series': 'TOKYO FASHION EXPRESS', + 'episode': 'The Kimono as Global Fashion', }, - 'params': { - 'skip_download': True # Videos available only for a limited period of time. 
- }, - }] + 'skip': 'Videos available only for a limited period of time', + } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) embed_code = self._search_regex( - r'''nw_vod_ooplayer\('movie-area', '([^']+)'\);''', - webpage, - 'ooyala embed code') + r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'ooyala embed code', group='id') - return self.url_result('ooyala:' + embed_code, 'Ooyala') + title = self._search_regex( + r'<div[^>]+class=["\']episode-detail["\']>\s*<h\d+>([^<]+)', + webpage, 'title', default=None) + description = self._html_search_regex( + r'(?s)<p[^>]+class=["\']description["\'][^>]*>(.+?)</p>', + webpage, 'description', default=None) + series = self._search_regex( + r'<h2[^>]+class=["\']detail-top-player-title[^>]+><a[^>]+>([^<]+)', + webpage, 'series', default=None) + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % embed_code, + 'title': '%s - %s' % (series, title) if series and title else title, + 'description': description, + 'series': series, + 'episode': title, + } From c9de980106990485fd9bff9a86d463349fe1d384 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 04:49:52 +0700 Subject: [PATCH 1489/3599] Credit @Xender for nhk:vod (#10424) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 1fd4be785..b9a602c12 100644 --- a/AUTHORS +++ b/AUTHORS @@ -181,3 +181,4 @@ Nehal Patel Rob van Bekkum Petr Zvoníček Pratyush Singh +Aleksander Nitecki From 6b18a24e6ee39ab2fdb5e3d9e1cf2eec547ca3f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 26 Aug 2016 05:57:52 +0700 Subject: [PATCH 1490/3599] [tnaflix] Fix extraction (Closes #10434) --- youtube_dl/extractor/tnaflix.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py 
b/youtube_dl/extractor/tnaflix.py index 7ddf77767..77d56b8ca 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, parse_duration, str_to_int, + unescapeHTML, xpath_text, ) @@ -80,7 +81,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor): if not cfg_url: inputs = self._hidden_inputs(webpage) - cfg_url = 'https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s' % (inputs['vkey'], inputs['nkey']) + cfg_url = ('https://cdn-fck.tnaflix.com/tnaflix/%s.fid?key=%s&VID=%s&premium=1&vip=1&alpha' + % (inputs['vkey'], inputs['nkey'], video_id)) cfg_xml = self._download_xml( cfg_url, display_id, 'Downloading metadata', @@ -89,7 +91,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): formats = [] def extract_video_url(vl): - return re.sub('speed=\d+', 'speed=', vl.text) + return re.sub('speed=\d+', 'speed=', unescapeHTML(vl.text)) video_link = cfg_xml.find('./videoLink') if video_link is not None: @@ -201,7 +203,7 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): _TESTS = [{ # anonymous uploader, no categories 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', - 'md5': '7e569419fe6d69543d01e6be22f5f7c4', + 'md5': 'ecf3498417d09216374fc5907f9c6ec0', 'info_dict': { 'id': '553878', 'display_id': 'Carmella-Decesare-striptease', @@ -215,11 +217,11 @@ class TNAFlixIE(TNAFlixNetworkBaseIE): }, { # non-anonymous uploader, categories 'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538', - 'md5': 'fcba2636572895aba116171a899a5658', + 'md5': '0f5d4d490dbfd117b8607054248a07c0', 'info_dict': { 'id': '6538', 'display_id': 'Educational-xxx-video', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Educational xxx video', 'description': 'md5:b4fab8f88a8621c8fabd361a173fe5b8', 'thumbnail': 're:https?://.*\.jpg$', From b281aad2dc658e3c6535579d75b42a5634487b83 Mon Sep 17 00:00:00 2001 From: steven7851 <steven7851@msn.com> Date: Fri, 26 Aug 2016 07:32:54 +0800 Subject: [PATCH 1491/3599] 
[douyutv] Use new api use lapi for flv info, and html5 api for room info #10153 #10318 --- youtube_dl/extractor/douyutv.py | 87 ++++++++++++++++----------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index ce6962755..33efc993e 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -3,9 +3,10 @@ from __future__ import unicode_literals import hashlib import time +import uuid from .common import InfoExtractor from ..utils import (ExtractorError, unescapeHTML) -from ..compat import (compat_str, compat_basestring) +from ..compat import (compat_str, compat_basestring, compat_urllib_parse_urlencode) class DouyuTVIE(InfoExtractor): @@ -21,7 +22,6 @@ class DouyuTVIE(InfoExtractor): 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { @@ -37,7 +37,6 @@ class DouyuTVIE(InfoExtractor): 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'douyu小漠', - 'uploader_id': '3769985', 'is_live': True, }, 'params': { @@ -54,7 +53,6 @@ class DouyuTVIE(InfoExtractor): 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { @@ -75,19 +73,39 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id"\s*:\s*(\d+),', page, 'room id') - config = None + room_url = 'http://m.douyu.com/html5/live?roomId=%s' % room_id + room_content = self._download_webpage(room_url, video_id) + room_json = self._parse_json(room_content, video_id, fatal=False) + + room = room_json['data'] + + show_status = room.get('show_status') + # 1 = live, 2 = offline + if show_status == '2': + raise ExtractorError( + 'Live stream is offline', expected=True) + + flv_json = None # Douyu API sometimes returns error "Unable to load the 
requested class: eticket_redis_cache" # Retry with different parameters - same parameters cause same errors for i in range(5): - prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( - room_id, int(time.time())) - auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() + tt = int(time.time() / 60) + did = uuid.uuid4().hex.upper() - config_page = self._download_webpage( - 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), - video_id) + # Decompile core.swf in webpage by ffdec "Search SWFs in memory" + # core.swf is encrypted originally, but ffdec can dump memory to get the decrypted one + # If API changes in the future, just use this way to update + sign_content = '{room_id}{did}A12Svb&%1UUmf@hC{tt}'.format(room_id = room_id, did = did, tt = tt) + sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() + + payload = {'cdn': 'ws', 'rate': '0', 'tt': tt, 'did': did, 'sign': sign} + flv_data = compat_urllib_parse_urlencode(payload) + + flv_request_url = 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id + flv_content = self._download_webpage(flv_request_url, video_id, data=flv_data, + headers={'Content-Type': 'application/x-www-form-urlencoded'}) try: - config = self._parse_json(config_page, video_id, fatal=False) + flv_json = self._parse_json(flv_content, video_id, fatal=False) except ExtractorError: # Wait some time before retrying to get a different time() value self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. 
' @@ -95,54 +113,35 @@ class DouyuTVIE(InfoExtractor): continue else: break - if config is None: + if flv_json is None: raise ExtractorError('Unable to fetch API result') - data = config['data'] + flv = flv_json['data'] - error_code = config.get('error', 0) + error_code = flv_json.get('error', 0) if error_code is not 0: error_desc = 'Server reported error %i' % error_code - if isinstance(data, (compat_str, compat_basestring)): - error_desc += ': ' + data + if isinstance(flv, (compat_str, compat_basestring)): + error_desc += ': ' + flv raise ExtractorError(error_desc, expected=True) - show_status = data.get('show_status') - # 1 = live, 2 = offline - if show_status == '2': - raise ExtractorError( - 'Live stream is offline', expected=True) + base_url = flv['rtmp_url'] + live_path = flv['rtmp_live'] - base_url = data['rtmp_url'] - live_path = data['rtmp_live'] + video_url = '%s/%s' % (base_url, live_path) - title = self._live_title(unescapeHTML(data['room_name'])) - description = data.get('show_details') - thumbnail = data.get('room_src') - - uploader = data.get('nickname') - uploader_id = data.get('owner_uid') - - multi_formats = data.get('rtmp_multi_bitrate') - if not isinstance(multi_formats, dict): - multi_formats = {} - multi_formats['live'] = live_path - - formats = [{ - 'url': '%s/%s' % (base_url, format_path), - 'format_id': format_id, - 'preference': 1 if format_id == 'live' else 0, - } for format_id, format_path in multi_formats.items()] - self._sort_formats(formats) + title = self._live_title(unescapeHTML(room['room_name'])) + description = room.get('notice') + thumbnail = room.get('room_src') + uploader = room.get('nickname') return { 'id': room_id, 'display_id': video_id, + 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, - 'uploader_id': uploader_id, - 'formats': formats, 'is_live': True, } From 906b87cf5f6ccf28ebd75d6a92367d7c238f2ad9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Fri, 26 Aug 2016 19:58:17 +0800 Subject: [PATCH 1492/3599] [crackle] Revert to template-based thumbnail extraction To reduce to number of HTTP requests --- youtube_dl/extractor/crackle.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py index 21f94d33c..cc68f1c00 100644 --- a/youtube_dl/extractor/crackle.py +++ b/youtube_dl/extractor/crackle.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals, division -import re - from .common import InfoExtractor from ..utils import int_or_none @@ -34,6 +32,7 @@ class CrackleIE(InfoExtractor): } # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx + _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614' _MEDIA_FILE_SLOTS = { 'c544.flv': { 'width': 544, @@ -69,8 +68,10 @@ class CrackleIE(InfoExtractor): formats = self._extract_m3u8_formats( 'http://content.uplynk.com/ext/%s/%s.m3u8' % (config_doc.attrib['strUplynkOwnerId'], video_id), video_id, 'mp4', m3u8_id='hls', fatal=None) + thumbnail = None path = item.attrib.get('p') if path: + thumbnail = self._THUMBNAIL_TEMPLATE % path http_base_url = 'http://ahttp.crackle.com/' + path for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items(): formats.append({ @@ -91,22 +92,6 @@ class CrackleIE(InfoExtractor): }] self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) - media_details = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/details/media/%s/TW?format=json' % video_id, - video_id, fatal=False) - thumbnails = [] - if media_details: - for key, value in media_details.items(): - mobj = re.match('^Thumbnail_(\d+)x(\d+)$', key) - if mobj: - width, height = list(map(int, mobj.groups())) - thumbnails.append({ - 'id': '%dp' % height, - 'url': value, - 'width': width, - 'height': height, - }) - return { 'id': video_id, 
'title': title, @@ -115,7 +100,7 @@ class CrackleIE(InfoExtractor): 'series': item.attrib.get('sn'), 'season_number': int_or_none(item.attrib.get('se')), 'episode_number': int_or_none(item.attrib.get('ep')), - 'thumbnails': thumbnails, + 'thumbnail': thumbnail, 'subtitles': subtitles, 'formats': formats, } From 3b4b82d4cec702fc06e2d6b38a44dd0c7bd77a5b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 27 Aug 2016 01:16:39 +0800 Subject: [PATCH 1493/3599] [douyutv] Simplify --- youtube_dl/extractor/douyutv.py | 86 +++++++++++++++------------------ 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 33efc993e..e366e17e6 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -4,9 +4,16 @@ from __future__ import unicode_literals import hashlib import time import uuid + from .common import InfoExtractor -from ..utils import (ExtractorError, unescapeHTML) -from ..compat import (compat_str, compat_basestring, compat_urllib_parse_urlencode) +from ..compat import ( + compat_str, + compat_urllib_parse_urlencode, +) +from ..utils import ( + ExtractorError, + unescapeHTML, +) class DouyuTVIE(InfoExtractor): @@ -63,6 +70,10 @@ class DouyuTVIE(InfoExtractor): 'only_matching': True, }] + # Decompile core.swf in webpage by ffdec "Search SWFs in memory". core.swf + # is encrypted originally, but ffdec can dump memory to get the decrypted one. 
+ _API_KEY = 'A12Svb&%1UUmf@hC' + def _real_extract(self, url): video_id = self._match_id(url) @@ -73,60 +84,41 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id"\s*:\s*(\d+),', page, 'room id') - room_url = 'http://m.douyu.com/html5/live?roomId=%s' % room_id - room_content = self._download_webpage(room_url, video_id) - room_json = self._parse_json(room_content, video_id, fatal=False) + room = self._download_json( + 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, + note='Downloading room info')['data'] - room = room_json['data'] - - show_status = room.get('show_status') # 1 = live, 2 = offline - if show_status == '2': - raise ExtractorError( - 'Live stream is offline', expected=True) + if room.get('show_status') == '2': + raise ExtractorError('Live stream is offline', expected=True) - flv_json = None - # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache" - # Retry with different parameters - same parameters cause same errors - for i in range(5): - tt = int(time.time() / 60) - did = uuid.uuid4().hex.upper() + tt = compat_str(int(time.time() / 60)) + did = uuid.uuid4().hex.upper() - # Decompile core.swf in webpage by ffdec "Search SWFs in memory" - # core.swf is encrypted originally, but ffdec can dump memory to get the decrypted one - # If API changes in the future, just use this way to update - sign_content = '{room_id}{did}A12Svb&%1UUmf@hC{tt}'.format(room_id = room_id, did = did, tt = tt) - sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() + sign_content = ''.join((room_id, did, self._API_KEY, tt)) + sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() - payload = {'cdn': 'ws', 'rate': '0', 'tt': tt, 'did': did, 'sign': sign} - flv_data = compat_urllib_parse_urlencode(payload) + flv_data = compat_urllib_parse_urlencode({ + 'cdn': 'ws', + 'rate': '0', + 'tt': tt, + 'did': did, + 'sign': sign, + }) - flv_request_url = 
'http://www.douyu.com/lapi/live/getPlay/%s' % room_id - flv_content = self._download_webpage(flv_request_url, video_id, data=flv_data, - headers={'Content-Type': 'application/x-www-form-urlencoded'}) - try: - flv_json = self._parse_json(flv_content, video_id, fatal=False) - except ExtractorError: - # Wait some time before retrying to get a different time() value - self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. ' - 'Waiting for %(timeout)s seconds before retrying') - continue - else: - break - if flv_json is None: - raise ExtractorError('Unable to fetch API result') + video_info = self._download_json( + 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id, video_id, + data=flv_data, note='Downloading video info', + headers={'Content-Type': 'application/x-www-form-urlencoded'}) - flv = flv_json['data'] - - error_code = flv_json.get('error', 0) + error_code = video_info.get('error', 0) if error_code is not 0: - error_desc = 'Server reported error %i' % error_code - if isinstance(flv, (compat_str, compat_basestring)): - error_desc += ': ' + flv - raise ExtractorError(error_desc, expected=True) + raise ExtractorError( + '%s reported error %i' % (self.IE_NAME, error_code), + expected=True) - base_url = flv['rtmp_url'] - live_path = flv['rtmp_live'] + base_url = video_info['data']['rtmp_url'] + live_path = video_info['data']['rtmp_live'] video_url = '%s/%s' % (base_url, live_path) From 92c27a0dbf19eff211e7ffdd8db5895387e75529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 02:35:49 +0700 Subject: [PATCH 1494/3599] [periscope:user] Fix extraction (Closes #10453) --- youtube_dl/extractor/periscope.py | 47 ++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 75f5884a9..6c640089d 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -8,7 +8,14 @@ 
from ..utils import ( ) -class PeriscopeIE(InfoExtractor): +class PeriscopeBaseIE(InfoExtractor): + def _call_api(self, method, query, item_id): + return self._download_json( + 'https://api.periscope.tv/api/v2/%s' % method, + item_id, query=query) + + +class PeriscopeIE(PeriscopeBaseIE): IE_DESC = 'Periscope' IE_NAME = 'periscope' _VALID_URL = r'https?://(?:www\.)?periscope\.tv/[^/]+/(?P<id>[^/?#]+)' @@ -34,14 +41,11 @@ class PeriscopeIE(InfoExtractor): 'only_matching': True, }] - def _call_api(self, method, value): - return self._download_json( - 'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value) - def _real_extract(self, url): token = self._match_id(url) - broadcast_data = self._call_api('getBroadcastPublic', token) + broadcast_data = self._call_api( + 'getBroadcastPublic', {'broadcast_id': token}, token) broadcast = broadcast_data['broadcast'] status = broadcast['status'] @@ -61,7 +65,8 @@ class PeriscopeIE(InfoExtractor): 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] - stream = self._call_api('getAccessPublic', token) + stream = self._call_api( + 'getAccessPublic', {'broadcast_id': token}, token) formats = [] for format_id in ('replay', 'rtmp', 'hls', 'https_hls'): @@ -88,7 +93,7 @@ class PeriscopeIE(InfoExtractor): } -class PeriscopeUserIE(InfoExtractor): +class PeriscopeUserIE(PeriscopeBaseIE): _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$' IE_DESC = 'Periscope user videos' IE_NAME = 'periscope:user' @@ -106,26 +111,34 @@ class PeriscopeUserIE(InfoExtractor): } def _real_extract(self, url): - user_id = self._match_id(url) + user_name = self._match_id(url) - webpage = self._download_webpage(url, user_id) + webpage = self._download_webpage(url, user_name) data_store = self._parse_json( unescapeHTML(self._search_regex( r'data-store=(["\'])(?P<data>.+?)\1', webpage, 'data store', default='{}', group='data')), - user_id) + user_name) - user = data_store.get('User', 
{}).get('user', {}) - title = user.get('display_name') or user.get('username') + user = list(data_store['UserCache']['users'].values())[0]['user'] + user_id = user['id'] + session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id'] + + broadcasts = self._call_api( + 'getUserBroadcastsPublic', + {'user_id': user_id, 'session_id': session_id}, + user_name)['broadcasts'] + + broadcast_ids = [ + broadcast['id'] for broadcast in broadcasts if broadcast.get('id')] + + title = user.get('display_name') or user.get('username') or user_name description = user.get('description') - broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or - data_store.get('BroadcastCache', {}).get('broadcastIds', [])) - entries = [ self.url_result( - 'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id)) + 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id)) for broadcast_id in broadcast_ids] return self.playlist_result(entries, user_id, title, description) From d7aae610f6674d96971246f916973158374f88b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 07:00:15 +0700 Subject: [PATCH 1495/3599] [ChangeLog] Actualize --- ChangeLog | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0789549c0..4062c2021 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Add warning message that ffmpeg doesn't support SOCKS +* Improve thumbnail sorting ++ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats +* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative ++ Add ac-3 to the list of audio codecs in parse_codecs + Extractors +* [periscope:user] Fix extraction (#10453) +* [douyutv] Fix extraction (#10153, #10318, #10444) ++ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424) +- [trutube] Remove extractor (#10438) ++ [usanetwork] Add extractor for 
usanetwork.com * [crackle] Fix extraction (#10333) -* [spankbang] Fix description and uploader (#10339) +* [spankbang] Fix description and uploader extraction (#10339) +* [discoverygo] Detect cable provider restricted videos (#10425) ++ [cbc] Add support for watch.cbc.ca +* [kickstarter] Silent the warning for og:description (#10415) * [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) From 71e90766b5f7d57bdbe20b71c32ce5a8f66aecc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 07:09:03 +0700 Subject: [PATCH 1496/3599] [README.md] Fix typo in download archive FAQ entry --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 20241307f..87465aa5e 100644 --- a/README.md +++ b/README.md @@ -836,7 +836,7 @@ You will first need to tell youtube-dl to stream media to stdout with `-o -`, an ### How do I download only new videos from a playlist? -Use download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos that and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file. +Use download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` that will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file. 
For example, at first, From 1198fe14a1eff1047652c51163266246577e3682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 28 Aug 2016 07:24:08 +0700 Subject: [PATCH 1497/3599] release 2016.08.28 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 11 +++++++---- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 15acc025a..a2fe59f80 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.24.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.24.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.28** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.24.1 +[debug] youtube-dl version 2016.08.28 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4062c2021..d3496b5dc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.28 Core + Add warning message that ffmpeg doesn't support SOCKS diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 08db56fa9..bf08697be 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -89,7 +89,7 @@ - **Bet** - **Bigflix** - **Bild**: Bild.de - - **BiliBili** + - **BiliBili** (Currently broken) - **BioBioChileTV** - **BIQLE** - **BleacherReport** @@ -115,8 +115,10 @@ - **Canvas** - **CarambaTV** - **CarambaTVPage** - - **CBC** - - **CBCPlayer** + - **cbc.ca** + - **cbc.ca:player** + - **cbc.ca:watch** + - **cbc.ca:watch:video** - **CBS** - **CBSInteractive** - **CBSLocal** @@ -448,6 +450,7 @@ - **NextMediaActionNews**: 蘋果日報 - 動新聞 - **nfb**: National Film Board of Canada - **nfl.com** + - **NhkVod** - **nhl.com** - **nhl.com:news**: NHL news - **nhl.com:videocenter** @@ -713,7 +716,6 @@ - **TrailerAddict** (Currently broken) - **Trilulilu** - **trollvids** - - **TruTube** - **Tube8** - **TubiTv** - **tudou** @@ -758,6 +760,7 @@ - **uplynk:preplay** - 
**Urort**: NRK P3 Urørt - **URPlay** + - **USANetwork** - **USAToday** - **ustream** - **ustream:channel** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7447d3d7e..ee30ca2ad 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.24.1' +__version__ = '2016.08.28' From 39efc6e3e048a8323c36efcdf6b7434259a35e44 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 15:46:11 +0800 Subject: [PATCH 1498/3599] [generic] Update some _TESTS --- youtube_dl/extractor/generic.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 506892b11..c6e655c84 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -104,7 +104,8 @@ class GenericIE(InfoExtractor): }, 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' - ] + ], + 'skip': 'URL invalid', }, # Direct download with broken HEAD { @@ -268,7 +269,8 @@ class GenericIE(InfoExtractor): 'params': { # m3u8 downloads 'skip_download': True, - } + }, + 'skip': 'video gone', }, # m3u8 served with Content-Type: text/plain { @@ -283,7 +285,8 @@ class GenericIE(InfoExtractor): 'params': { # m3u8 downloads 'skip_download': True, - } + }, + 'skip': 'video gone', }, # google redirect { @@ -368,6 +371,7 @@ class GenericIE(InfoExtractor): 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. 
To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', }, 'add_ie': ['BrightcoveLegacy'], + 'skip': 'video gone', }, { 'url': 'http://www.championat.com/video/football/v/87/87499.html', @@ -421,6 +425,7 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'movie expired', }, # embed.ly video { @@ -448,6 +453,8 @@ class GenericIE(InfoExtractor): 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', }, + # HEAD requests lead to endless 301, while GET is OK + 'expected_warnings': ['301'], }, # RUTV embed { @@ -522,6 +529,9 @@ class GenericIE(InfoExtractor): 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )', }, 'playlist_mincount': 7, + # This forum does not allow <iframe> syntaxes anymore + # Now HTML tags are displayed as-is + 'skip': 'No videos on this page', }, # Embedded TED video { @@ -570,7 +580,8 @@ class GenericIE(InfoExtractor): }, 'params': { 'skip_download': 'Requires rtmpdump' - } + }, + 'skip': 'video gone', }, # francetv embed { From 40eec6b15cd3135b24cb42fde5ccf62e9a1f0807 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 20:27:08 +0800 Subject: [PATCH 1499/3599] [openload] Fix extraction (closes #10408) Thanks to @yokrysty again! 
--- ChangeLog | 6 ++++++ youtube_dl/extractor/openload.py | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index d3496b5dc..5d7a052a5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [openload] Fix extraction (#10408) + + version 2016.08.28 Core diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index e181d0b3a..c8dde7ae3 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -55,10 +55,12 @@ class OpenloadIE(InfoExtractor): video_url_chars = [] - for c in enc_data: + for idx, c in enumerate(enc_data): j = compat_ord(c) if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 + if idx == len(enc_data) - 1: + j += 2 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 04b32c8f9679004d11ee97c2b7beecaedf1b477b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 22:06:31 +0800 Subject: [PATCH 1500/3599] [bilibili] Fix extraction (closes #10375) Thanks @gdkchan for the algorithm --- ChangeLog | 1 + youtube_dl/extractor/bilibili.py | 98 ++++++++++++-------------------- 2 files changed, 36 insertions(+), 63 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5d7a052a5..e055976c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [bilibili] Fix extraction (#10375) * [openload] Fix extraction (#10408) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index d87c38a02..a332fbb69 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -1,35 +1,26 @@ # coding: utf-8 from __future__ import unicode_literals -import calendar -import datetime +import hashlib import re from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_str, - compat_parse_qs, - compat_xml_parse_error, -) +from ..compat import 
compat_parse_qs from ..utils import ( - ExtractorError, int_or_none, float_or_none, - xpath_text, + unified_timestamp, ) class BiliBiliIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { - 'id': '1554319', + 'id': '1074402', 'ext': 'mp4', 'title': '【金坷垃】金泡沫', 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', @@ -43,24 +34,28 @@ class BiliBiliIE(InfoExtractor): }, { 'url': 'http://www.bilibili.com/video/av1041170/', 'info_dict': { - 'id': '1507019', + 'id': '1041170', 'ext': 'mp4', 'title': '【BD1080P】刀语【诸神&异域】', 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', + 'duration': 3382.259, 'timestamp': 1396530060, 'upload_date': '20140403', + 'thumbnail': 're:^https?://.+\.jpg', 'uploader': '枫叶逝去', 'uploader_id': '520116', }, }, { 'url': 'http://www.bilibili.com/video/av4808130/', 'info_dict': { - 'id': '7802182', + 'id': '4808130', 'ext': 'mp4', 'title': '【长篇】哆啦A梦443【钉铛】', 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', + 'duration': 1493.995, 'timestamp': 1464564180, 'upload_date': '20160529', + 'thumbnail': 're:^https?://.+\.jpg', 'uploader': '喜欢拉面', 'uploader_id': '151066', }, @@ -68,12 +63,14 @@ class BiliBiliIE(InfoExtractor): # Missing upload time 'url': 'http://www.bilibili.com/video/av1867637/', 'info_dict': { - 'id': '2880301', + 'id': '1867637', 'ext': 'mp4', 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', + 'duration': 5760.0, 'uploader': '黑夜为猫', 'uploader_id': '610729', + 'thumbnail': 're:^https?://.+\.jpg', }, 'params': { # Just to test metadata extraction @@ -82,86 +79,61 @@ class BiliBiliIE(InfoExtractor): 'expected_warnings': ['upload time'], }] - # BiliBili blocks keys 
from time to time. The current key is extracted from - # the Android client - # TODO: find the sign algorithm used in the flash player - _APP_KEY = '86385cdc024c0f6c' + _APP_KEY = '6f90a59ac58a4123' + _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - params = compat_parse_qs(self._search_regex( + cid = compat_parse_qs(self._search_regex( [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters')) - cid = params['cid'][0] + webpage, 'player parameters'))['cid'][0] - info_xml_str = self._download_webpage( - 'http://interface.bilibili.com/v_cdn_play', - cid, query={'appkey': self._APP_KEY, 'cid': cid}, - note='Downloading video info page') + payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) + sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() - err_msg = None - durls = None - info_xml = None - try: - info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8')) - except compat_xml_parse_error: - info_json = self._parse_json(info_xml_str, video_id, fatal=False) - err_msg = (info_json or {}).get('error_text') - else: - err_msg = xpath_text(info_xml, './message') - - if info_xml is not None: - durls = info_xml.findall('./durl') - if not durls: - if err_msg: - raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True) - else: - raise ExtractorError('No videos found!') + video_info = self._download_json( + 'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign), + video_id, note='Downloading video info page') entries = [] - for durl in durls: - size = xpath_text(durl, ['./filesize', './size']) + for idx, durl in enumerate(video_info['durl']): formats = [{ - 'url': durl.find('./url').text, - 'filesize': int_or_none(size), 
+ 'url': durl['url'], + 'filesize': int_or_none(durl['size']), }] - for backup_url in durl.findall('./backup_url/url'): + for backup_url in durl['backup_url']: formats.append({ - 'url': backup_url.text, + 'url': backup_url, # backup URLs have lower priorities - 'preference': -2 if 'hd.mp4' in backup_url.text else -3, + 'preference': -2 if 'hd.mp4' in backup_url else -3, }) self._sort_formats(formats) entries.append({ - 'id': '%s_part%s' % (cid, xpath_text(durl, './order')), - 'duration': int_or_none(xpath_text(durl, './length'), 1000), + 'id': '%s_part%s' % (video_id, idx), + 'duration': float_or_none(durl.get('length'), 1000), 'formats': formats, }) title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title') description = self._html_search_meta('description', webpage) - datetime_str = self._html_search_regex( - r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False) - timestamp = None - if datetime_str: - timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple()) + timestamp = unified_timestamp(self._html_search_regex( + r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) # TODO 'view_count' requires deobfuscating Javascript info = { - 'id': compat_str(cid), + 'id': video_id, 'title': title, 'description': description, 'timestamp': timestamp, 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), - 'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000), + 'duration': float_or_none(video_info.get('timelength'), scale=1000), } uploader_mobj = re.search( From 98908bcf7c50d034042ab86223b7689e91b589ba Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 28 Aug 2016 22:49:46 +0800 Subject: [PATCH 1501/3599] [openload] Update algorithm again (#10408) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 
c8dde7ae3..03baf8e32 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -60,7 +60,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 2 + j += 1 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 2982514072594b1f708abdf654b31da77c0bfa81 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 16:43:15 +0100 Subject: [PATCH 1502/3599] [turner,nba,cnn,adultswim] add base extractor to parse cvp feeds --- youtube_dl/extractor/adultswim.py | 70 +++---------- youtube_dl/extractor/cnn.py | 97 +++++------------- youtube_dl/extractor/nba.py | 70 +++---------- youtube_dl/extractor/turner.py | 163 ++++++++++++++++++++++++++++++ 4 files changed, 214 insertions(+), 186 deletions(-) create mode 100644 youtube_dl/extractor/turner.py diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 96599048f..ef3cc2a61 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -3,16 +3,11 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor -from ..utils import ( - determine_ext, - ExtractorError, - float_or_none, - xpath_text, -) +from .turner import TurnerBaseIE +from ..utils import ExtractorError -class AdultSwimIE(InfoExtractor): +class AdultSwimIE(TurnerBaseIE): _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?' 
_TESTS = [{ @@ -96,7 +91,8 @@ class AdultSwimIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, + 'expected_warnings': ['Unable to download f4m manifest'], }] @staticmethod @@ -176,57 +172,23 @@ class AdultSwimIE(InfoExtractor): entries = [] for part_num, segment_id in enumerate(segment_ids): - segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id - + segement_info = self._extract_cvp_info( + 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id, + segment_id, { + 'secure': { + 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big', + 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do', + }, + }) segment_title = '%s - %s' % (show_title, episode_title) if len(segment_ids) > 1: segment_title += ' Part %d' % (part_num + 1) - - idoc = self._download_xml( - segment_url, segment_title, - 'Downloading segment information', 'Unable to download segment information') - - segment_duration = float_or_none( - xpath_text(idoc, './/trt', 'segment duration').strip()) - - formats = [] - file_els = idoc.findall('.//files/file') or idoc.findall('./files/file') - - unique_urls = [] - unique_file_els = [] - for file_el in file_els: - media_url = file_el.text - if not media_url or determine_ext(media_url) == 'f4m': - continue - if file_el.text not in unique_urls: - unique_urls.append(file_el.text) - unique_file_els.append(file_el) - - for file_el in unique_file_els: - bitrate = file_el.attrib.get('bitrate') - ftype = file_el.attrib.get('type') - media_url = file_el.text - if determine_ext(media_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - media_url, segment_title, 'mp4', preference=0, - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'format_id': '%s_%s' % (bitrate, ftype), - 'url': file_el.text.strip(), - # The bitrate may not be a number (for example: 'iphone') - 'tbr': int(bitrate) if bitrate.isdigit() 
else None, - }) - - self._sort_formats(formats) - - entries.append({ + segement_info.update({ 'id': segment_id, 'title': segment_title, - 'formats': formats, - 'duration': segment_duration, - 'description': episode_description + 'description': episode_description, }) + entries.append(segement_info) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 220bb55e8..1bf87f6ea 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -3,14 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - url_basename, -) +from .turner import TurnerBaseIE +from ..utils import url_basename -class CNNIE(InfoExtractor): +class CNNIE(TurnerBaseIE): _VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' @@ -18,43 +15,50 @@ class CNNIE(InfoExtractor): 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'md5': '3e6121ea48df7e2259fe73a0628605c4', 'info_dict': { - 'id': 'sports/2013/06/09/nadal-1-on-1.cnn', + 'id': 'nadal-1-on-1', 'ext': 'mp4', 'title': 'Nadal wins 8th French Open title', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'duration': 135, 'upload_date': '20130609', }, + 'expected_warnings': ['Failed to download m3u8 information'], }, { 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', 'info_dict': { - 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', + 'id': 'sot-student-gives-epic-speech', 'ext': 'mp4', 'title': "Student's epic speech stuns new freshmen", 
'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", 'upload_date': '20130821', - } + }, + 'expected_warnings': ['Failed to download m3u8 information'], }, { 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', 'md5': 'f14d02ebd264df951feb2400e2c25a1b', 'info_dict': { - 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', + 'id': 'growing-america-nashville-salemtown-board-episode-1', 'ext': 'mp4', 'title': 'Nashville Ep. 1: Hand crafted skateboards', 'description': 'md5:e7223a503315c9f150acac52e76de086', 'upload_date': '20141222', - } + }, + 'expected_warnings': ['Failed to download m3u8 information'], }, { 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', 'info_dict': { - 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', + 'id': 'netflix-stunning-stats', 'ext': 'mp4', 'title': '5 stunning stats about Netflix', 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', 'upload_date': '20160819', - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'only_matching': True, @@ -84,67 +88,12 @@ class CNNIE(InfoExtractor): if sub_domain not in ('money', 'edition'): sub_domain = 'edition' config = self._CONFIG[sub_domain] - info_url = config['data_src'] % path - info = self._download_xml(info_url, page_title) - - formats = [] - rex = re.compile(r'''(?x) - (?P<width>[0-9]+)x(?P<height>[0-9]+) - (?:_(?P<bitrate>[0-9]+)k)? 
- ''') - for f in info.findall('files/file'): - video_url = config['media_src'] + f.text.strip() - fdct = { - 'format_id': f.attrib['bitrate'], - 'url': video_url, - } - - mf = rex.match(f.attrib['bitrate']) - if mf: - fdct['width'] = int(mf.group('width')) - fdct['height'] = int(mf.group('height')) - fdct['tbr'] = int_or_none(mf.group('bitrate')) - else: - mf = rex.search(f.text) - if mf: - fdct['width'] = int(mf.group('width')) - fdct['height'] = int(mf.group('height')) - fdct['tbr'] = int_or_none(mf.group('bitrate')) - else: - mi = re.match(r'ios_(audio|[0-9]+)$', f.attrib['bitrate']) - if mi: - if mi.group(1) == 'audio': - fdct['vcodec'] = 'none' - fdct['ext'] = 'm4a' - else: - fdct['tbr'] = int(mi.group(1)) - - formats.append(fdct) - - self._sort_formats(formats) - - thumbnails = [{ - 'height': int(t.attrib['height']), - 'width': int(t.attrib['width']), - 'url': t.text, - } for t in info.findall('images/image')] - - metas_el = info.find('metas') - upload_date = ( - metas_el.attrib.get('version') if metas_el is not None else None) - - duration_el = info.find('length') - duration = parse_duration(duration_el.text) - - return { - 'id': info.attrib['id'], - 'title': info.find('headline').text, - 'formats': formats, - 'thumbnails': thumbnails, - 'description': info.find('description').text, - 'duration': duration, - 'upload_date': upload_date, - } + return self._extract_cvp_info( + config['data_src'] % path, page_title, { + 'default': { + 'media_src': config['media_src'], + } + }) class CNNBlogsIE(InfoExtractor): diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index d896b0d04..aabd5b670 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -1,25 +1,20 @@ from __future__ import unicode_literals import functools -import os.path import re -from .common import InfoExtractor +from .turner import TurnerBaseIE from ..compat import ( compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( - int_or_none, 
OnDemandPagedList, - parse_duration, remove_start, - xpath_text, - xpath_attr, ) -class NBAIE(InfoExtractor): +class NBAIE(TurnerBaseIE): _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$' _TESTS = [{ 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', @@ -59,7 +54,7 @@ class NBAIE(InfoExtractor): 'ext': 'mp4', 'title': 'Practice: Doc Rivers - 2/16/16', 'description': 'Head Coach Doc Rivers addresses the media following practice.', - 'upload_date': '20160217', + 'upload_date': '20160216', 'timestamp': 1455672000, }, 'params': { @@ -80,7 +75,7 @@ class NBAIE(InfoExtractor): }, { 'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', 'info_dict': { - 'id': 'Wigginsmp4', + 'id': 'Wigginsmp4-3462601', 'ext': 'mp4', 'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins', 'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.', @@ -145,53 +140,12 @@ class NBAIE(InfoExtractor): if path.startswith('video/teams'): path = 'video/channels/proxy/' + path[6:] - video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id) - video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0] - title = xpath_text(video_info, 'headline') - description = xpath_text(video_info, 'description') - duration = parse_duration(xpath_text(video_info, 'length')) - timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts')) - - thumbnails = [] - for image in video_info.find('images'): - thumbnails.append({ - 'id': image.attrib.get('cut'), - 'url': image.text, - 'width': int_or_none(image.attrib.get('width')), - 'height': int_or_none(image.attrib.get('height')), + return self._extract_cvp_info( + 'http://www.nba.com/%s.xml' % path, video_id, { + 'default': { + 'media_src': 'http://nba.cdn.turner.com/nba/big', + }, + 'm3u8': { + 'media_src': 'http://nbavod-f.akamaihd.net', + }, }) - - formats = [] - for video_file in 
video_info.findall('.//file'): - video_url = video_file.text - if video_url.startswith('/'): - continue - if video_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) - elif video_url.endswith('.f4m'): - formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)) - else: - key = video_file.attrib.get('bitrate') - format_info = { - 'format_id': key, - 'url': video_url, - } - mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key) - if mobj: - format_info.update({ - 'width': int(mobj.group(1)), - 'height': int(mobj.group(2)), - 'tbr': int_or_none(mobj.group(3)), - }) - formats.append(format_info) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': timestamp, - 'thumbnails': thumbnails, - 'formats': formats, - } diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py new file mode 100644 index 000000000..0d4271f11 --- /dev/null +++ b/youtube_dl/extractor/turner.py @@ -0,0 +1,163 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + xpath_text, + int_or_none, + determine_ext, + parse_duration, + xpath_attr, + update_url_query, +) + + +class TurnerBaseIE(InfoExtractor): + def _extract_cvp_info(self, data_src, video_id, path_data={}): + video_data = self._download_xml(data_src, video_id) + video_id = video_data.attrib['id'].split('/')[-1].split('.')[0] + title = xpath_text(video_data, 'headline', fatal=True) + # rtmp_src = xpath_text(video_data, 'akamai/src') + # if rtmp_src: + # splited_rtmp_src = rtmp_src.split(',') + # if len(splited_rtmp_src) == 2: + # rtmp_src = splited_rtmp_src[1] + # aifp = xpath_text(video_data, 'akamai/aifp', default='') + + tokens = {} + urls = [] + formats = [] + rex = re.compile(r'''(?x) + (?P<width>[0-9]+)x(?P<height>[0-9]+) + 
(?:_(?P<bitrate>[0-9]+))? + ''') + for video_file in video_data.findall('files/file'): + video_url = video_file.text.strip() + if not video_url: + continue + ext = determine_ext(video_url) + if video_url.startswith('/mp4:protected/'): + continue + # TODO Correct extraction for these files + # protected_path_data = path_data.get('protected') + # if not protected_path_data or not rtmp_src: + # continue + # protected_path = self._search_regex( + # r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path') + # auth = self._download_webpage( + # protected_path_data['tokenizer_src'], query={ + # 'path': protected_path, + # 'videoId': video_id, + # 'aifp': aifp, + # }) + # token = xpath_text(auth, 'token') + # if not token: + # continue + # video_url = rtmp_src + video_url + '?' + token + elif video_url.startswith('/secure/'): + secure_path_data = path_data.get('secure') + if not secure_path_data: + continue + video_url = secure_path_data['media_src'] + video_url + secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' + token = tokens.get(secure_path) + if not token: + auth = self._download_xml( + secure_path_data['tokenizer_src'], video_id, query={ + 'path': secure_path, + 'videoId': video_id, + }) + token = xpath_text(auth, 'token') + if not token: + continue + tokens[secure_path] = token + video_url = video_url + '?hdnea=' + token + elif not re.match('https?://', video_url): + base_path_data = path_data.get(ext, path_data.get('default', {})) + media_src = base_path_data.get('media_src') + if not media_src: + continue + video_url = media_src + video_url + if video_url in urls: + continue + urls.append(video_url) + format_id = video_file.attrib['bitrate'] + if ext == 'smil': + formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + 
update_url_query(video_url, {'hdcore': '3.7.0'}), + video_id, f4m_id=format_id, fatal=False)) + else: + f = { + 'format_id': format_id, + 'url': video_url, + 'ext': ext, + } + mobj = rex.search(format_id + video_url) + if mobj: + f.update({ + 'width': int(mobj.group('width')), + 'height': int(mobj.group('height')), + 'tbr': int_or_none(mobj.group('bitrate')), + }) + elif format_id.isdigit(): + f['tbr'] = int(format_id) + else: + mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) + if mobj: + if mobj.group(1) == 'audio': + f.update({ + 'vcodec': 'none', + 'ext': 'm4a', + }) + else: + f['tbr'] = int(mobj.group(1)) + formats.append(f) + self._sort_formats(formats) + + subtitles = {} + for source in video_data.findall('closedCaptions/source'): + for track in source.findall('track'): + source_url = source.get('url') + if not source_url: + continue + subtitles.set_default(source.get('lang') or source.get('label') or 'en', []).append({ + 'url': source_url, + 'ext': { + 'scc': 'scc', + 'webvtt': 'vtt', + 'smptett': 'tt', + }.get(source.get('format')) + }) + + thumbnails = [{ + 'id': image.get('cut'), + 'url': image.text, + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + } for image in video_data.findall('images/image')] + + timestamp = None + if 'cnn.com' not in data_src: + timestamp = int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + 'description': xpath_text(video_data, 'description'), + 'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')), + 'timestamp': timestamp, + 'upload_date': xpath_attr(video_data, 'metas', 'version'), + 'series': xpath_text(video_data, 'showTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + } From 
ec65b391cbb0bc42a78515915e61602f4d1ae1f9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 16:47:59 +0100 Subject: [PATCH 1503/3599] [cartoonnetwork] Add new extractor(#10110) --- youtube_dl/extractor/cartoonnetwork.py | 36 ++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/cartoonnetwork.py diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py new file mode 100644 index 000000000..813f53644 --- /dev/null +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .turner import TurnerBaseIE + + +class CartoonNetworkIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html' + _TEST = { + 'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html', + 'info_dict': { + 'id': '8a250ab04ed07e6c014ef3f1e2f9016c', + 'ext': 'mp4', + 'title': 'Starfire the Cat Lady', + 'description': 'Robin decides to become a cat so that Starfire will finally love him.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() + query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id + return self._extract_cvp_info( + 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?' 
+ query, video_id, { + 'secure': { + 'media_src': 'http://apple-secure.cdn.turner.com/toon/big', + 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', + }, + }) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d88d6cb4..6eb495b07 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -127,6 +127,7 @@ from .carambatv import ( CarambaTVIE, CarambaTVPageIE, ) +from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, CBCPlayerIE, From b3eaeded12f470afd6f0cb851e6b7dd2ee78b7c5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 16:50:32 +0100 Subject: [PATCH 1504/3599] [tbs] Add new extractor(#10222) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tbs.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/tbs.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6eb495b07..06c6746ff 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -824,6 +824,7 @@ from .tagesschau import ( TagesschauIE, ) from .tass import TassIE +from .tbs import TBSIE from .tdslifeway import TDSLifewayIE from .teachertube import ( TeacherTubeIE, diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py new file mode 100644 index 000000000..79b00e376 --- /dev/null +++ b/youtube_dl/extractor/tbs.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .turner import TurnerBaseIE +from ..utils import ( + extract_attributes, + ExtractorError, +) + + +class TBSIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/videos/(?:[^/]+/)+(?P<id>[^/?#]+)\.html' + _TESTS = [{ + 'url': 'http://www.tbs.com/videos/people-of-earth/season-1/extras/2007318/theatrical-trailer.html', + 'md5': 
'9e61d680e2285066ade7199e6408b2ee', + 'info_dict': { + 'id': '2007318', + 'ext': 'mp4', + 'title': 'Theatrical Trailer', + 'description': 'Catch the latest comedy from TBS, People of Earth, premiering Halloween night--Monday, October 31, at 9/8c.', + } + }, { + 'url': 'http://www.tntdrama.com/videos/good-behavior/season-1/extras/1538823/you-better-run.html', + 'md5': 'ce53c6ead5e9f3280b4ad2031a6fab56', + 'info_dict': { + 'id': '1538823', + 'ext': 'mp4', + 'title': 'You Better Run', + 'description': 'Letty Raines must figure out what she\'s running toward while running away from her past. Good Behavior premieres November 15 at 9/8c.', + } + }] + + def _real_extract(self, url): + domain, display_id = re.match(self._VALID_URL, url).groups() + site = domain[:3] + webpage = self._download_webpage(url, display_id) + video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')) + if video_params.get('isAuthRequired') == 'true': + raise ExtractorError( + 'This video is only available via cable service provider subscription that' + ' is not currently supported.', expected=True) + query = None + clip_id = video_params.get('clipid') + if clip_id: + query = 'id=' + clip_id + else: + query = 'titleId=' + video_params['titleid'] + return self._extract_cvp_info( + 'http://www.%s.com/service/cvpXml?%s' % (domain, query), display_id, { + 'default': { + 'media_src': 'http://ht.cdn.turner.com/%s/big' % site, + }, + 'secure': { + 'media_src': 'http://apple-secure.cdn.turner.com/%s/big' % site, + 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain, + }, + }) From 5bc8a73af69f4aac8b2df6f7c23ecfb4ee72e518 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 17:08:26 +0100 Subject: [PATCH 1505/3599] [cartoonnetwork] make extraction work for more videos in the website some videos require `networkName=CN2` to be present in the feed url --- 
youtube_dl/extractor/cartoonnetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 813f53644..b3f30b1ca 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -28,7 +28,7 @@ class CartoonNetworkIE(TurnerBaseIE): id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups() query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id return self._extract_cvp_info( - 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?' + query, video_id, { + 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, { 'secure': { 'media_src': 'http://apple-secure.cdn.turner.com/toon/big', 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', From b8079a40bc61326b17a672b073dce6cdfa791fb5 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sun, 28 Aug 2016 17:51:53 +0100 Subject: [PATCH 1506/3599] [turner] fix secure m3u8 formats downloading --- youtube_dl/downloader/hls.py | 11 +++++++---- youtube_dl/extractor/turner.py | 15 +++++++++++++-- youtube_dl/extractor/uplynk.py | 4 +--- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8dd1b898e..baaff44d5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -83,7 +83,10 @@ class HlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) + extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -97,8 +100,8 @@ class HlsFD(FragmentFD): if re.match(r'^https?://', line) else 
compat_urlparse.urljoin(man_url, line)) frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) - if extra_param_to_segment_url: - frag_url = update_url_query(frag_url, extra_param_to_segment_url) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) success = ctx['dl'].download(frag_filename, {'url': frag_url}) if not success: return False @@ -124,8 +127,8 @@ class HlsFD(FragmentFD): if not re.match(r'^https?://', decrypt_info['URI']): decrypt_info['URI'] = compat_urlparse.urljoin( man_url, decrypt_info['URI']) - if extra_param_to_segment_url: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_param_to_segment_url) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 0d4271f11..108caa9d8 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -11,6 +11,7 @@ from ..utils import ( parse_duration, xpath_attr, update_url_query, + compat_urlparse, ) @@ -87,8 +88,18 @@ class TurnerBaseIE(InfoExtractor): if ext == 'smil': formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False)) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + m3u8_formats = self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) + if m3u8_formats: + # Sometimes final URLs inside m3u8 are unsigned, let's fix this + # ourselves + qs = compat_urlparse.urlparse(video_url).query + if qs: + query = compat_urlparse.parse_qs(qs) + for m3u8_format in m3u8_formats: + m3u8_format['url'] = update_url_query(m3u8_format['url'], query) + m3u8_format['extra_param_to_segment_url'] = qs + formats.extend(m3u8_formats) elif ext == 'f4m': 
formats.extend(self._extract_f4m_formats( update_url_query(video_url, {'hdcore': '3.7.0'}), diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dl/extractor/uplynk.py index ae529f690..2cd22cf8a 100644 --- a/youtube_dl/extractor/uplynk.py +++ b/youtube_dl/extractor/uplynk.py @@ -33,9 +33,7 @@ class UplynkIE(InfoExtractor): formats = self._extract_m3u8_formats('http://content.uplynk.com/%s.m3u8' % path, display_id, 'mp4') if session_id: for f in formats: - f['extra_param_to_segment_url'] = { - 'pbs': session_id, - } + f['extra_param_to_segment_url'] = 'pbs=' + session_id self._sort_formats(formats) asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id) if asset.get('error') == 1: From 9ba1e1dcc0dc27d36f3f396cb608cef7cd50e48a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 08:26:07 +0700 Subject: [PATCH 1507/3599] [played] Remove extractor (Closes #10470) --- youtube_dl/extractor/extractors.py | 1 - youtube_dl/extractor/played.py | 60 ------------------------------ 2 files changed, 61 deletions(-) delete mode 100644 youtube_dl/extractor/played.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 06c6746ff..20e85703f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -636,7 +636,6 @@ from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .pinkbike import PinkbikeIE from .pladform import PladformIE -from .played import PlayedIE from .playfm import PlayFMIE from .plays import PlaysTVIE from .playtvak import PlaytvakIE diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py deleted file mode 100644 index 57c875ef0..000000000 --- a/youtube_dl/extractor/played.py +++ /dev/null @@ -1,60 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import os.path - -from .common import InfoExtractor -from ..utils import ( - 
ExtractorError, - sanitized_Request, - urlencode_postdata, -) - - -class PlayedIE(InfoExtractor): - IE_NAME = 'played.to' - _VALID_URL = r'https?://(?:www\.)?played\.to/(?P<id>[a-zA-Z0-9_-]+)' - - _TEST = { - 'url': 'http://played.to/j2f2sfiiukgt', - 'md5': 'c2bd75a368e82980e7257bf500c00637', - 'info_dict': { - 'id': 'j2f2sfiiukgt', - 'ext': 'flv', - 'title': 'youtube-dl_test_video.mp4', - }, - 'skip': 'Removed for copyright infringement.', # oh wow - } - - def _real_extract(self, url): - video_id = self._match_id(url) - orig_webpage = self._download_webpage(url, video_id) - - m_error = re.search( - r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage) - if m_error: - raise ExtractorError(m_error.group('msg'), expected=True) - - data = self._hidden_inputs(orig_webpage) - - self._sleep(2, video_id) - - post = urlencode_postdata(data) - headers = { - b'Content-Type': b'application/x-www-form-urlencoded', - } - req = sanitized_Request(url, post, headers) - webpage = self._download_webpage( - req, video_id, note='Downloading video page ...') - - title = os.path.splitext(data['fname'])[0] - - video_url = self._search_regex( - r'file: "?(.+?)",', webpage, 'video URL') - - return { - 'id': video_id, - 'title': title, - 'url': video_url, - } From 93b84045994ca88b486901f54de1102347a67537 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 07:56:54 +0100 Subject: [PATCH 1508/3599] [generic,vodplatform] improve embed regex --- youtube_dl/extractor/generic.py | 4 ++-- youtube_dl/extractor/vodplatform.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c6e655c84..24b217715 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2243,11 +2243,11 @@ class GenericIE(InfoExtractor): # Look for VODPlatform embeds mobj = re.search( - 
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)', + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1', webpage) if mobj is not None: return self.url_result( - self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform') + self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform') # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py index b49542b16..7bdd8b1dc 100644 --- a/youtube_dl/extractor/vodplatform.py +++ b/youtube_dl/extractor/vodplatform.py @@ -6,7 +6,7 @@ from ..utils import unescapeHTML class VODPlatformIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)' _TEST = { # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw', From 6c9b71bc0862560cbb9c4c2d9ec295072c208838 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 29 Aug 2016 19:05:38 +0800 Subject: [PATCH 1509/3599] [downloader/external] Recommend --hls-prefer-native for SOCKS users Related: #10490 --- youtube_dl/downloader/external.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 17f12e970..0aeae3b8f 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -223,7 +223,8 @@ class FFmpegFD(ExternalFD): if proxy.startswith('socks'): self.report_warning( - '%s does not support SOCKS proxies. Downloading may fail.' % self.get_basename()) + '%s does not support SOCKS proxies. Downloading is likely to fail. 
' + 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) From 547993dcd09dd46fda2fd429ed0ed72db7263503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 21:52:41 +0700 Subject: [PATCH 1510/3599] [turner] Fix subtitles extraction --- youtube_dl/extractor/turner.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 108caa9d8..d69977b56 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -135,11 +135,12 @@ class TurnerBaseIE(InfoExtractor): subtitles = {} for source in video_data.findall('closedCaptions/source'): for track in source.findall('track'): - source_url = source.get('url') - if not source_url: + track_url = track.get('url') + if not track_url: continue - subtitles.set_default(source.get('lang') or source.get('label') or 'en', []).append({ - 'url': source_url, + lang = track.get('lang') or track.get('label') or 'en' + subtitles.setdefault(lang, []).append({ + 'url': track_url, 'ext': { 'scc': 'scc', 'webvtt': 'vtt', From cd10b3ea63fd167216234932aba4d63a34aec4c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:13:49 +0700 Subject: [PATCH 1511/3599] [turner] Extract all formats --- youtube_dl/extractor/turner.py | 46 ++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index d69977b56..6df22fd24 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( xpath_text, 
int_or_none, @@ -30,11 +31,11 @@ class TurnerBaseIE(InfoExtractor): tokens = {} urls = [] formats = [] - rex = re.compile(r'''(?x) - (?P<width>[0-9]+)x(?P<height>[0-9]+) - (?:_(?P<bitrate>[0-9]+))? - ''') - for video_file in video_data.findall('files/file'): + rex = re.compile( + r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?') + # Possible formats locations: files/file, files/groupFiles/files + # and maybe others + for video_file in video_data.findall('.//file'): video_url = video_file.text.strip() if not video_url: continue @@ -84,12 +85,14 @@ class TurnerBaseIE(InfoExtractor): if video_url in urls: continue urls.append(video_url) - format_id = video_file.attrib['bitrate'] + format_id = video_file.get('bitrate') if ext == 'smil': - formats.extend(self._extract_smil_formats(video_url, video_id, fatal=False)) + formats.extend(self._extract_smil_formats( + video_url, video_id, fatal=False)) elif ext == 'm3u8': m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) + video_url, video_id, 'mp4', m3u8_id=format_id or 'hls', + fatal=False) if m3u8_formats: # Sometimes final URLs inside m3u8 are unsigned, let's fix this # ourselves @@ -103,7 +106,7 @@ class TurnerBaseIE(InfoExtractor): elif ext == 'f4m': formats.extend(self._extract_f4m_formats( update_url_query(video_url, {'hdcore': '3.7.0'}), - video_id, f4m_id=format_id, fatal=False)) + video_id, f4m_id=format_id or 'hds', fatal=False)) else: f = { 'format_id': format_id, @@ -117,18 +120,19 @@ class TurnerBaseIE(InfoExtractor): 'height': int(mobj.group('height')), 'tbr': int_or_none(mobj.group('bitrate')), }) - elif format_id.isdigit(): - f['tbr'] = int(format_id) - else: - mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) - if mobj: - if mobj.group(1) == 'audio': - f.update({ - 'vcodec': 'none', - 'ext': 'm4a', - }) - else: - f['tbr'] = int(mobj.group(1)) + elif isinstance(format_id, compat_str): + if format_id.isdigit(): + f['tbr'] = int(format_id) 
+ else: + mobj = re.match(r'ios_(audio|[0-9]+)$', format_id) + if mobj: + if mobj.group(1) == 'audio': + f.update({ + 'vcodec': 'none', + 'ext': 'm4a', + }) + else: + f['tbr'] = int(mobj.group(1)) formats.append(f) self._sort_formats(formats) From 7be15d40976bf40f44bc47301d4e839a1e171e52 Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Fri, 29 Jul 2016 23:21:50 +0800 Subject: [PATCH 1512/3599] [bilibili] Support episodes [extractor/bilibili] add md5 for testing [extractor/bilibili] remove unnecessary headers [extractor/bilibili] correct _TESTS; find thumbnail for episode [extractor/bilibili] [Fix] restore removed tests --- youtube_dl/extractor/bilibili.py | 40 ++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index a332fbb69..35313c62b 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -12,9 +12,13 @@ from ..utils import ( unified_timestamp, ) +HEADERS = { + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', +} + class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)' + _VALID_URL = r'https?://(www.|bangumi.|)bilibili\.(?:tv|com)/(video/av|anime/v/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', @@ -77,6 +81,17 @@ class BiliBiliIE(InfoExtractor): 'skip_download': True, }, 'expected_warnings': ['upload time'], + }, { + 'url': 'http://bangumi.bilibili.com/anime/v/40068', + 'md5': '08d539a0884f3deb7b698fb13ba69696', + 'info_dict': { + 'id': '40068', + 'ext': 'mp4', + 'duration': 1402.357, + 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', + 'description': "故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子”无幻,说以50个丸子帮她搞定这群人,风觉得他莫名其妙,也就没多搭理他。而在这时,风因为一个意外而将茶水泼在了恶霸头领——龙次郎身上。愤怒的恶霸们欲将风的手指砍掉,风在无奈中大喊道:“丸子100个!”……   
另一方面,龙次郎的父亲也就是当地的代官,依仗自己有着雄厚的保镖实力,在当地欺压穷人,当看到一穷人无法交齐足够的钱过桥时,欲下令将其杀死,武士仁看不惯这一幕,于是走上前,与代官的保镖交手了……   酒馆内,因为风答应给无幻100个团子,无幻将恶霸们打败了,就在这时,仁进来了。好战的无幻立刻向仁发了战书,最后两败俱伤,被代官抓入牢房,预计第二天斩首……   得知该状况的风,为报救命之恩,来到了刑场,利用烟花救出了无幻和仁。而风则以救命恩人的身份,命令二人和她一起去寻找带着向日葵香味的武士……(by百科)", + 'thumbnail': 're:^http?://.+\.jpg', + }, }] _APP_KEY = '6f90a59ac58a4123' @@ -84,13 +99,20 @@ class BiliBiliIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - cid = compat_parse_qs(self._search_regex( - [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters'))['cid'][0] + _is_episode = 'anime/v' in url + if not _is_episode: + cid = compat_parse_qs(self._search_regex( + [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', + r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + webpage, 'player parameters'))['cid'][0] + else: + url_t = 'http://bangumi.bilibili.com/web_api/get_source' + js = self._download_json(url_t, video_id, + data='episode_id=%s' % video_id, + headers=HEADERS) + cid = js['result']['cid'] payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid) sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() @@ -125,6 +147,10 @@ class BiliBiliIE(InfoExtractor): description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)) + if _is_episode: + thumbnail = self._html_search_meta('og:image', webpage) + else: + thumbnail = self._html_search_meta('thumbnailUrl', webpage) # TODO 'view_count' requires deobfuscating Javascript info = { @@ -132,7 +158,7 @@ class BiliBiliIE(InfoExtractor): 'title': title, 'description': description, 'timestamp': timestamp, - 'thumbnail': self._html_search_meta('thumbnailUrl', webpage), + 'thumbnail': thumbnail, 'duration': 
float_or_none(video_info.get('timelength'), scale=1000), } From 3fb2a23029934dcbf6fe2cd283d851506dcdff5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:40:35 +0700 Subject: [PATCH 1513/3599] [adultswim] Extract video info from onlineOriginals (Closes #10492) --- youtube_dl/extractor/adultswim.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index ef3cc2a61..5d0bf5a68 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, +) class AdultSwimIE(TurnerBaseIE): @@ -144,7 +147,10 @@ class AdultSwimIE(TurnerBaseIE): if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: video_info = bootstrapped_data['slugged_video'] if not video_info: - video_info = bootstrapped_data.get('heroMetadata', {}).get('trailer').get('video') + video_info = bootstrapped_data.get( + 'heroMetadata', {}).get('trailer', {}).get('video') + if not video_info: + video_info = bootstrapped_data.get('onlineOriginals', [None])[0] if not video_info: raise ExtractorError('Unable to find video info') @@ -167,8 +173,9 @@ class AdultSwimIE(TurnerBaseIE): episode_id = video_info['id'] episode_title = video_info['title'] - episode_description = video_info['description'] - episode_duration = video_info.get('duration') + episode_description = video_info.get('description') + episode_duration = int_or_none(video_info.get('duration')) + view_count = int_or_none(video_info.get('views')) entries = [] for part_num, segment_id in enumerate(segment_ids): @@ -197,5 +204,6 @@ class AdultSwimIE(TurnerBaseIE): 'entries': entries, 'title': '%s - %s' % (show_title, episode_title), 'description': episode_description, - 
'duration': episode_duration + 'duration': episode_duration, + 'view_count': view_count, } From 5a80e7b43a7abc83e104f1cd711d8fe7985c30eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:44:15 +0700 Subject: [PATCH 1514/3599] [turner] Skip invalid subtitles' URLs --- youtube_dl/extractor/turner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 6df22fd24..f5736bd15 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -140,7 +140,7 @@ class TurnerBaseIE(InfoExtractor): for source in video_data.findall('closedCaptions/source'): for track in source.findall('track'): track_url = track.get('url') - if not track_url: + if not isinstance(track_url, compat_str) or track_url.endswith('/big'): continue lang = track.get('lang') or track.get('label') or 'en' subtitles.setdefault(lang, []).append({ From a06e1498aa7fc02e6db5c6ec8411e90f210ce2c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 29 Aug 2016 22:54:33 +0700 Subject: [PATCH 1515/3599] [kusi] Update test --- youtube_dl/extractor/kusi.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py index 12cc56e44..2e66e8cf9 100644 --- a/youtube_dl/extractor/kusi.py +++ b/youtube_dl/extractor/kusi.py @@ -18,31 +18,20 @@ from ..utils import ( class KUSIIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' _TESTS = [{ - 'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold', - 'md5': 'f926e7684294cf8cb7bdf8858e1b3988', + 'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right', + 'md5': '4e76ce8e53660ce9697d06c0ba6fc47d', 'info_dict': { - 'id': '12203019', + 'id': '12689020', 'ext': 'mp4', - 'title': 
'Turko Files: Case Closed! & Put On Hold!', - 'duration': 231.0, - 'upload_date': '20160210', - 'timestamp': 1455087571, + 'title': "Turko Files: Refused to Help, It Ain't Right!", + 'duration': 223.586, + 'upload_date': '20160826', + 'timestamp': 1472233118, 'thumbnail': 're:^https?://.*\.jpg$' }, }, { 'url': 'http://kusi.com/video?clipId=12203019', - 'info_dict': { - 'id': '12203019', - 'ext': 'mp4', - 'title': 'Turko Files: Case Closed! & Put On Hold!', - 'duration': 231.0, - 'upload_date': '20160210', - 'timestamp': 1455087571, - 'thumbnail': 're:^https?://.*\.jpg$' - }, - 'params': { - 'skip_download': True, # Same as previous one - }, + 'only_matching': True, }] def _real_extract(self, url): From fe45b0e06081752ff3617cdfae701408a1d8256a Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 18:17:32 +0100 Subject: [PATCH 1516/3599] [9c9media] fix multiple stacks extraction and extract more metadata(#10016) --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/ninecninemedia.py | 126 +++++++++++++++++++------ 2 files changed, 103 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 20e85703f..21efa96b2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -554,7 +554,10 @@ from .nick import ( NickDeIE, ) from .niconico import NiconicoIE, NiconicoPlaylistIE -from .ninecninemedia import NineCNineMediaIE +from .ninecninemedia import ( + NineCNineMediaStackIE, + NineCNineMediaIE, +) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dl/extractor/ninecninemedia.py index d889245ad..ec4d675e2 100644 --- a/youtube_dl/extractor/ninecninemedia.py +++ b/youtube_dl/extractor/ninecninemedia.py @@ -4,40 +4,36 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import 
compat_str from ..utils import ( parse_iso8601, - parse_duration, - ExtractorError + float_or_none, + ExtractorError, + int_or_none, ) -class NineCNineMediaIE(InfoExtractor): - _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)' +class NineCNineMediaBaseIE(InfoExtractor): + _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' + + +class NineCNineMediaStackIE(NineCNineMediaBaseIE): + IE_NAME = '9c9media:stack' + _VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)' def _real_extract(self, url): - destination_code, video_id = re.match(self._VALID_URL, url).groups() - api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id) - content = self._download_json(api_base_url, video_id, query={ - '$include': '[contentpackages]', - }) - title = content['Name'] - if len(content['ContentPackages']) > 1: - raise ExtractorError('multiple content packages') - content_package = content['ContentPackages'][0] - stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id'] - stacks = self._download_json(stacks_base_url, video_id)['Items'] - if len(stacks) > 1: - raise ExtractorError('multiple stacks') - stack = stacks[0] - stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id']) + destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups() + stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.' 
+ stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id) + formats = [] formats.extend(self._extract_m3u8_formats( - stack_base_url + 'm3u8', video_id, 'mp4', + stack_base_url + 'm3u8', stack_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) formats.extend(self._extract_f4m_formats( - stack_base_url + 'f4m', video_id, + stack_base_url + 'f4m', stack_id, f4m_id='hds', fatal=False)) - mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False) + mp4_url = self._download_webpage(stack_base_url + 'pd', stack_id, fatal=False) if mp4_url: formats.append({ 'url': mp4_url, @@ -46,10 +42,86 @@ class NineCNineMediaIE(InfoExtractor): self._sort_formats(formats) return { - 'id': video_id, - 'title': title, - 'description': content.get('Desc') or content.get('ShortDesc'), - 'timestamp': parse_iso8601(content.get('BroadcastDateTime')), - 'duration': parse_duration(content.get('BroadcastTime')), + 'id': stack_id, 'formats': formats, } + + +class NineCNineMediaIE(NineCNineMediaBaseIE): + IE_NAME = '9c9media' + _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)' + + def _real_extract(self, url): + destination_code, content_id = re.match(self._VALID_URL, url).groups() + api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id) + content = self._download_json(api_base_url, content_id, query={ + '$include': '[Media,Season,ContentPackages]', + }) + title = content['Name'] + if len(content['ContentPackages']) > 1: + raise ExtractorError('multiple content packages') + content_package = content['ContentPackages'][0] + package_id = content_package['Id'] + content_package_url = api_base_url + 'contentpackages/%s/' % package_id + content_package = self._download_json(content_package_url, content_id) + + if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm': + raise ExtractorError('This video is DRM protected.', expected=True) + + stacks = 
self._download_json(content_package_url + 'stacks/', package_id)['Items'] + multistacks = len(stacks) > 1 + + thumbnails = [] + for image in content.get('Images', []): + image_url = image.get('Url') + if not image_url: + continue + thumbnails.append({ + 'url': image_url, + 'width': int_or_none(image.get('Width')), + 'height': int_or_none(image.get('Height')), + }) + + tags, categories = [], [] + for source_name, container in (('Tags', tags), ('Genres', categories)): + for e in content.get(source_name, []): + e_name = e.get('Name') + if not e_name: + continue + container.append(e_name) + + description = content.get('Desc') or content.get('ShortDesc') + season = content.get('Season', {}) + base_info = { + 'description': description, + 'timestamp': parse_iso8601(content.get('BroadcastDateTime')), + 'episode_number': int_or_none(content.get('Episode')), + 'season': season.get('Name'), + 'season_number': season.get('Number'), + 'season_id': season.get('Id'), + 'series': content.get('Media', {}).get('Name'), + 'tags': tags, + 'categories': categories, + } + + entries = [] + for stack in stacks: + stack_id = compat_str(stack['Id']) + entry = { + '_type': 'url_transparent', + 'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id), + 'id': stack_id, + 'title': '%s_part%s' % (title, stack['Name']) if multistacks else title, + 'duration': float_or_none(stack.get('Duration')), + 'ie_key': 'NineCNineMediaStack', + } + entry.update(base_info) + entries.append(entry) + + return { + '_type': 'multi_video', + 'id': content_id, + 'title': title, + 'description': description, + 'entries': entries, + } From 42e05be8671e149f79307145eda78892003279dc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 18:20:58 +0100 Subject: [PATCH 1517/3599] [ctv] add support for (tsn,bnn,thecomedynetwork).ca websites(#10016) --- youtube_dl/extractor/ctv.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 
deletions(-) diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/ctv.py index 5807fbac9..a1fe86316 100644 --- a/youtube_dl/extractor/ctv.py +++ b/youtube_dl/extractor/ctv.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)' + _VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -18,13 +20,27 @@ class CTVIE(InfoExtractor): 'timestamp': 1442624700, }, 'expected_warnings': ['HTTP Error 404'], + }, { + 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', + 'only_matching': True, + }, { + 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', + 'only_matching': True, + }, { + 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', + 'only_matching': True, + }, { + 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + domain, video_id = re.match(self._VALID_URL, url).groups() + if domain == 'thecomedynetwork': + domain = 'comedy' return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:ctv_web:%s' % video_id, + 'url': '9c9media:%s_web:%s' % (domain, video_id), 'ie_key': 'NineCNineMedia', } From 1fe48afea5f203cbcb29c0d2984b7b850df8103f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 18:23:21 +0100 Subject: [PATCH 1518/3599] [cnn] update _TEST for CNNBlogsIE and CNNArticleIE(closes #10489) --- youtube_dl/extractor/cnn.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 
1bf87f6ea..bb42f35bd 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -102,12 +102,13 @@ class CNNBlogsIE(InfoExtractor): 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', 'info_dict': { - 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', + 'id': 'criminalizing-journalism', 'ext': 'mp4', 'title': 'Criminalizing journalism?', 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', 'upload_date': '20140209', }, + 'expected_warnings': ['Failed to download m3u8 information'], 'add_ie': ['CNN'], } @@ -127,12 +128,13 @@ class CNNArticleIE(InfoExtractor): 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', 'info_dict': { - 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', + 'id': 'ip-north-korea-obama', 'ext': 'mp4', 'title': 'Obama: Cyberattack not an act of war', - 'description': 'md5:51ce6750450603795cad0cdfbd7d05c5', + 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b', 'upload_date': '20141221', }, + 'expected_warnings': ['Failed to download m3u8 information'], 'add_ie': ['CNN'], } From da30a20a4d8b0ece61c271a5d0f0c6de2817ef5f Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 29 Aug 2016 19:26:53 +0100 Subject: [PATCH 1519/3599] [turner,cnn] move a check for wrong timestamp to CNNIE --- youtube_dl/extractor/cnn.py | 4 ++++ youtube_dl/extractor/turner.py | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index bb42f35bd..a51b239cc 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -83,6 +83,10 @@ class CNNIE(TurnerBaseIE): }, } + def _extract_timestamp(self, video_data): + # TODO: fix timestamp extraction + return None + def _real_extract(self, 
url): sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() if sub_domain not in ('money', 'edition'): diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index f5736bd15..64fdcc56e 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -17,6 +17,9 @@ from ..utils import ( class TurnerBaseIE(InfoExtractor): + def _extract_timestamp(self, video_data): + return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) + def _extract_cvp_info(self, data_src, video_id, path_data={}): video_data = self._download_xml(data_src, video_id) video_id = video_data.attrib['id'].split('/')[-1].split('.')[0] @@ -159,10 +162,6 @@ class TurnerBaseIE(InfoExtractor): 'height': int_or_none(image.get('height')), } for image in video_data.findall('images/image')] - timestamp = None - if 'cnn.com' not in data_src: - timestamp = int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) - return { 'id': video_id, 'title': title, @@ -171,7 +170,7 @@ class TurnerBaseIE(InfoExtractor): 'thumbnails': thumbnails, 'description': xpath_text(video_data, 'description'), 'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')), - 'timestamp': timestamp, + 'timestamp': self._extract_timestamp(video_data), 'upload_date': xpath_attr(video_data, 'metas', 'version'), 'series': xpath_text(video_data, 'showTitle'), 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), From 3c77a54d5dfa1097d5e3a5eaa0c631b5b01e93ce Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 30 Aug 2016 10:46:48 +0100 Subject: [PATCH 1520/3599] [turner] keep video id intact --- youtube_dl/extractor/cnn.py | 12 ++++++------ youtube_dl/extractor/nba.py | 11 +++++++---- youtube_dl/extractor/turner.py | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index a51b239cc..5fc311f53 100644 --- 
a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -15,7 +15,7 @@ class CNNIE(TurnerBaseIE): 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'md5': '3e6121ea48df7e2259fe73a0628605c4', 'info_dict': { - 'id': 'nadal-1-on-1', + 'id': 'sports/2013/06/09/nadal-1-on-1.cnn', 'ext': 'mp4', 'title': 'Nadal wins 8th French Open title', 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', @@ -27,7 +27,7 @@ class CNNIE(TurnerBaseIE): 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', 'info_dict': { - 'id': 'sot-student-gives-epic-speech', + 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'ext': 'mp4', 'title': "Student's epic speech stuns new freshmen", 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", @@ -38,7 +38,7 @@ class CNNIE(TurnerBaseIE): 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', 'md5': 'f14d02ebd264df951feb2400e2c25a1b', 'info_dict': { - 'id': 'growing-america-nashville-salemtown-board-episode-1', + 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', 'ext': 'mp4', 'title': 'Nashville Ep. 
1: Hand crafted skateboards', 'description': 'md5:e7223a503315c9f150acac52e76de086', @@ -49,7 +49,7 @@ class CNNIE(TurnerBaseIE): 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', 'info_dict': { - 'id': 'netflix-stunning-stats', + 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', 'ext': 'mp4', 'title': '5 stunning stats about Netflix', 'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', @@ -106,7 +106,7 @@ class CNNBlogsIE(InfoExtractor): 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', 'info_dict': { - 'id': 'criminalizing-journalism', + 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', 'ext': 'mp4', 'title': 'Criminalizing journalism?', 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', @@ -132,7 +132,7 @@ class CNNArticleIE(InfoExtractor): 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', 'info_dict': { - 'id': 'ip-north-korea-obama', + 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', 'ext': 'mp4', 'title': 'Obama: Cyberattack not an act of war', 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b', diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index aabd5b670..53561961c 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -39,18 +39,19 @@ class NBAIE(TurnerBaseIE): 'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', 'md5': 'b2b39b81cf28615ae0c3360a3f9668c4', 'info_dict': { - 'id': '0041400301-cle-atl-recap', + 'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', 'ext': 'mp4', 
'title': 'Hawks vs. Cavaliers Game 1', 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d', 'duration': 228, 'timestamp': 1432134543, 'upload_date': '20150520', - } + }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { 'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake', 'info_dict': { - 'id': '1455672027478-Doc_Feb16_720', + 'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324', 'ext': 'mp4', 'title': 'Practice: Doc Rivers - 2/16/16', 'description': 'Head Coach Doc Rivers addresses the media following practice.', @@ -61,6 +62,7 @@ class NBAIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest'], }, { 'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', 'info_dict': { @@ -75,7 +77,7 @@ class NBAIE(TurnerBaseIE): }, { 'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#', 'info_dict': { - 'id': 'Wigginsmp4-3462601', + 'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601', 'ext': 'mp4', 'title': 'Shootaround Access - Dec. 
12 | Andrew Wiggins', 'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.', @@ -87,6 +89,7 @@ class NBAIE(TurnerBaseIE): # m3u8 download 'skip_download': True, }, + 'expected_warnings': ['Unable to download f4m manifest'], }] _PAGE_SIZE = 30 diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 64fdcc56e..b59dafda6 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -22,7 +22,7 @@ class TurnerBaseIE(InfoExtractor): def _extract_cvp_info(self, data_src, video_id, path_data={}): video_data = self._download_xml(data_src, video_id) - video_id = video_data.attrib['id'].split('/')[-1].split('.')[0] + video_id = video_data.attrib['id'] title = xpath_text(video_data, 'headline', fatal=True) # rtmp_src = xpath_text(video_data, 'akamai/src') # if rtmp_src: From 245023a86145f7074dacdab4c735dea268d766ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 30 Aug 2016 23:51:18 +0700 Subject: [PATCH 1521/3599] [pyvideo] Fix extraction (Closes #10468) --- youtube_dl/extractor/pyvideo.py | 94 +++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index cc0416cb8..08ec09183 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -1,59 +1,73 @@ from __future__ import unicode_literals import re -import os from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none class PyvideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)' + _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)' - _TESTS = [ - { - 'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', - 'md5': '520915673e53a5c5d487c36e0c4d85b5', - 'info_dict': { - 'id': '24_4WWkSmNo', - 'ext': 'webm', - 'title': 'Become a 
logging expert in 30 minutes', - 'description': 'md5:9665350d466c67fb5b1598de379021f7', - 'upload_date': '20130320', - 'uploader': 'Next Day Video', - 'uploader_id': 'NextDayVideo', - }, - 'add_ie': ['Youtube'], + _TESTS = [{ + 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', + 'info_dict': { + 'id': 'become-a-logging-expert-in-30-minutes', }, - { - 'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', - 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', - 'info_dict': { - 'id': '2542', - 'ext': 'm4v', - 'title': 'Gloriajw-SpotifyWithErikBernhardsson182', - }, + 'playlist_count': 2, + }, { + 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', + 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', + 'info_dict': { + 'id': '2542', + 'ext': 'm4v', + 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v', }, - ] + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + category = mobj.group('category') video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + entries = [] - m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) - if m_youtube is not None: - return self.url_result(m_youtube.group(1), 'Youtube') + data = self._download_json( + 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' + % (category, video_id), video_id, fatal=False) - title = self._html_search_regex( - r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>', - webpage, 'title', flags=re.DOTALL) - video_url = self._search_regex( - [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'], - webpage, 'video url', flags=re.DOTALL) + if data: + print(data) + for video in data['videos']: + video_url = video.get('url') + if video_url: + if video.get('type') == 'youtube': + entries.append(self.url_result(video_url, 'Youtube')) + else: + entries.append({ + 'id': compat_str(data.get('id') or video_id), + 'url': 
video_url, + 'title': data['title'], + 'description': data.get('description') or data.get('summary'), + 'thumbnail': data.get('thumbnail_url'), + 'duration': int_or_none(data.get('duration')), + }) + else: + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + media_urls = self._search_regex( + r'(?s)Media URL:(.+?)</li>', webpage, 'media urls') + for m in re.finditer( + r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls): + media_url = m.group('url') + if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url): + entries.append(self.url_result(media_url, 'Youtube')) + else: + entries.append({ + 'id': video_id, + 'url': media_url, + 'title': title, + }) - return { - 'id': video_id, - 'title': os.path.splitext(title)[0], - 'url': video_url, - } + return self.playlist_result(entries, video_id) From 64fc49aba018ebd51627ddcc92f8fa88f2c499cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 00:29:49 +0700 Subject: [PATCH 1522/3599] [bandcamp:album] Fix title extraction (Closes #10455) --- youtube_dl/extractor/bandcamp.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 991ab0676..249c3d956 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor): 'uploader_id': 'dotscale', }, 'playlist_mincount': 7, + }, { + # with escaped quote in title + 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', + 'info_dict': { + 'title': '"Entropy" EP', + 'uploader_id': 'jstrecords', + 'id': 'entropy-ep', + }, + 'playlist_mincount': 3, }] def _real_extract(self, url): @@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor): entries = [ self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key()) for t_path in tracks_paths] - title = self._search_regex( - 
r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) + title = self._html_search_regex( + r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', + webpage, 'title', fatal=False) + if title: + title = title.replace(r'\"', '"') return { '_type': 'playlist', 'uploader_id': uploader_id, From f7043ef39cb73f8501d18d2e1f93997357397ba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 01:56:15 +0700 Subject: [PATCH 1523/3599] [soundcloud] Fix _VALID_URL clashes with sets (Closes #10505) --- youtube_dl/extractor/soundcloud.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index aeae931a2..9635c2b49 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -32,7 +32,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''(?x)^(?:https?://)? (?:(?:(?:www\.|m\.)?soundcloud\.com/ (?P<uploader>[\w\d-]+)/ - (?!(?:tracks|sets(?:/[^/?#]+)?|reposts|likes|spotlight)/?(?:$|[?#])) + (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) (?P<title>[\w\d-]+)/? 
(?P<token>[^?]+?)?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+) @@ -265,6 +265,9 @@ class SoundcloudSetIE(SoundcloudIE): 'title': 'The Royal Concept EP', }, 'playlist_mincount': 6, + }, { + 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', + 'only_matching': True, }] def _real_extract(self, url): From a249ab83cb1d7765d787a7b1d050449736aaa789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 01:56:58 +0700 Subject: [PATCH 1524/3599] [pyvideo] Remove debugging code --- youtube_dl/extractor/pyvideo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 08ec09183..b8ac93a62 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -38,7 +38,6 @@ class PyvideoIE(InfoExtractor): % (category, video_id), video_id, fatal=False) if data: - print(data) for video in data['videos']: video_url = video.get('url') if video_url: From 263fef43dea463ab4b897c8374dbb11c705f061c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 02:37:40 +0700 Subject: [PATCH 1525/3599] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index e055976c5..7e24b8c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,21 @@ version <unreleased> Extractors +* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505) +* [bandcamp:album] Fix title extraction (#10455) +* [pyvideo] Fix extraction (#10468) ++ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016) +* [9c9media] Extract more metadata +* [9c9media] Fix multiple stacks extraction (#10016) +* [adultswim] Improve video info extraction (#10492) +* [vodplatform] Improve embed regular expression +- [played] Remove extractor (#10470) ++ [tbs] Add extractor for tbs.com and tntdrama.com (#10222) ++ 
[cartoonnetwork] Add extractor for cartoonnetwork.com (#10110) +* [adultswim] Rework in terms of turner extractor +* [cnn] Rework in terms of turner extractor +* [nba] Rework in terms of turner extractor ++ [turner] Add base extractor for Turner Broadcasting System based sites * [bilibili] Fix extraction (#10375) * [openload] Fix extraction (#10408) From 4fd350611c71571733950ad2473d4148f7bb6a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 02:39:39 +0700 Subject: [PATCH 1526/3599] release 2016.08.31 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 8 +++++--- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a2fe59f80..2caca5115 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.28*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.28** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.31** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.28 +[debug] youtube-dl version 2016.08.31 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 7e24b8c6b..0f8076d96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.08.31 Extractors * [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index bf08697be..42bf291e2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -13,6 +13,8 @@ - **5min** - **8tracks** - **91porn** + - **9c9media** + - **9c9media:stack** - **9gag** - **9now.com.au** - **abc.net.au** @@ -89,7 +91,7 @@ - **Bet** - **Bigflix** - **Bild**: Bild.de - - **BiliBili** (Currently broken) + - **BiliBili** - **BioBioChileTV** - **BIQLE** - **BleacherReport** @@ -115,6 +117,7 @@ - **Canvas** - **CarambaTV** - **CarambaTVPage** + - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** - **cbc.ca:watch** @@ -459,7 +462,6 @@ - **nick.de** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - - **NineCNineMedia** - **Nintendo** - **njoy**: N-JOY - **njoy:embed** @@ -517,7 +519,6 @@ - **Pinkbike** - **Pladform** - **play.fm** - - **played.to** - **PlaysTV** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - 
**Playvid** @@ -675,6 +676,7 @@ - **Tagesschau** - **tagesschau:player** - **Tass** + - **TBS** - **TDSLifeway** - **teachertube**: teachertube.com videos - **teachertube:user:collection**: teachertube.com user and collection videos diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ee30ca2ad..fe442dd88 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.28' +__version__ = '2016.08.31' From 165620e320ecb9213ee9928466a9209e7608f83c Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 30 Aug 2016 21:48:59 +0100 Subject: [PATCH 1527/3599] [yahoo] extract more and better formats --- youtube_dl/extractor/yahoo.py | 81 +++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b0679dfb7..d7a81ab8c 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -8,7 +8,6 @@ import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( compat_urllib_parse, - compat_urllib_parse_urlencode, compat_urlparse, ) from ..utils import ( @@ -17,6 +16,7 @@ from ..utils import ( ExtractorError, int_or_none, mimetype2ext, + determine_ext, ) from .brightcove import BrightcoveNewIE @@ -39,7 +39,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', - 'md5': 'c3466d2b6d5dd6b9f41ba9ed04c24b23', + 'md5': '251af144a19ebc4a033e8ba91ac726bb', 'info_dict': { 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', 'ext': 'mp4', @@ -50,7 +50,7 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed', - 'md5': '75ffabdb87c16d4ffe8c036dc4d1c136', + 'md5': '7993e572fac98e044588d0b5260f4352', 'info_dict': { 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb', 'ext': 'mp4', @@ -61,7 +61,7 @@ class 
YahooIE(InfoExtractor): }, { 'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html', - 'md5': '9035d38f88b1782682a3e89f985be5bb', + 'md5': '45c024bad51e63e9b6f6fad7a43a8c23', 'info_dict': { 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f', 'ext': 'mp4', @@ -72,10 +72,10 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html', - 'md5': '0b51660361f0e27c9789e7037ef76f4b', + 'md5': '71298482f7c64cbb7fa064e4553ff1c1', 'info_dict': { 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58', - 'ext': 'mp4', + 'ext': 'webm', 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder', 'description': 'md5:f66c890e1490f4910a9953c941dee944', 'duration': 97, @@ -98,7 +98,7 @@ class YahooIE(InfoExtractor): 'id': '154609075', }, 'playlist': [{ - 'md5': 'f8e336c6b66f503282e5f719641d6565', + 'md5': '000887d0dc609bc3a47c974151a40fb8', 'info_dict': { 'id': 'e624c4bc-3389-34de-9dfc-025f74943409', 'ext': 'mp4', @@ -107,7 +107,7 @@ class YahooIE(InfoExtractor): 'duration': 30, }, }, { - 'md5': '958bcb90b4d6df71c56312137ee1cd5a', + 'md5': '81bc74faf10750fe36e4542f9a184c66', 'info_dict': { 'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9', 'ext': 'mp4', @@ -139,7 +139,7 @@ class YahooIE(InfoExtractor): 'skip': 'Domain name in.lifestyle.yahoo.com gone', }, { 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', - 'md5': 'b17ac378b1134fa44370fb27db09a744', + 'md5': '2a9752f74cb898af5d1083ea9f661b58', 'info_dict': { 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1', 'ext': 'mp4', @@ -168,7 +168,7 @@ class YahooIE(InfoExtractor): }, { # Query result is embedded in webpage, but explicit request to video API fails with geo restriction 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', - 'md5': '1ddbf7c850777548438e5c4f147c7b8c', + 
'md5': '4fbafb9c9b6f07aa8f870629f6671b35', 'info_dict': { 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', 'ext': 'mp4', @@ -196,6 +196,7 @@ class YahooIE(InfoExtractor): 'description': 'Galactic', 'title': 'Dolla Diva (feat. Maggie Koerner)', }, + 'skip': 'redirect to https://www.yahoo.com/music', }, ] @@ -213,15 +214,7 @@ class YahooIE(InfoExtractor): entries = [] iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage) for idx, iframe_url in enumerate(iframe_urls): - iframepage = self._download_webpage( - host + iframe_url, display_id, - note='Downloading iframe webpage for video #%d' % idx) - items_json = self._search_regex( - r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None) - if items_json: - items = json.loads(items_json) - video_id = items[0]['id'] - entries.append(self._get_info(video_id, display_id, webpage)) + entries.append(self.url_result(host + iframe_url, 'Yahoo')) if entries: return self.playlist_result(entries, page_id) @@ -246,7 +239,9 @@ class YahooIE(InfoExtractor): if config: sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') if sapi and 'query' in sapi: - return self._extract_info(display_id, sapi, webpage) + info = self._extract_info(display_id, sapi, webpage) + self._sort_formats(info['formats']) + return info items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, @@ -292,15 +287,17 @@ class YahooIE(InfoExtractor): formats = [] for s in info['streams']: + tbr = int_or_none(s.get('bitrate')) format_info = { 'width': int_or_none(s.get('width')), 'height': int_or_none(s.get('height')), - 'tbr': int_or_none(s.get('bitrate')), + 'tbr': tbr, } host = s['host'] path = s['path'] if host.startswith('rtmp'): + fmt = 'rtmp' format_info.update({ 'url': host, 'play_path': path, @@ -308,14 +305,18 @@ class YahooIE(InfoExtractor): }) else: if s.get('format') == 'm3u8_playlist': - format_info['protocol'] = 
'm3u8_native' - format_info['ext'] = 'mp4' + fmt = 'hls' + format_info.update({ + 'protocol': 'm3u8_native', + 'ext': 'mp4', + }) + else: + fmt = format_info['ext'] = determine_ext(path) format_url = compat_urlparse.urljoin(host, path) format_info['url'] = format_url + format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '') formats.append(format_info) - self._sort_formats(formats) - closed_captions = self._html_search_regex( r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', default='[]') @@ -346,17 +347,25 @@ class YahooIE(InfoExtractor): def _get_info(self, video_id, display_id, webpage): region = self._search_regex( r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', - webpage, 'region', fatal=False, default='US') - data = compat_urllib_parse_urlencode({ - 'protocol': 'http', - 'region': region.upper(), - }) - query_url = ( - 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' - '{id}?{data}'.format(id=video_id, data=data)) - query_result = self._download_json( - query_url, display_id, 'Downloading video info') - return self._extract_info(display_id, query_result, webpage) + webpage, 'region', fatal=False, default='US').upper() + formats = [] + info = {} + for fmt in ('webm', 'mp4'): + query_result = self._download_json( + 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id, + display_id, 'Downloading %s video info' % fmt, query={ + 'protocol': 'http', + 'region': region, + 'format': fmt, + }) + info = self._extract_info(display_id, query_result, webpage) + formats.extend(info['formats']) + formats.extend(self._extract_m3u8_formats( + 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region), + video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + info['formats'] = formats + return info class YahooSearchIE(SearchInfoExtractor): From 196c6ba06792ec38238631d9173fc146822baa7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 
2016 22:12:37 +0700 Subject: [PATCH 1528/3599] [facebook] Extract timestamp (Closes #10508) --- youtube_dl/extractor/facebook.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 0fb781a73..228b0b6d7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( error_to_compat_str, ExtractorError, + int_or_none, limit_length, sanitized_Request, urlencode_postdata, @@ -62,6 +63,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', 'uploader': 'Tennis on Facebook', + 'upload_date': '20140908', + 'timestamp': 1410199200, } }, { 'note': 'Video without discernible title', @@ -71,6 +74,8 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Facebook video #274175099429670', 'uploader': 'Asif Nawab Butt', + 'upload_date': '20140506', + 'timestamp': 1399398998, }, 'expected_warnings': [ 'title' @@ -78,12 +83,14 @@ class FacebookIE(InfoExtractor): }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', - 'md5': '54706e4db4f5ad58fbad82dde1f1213f', + 'md5': 'b2c28d528273b323abe5c6ab59f0f030', 'info_dict': { 'id': '957955867617029', 'ext': 'mp4', 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', 'uploader': 'Demy de Zeeuw', + 'upload_date': '20160110', + 'timestamp': 1452431627, }, }, { 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570', @@ -306,12 +313,16 @@ class FacebookIE(InfoExtractor): if not video_title: video_title = 'Facebook video #%s' % video_id uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) + timestamp = int_or_none(self._search_regex( + r'<abbr[^>]+data-utime=["\'](\d+)', webpage, + 'timestamp', default=None)) info_dict = { 'id': video_id, 'title': 
video_title, 'formats': formats, 'uploader': uploader, + 'timestamp': timestamp, } return webpage, info_dict From 7a3e849f6eaf51b1d86b843a63664012ced2258c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 31 Aug 2016 22:23:55 +0700 Subject: [PATCH 1529/3599] [porncom] Extract categories and tags (Closes #10510) --- youtube_dl/extractor/porncom.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py index 4baf79688..d85e0294d 100644 --- a/youtube_dl/extractor/porncom.py +++ b/youtube_dl/extractor/porncom.py @@ -26,6 +26,8 @@ class PornComIE(InfoExtractor): 'duration': 551, 'view_count': int, 'age_limit': 18, + 'categories': list, + 'tags': list, }, }, { 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', @@ -75,7 +77,14 @@ class PornComIE(InfoExtractor): self._sort_formats(formats) view_count = str_to_int(self._search_regex( - r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count')) + r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, + 'view count', fatal=False)) + + def extract_list(kind): + s = self._search_regex( + r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(), + webpage, kind, fatal=False) + return re.findall(r'<a[^>]+>([^<]+)</a>', s or '') return { 'id': video_id, @@ -86,4 +95,6 @@ class PornComIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, + 'categories': extract_list('categories'), + 'tags': extract_list('tags'), } From f8fd510eb4b2733a5c083d767d45baa88b289298 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 31 Aug 2016 18:31:49 +0100 Subject: [PATCH 1530/3599] [limelight] skip ism manifests and reduce requests --- youtube_dl/extractor/limelight.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 
a425bafe3..6752ffee2 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -34,11 +34,12 @@ class LimelightBaseIE(InfoExtractor): def _extract_info(self, streams, mobile_urls, properties): video_id = properties['media_id'] formats = [] - + urls = [] for stream in streams: stream_url = stream.get('url') - if not stream_url or stream.get('drmProtected'): + if not stream_url or stream.get('drmProtected') or stream_url in urls: continue + urls.append(stream_url) ext = determine_ext(stream_url) if ext == 'f4m': formats.extend(self._extract_f4m_formats( @@ -58,9 +59,11 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) + http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]) + urls.append(http_url) http_fmt = fmt.copy() http_fmt.update({ - 'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]), + 'url': http_url, 'format_id': format_id.replace('rtmp', 'http'), }) formats.append(http_fmt) @@ -76,8 +79,9 @@ class LimelightBaseIE(InfoExtractor): for mobile_url in mobile_urls: media_url = mobile_url.get('mobileUrl') format_id = mobile_url.get('targetMediaPlatform') - if not media_url or format_id == 'Widevine': + if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls: continue + urls.append(media_url) ext = determine_ext(media_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( From 2896dd73bc2c9844175258086c0300395722e5c9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 1 Sep 2016 08:00:13 +0100 Subject: [PATCH 1531/3599] [cbs] extract once formats(closes #10515) --- youtube_dl/extractor/cbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index c72ed2dbb..3f4dea40c 100644 --- a/youtube_dl/extractor/cbs.py 
+++ b/youtube_dl/extractor/cbs.py @@ -51,7 +51,7 @@ class CBSIE(CBSBaseIE): path = 'dJ5BDC/media/guid/2198311517/' + guid smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) - for r in ('HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): + for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): try: tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) formats.extend(tp_formats) From 165c54e97d10705614934d5b1d86d90c06951b7c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 16:28:03 +0800 Subject: [PATCH 1532/3599] =?UTF-8?q?[southpark.cc.com:espa=C3=B1ol]=20Ski?= =?UTF-8?q?p=20geo-restricted=20=5FTESTS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaks https://travis-ci.org/rg3/youtube-dl/jobs/156728175 --- youtube_dl/extractor/southpark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index a147f7db1..e2a9e45ac 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -35,6 +35,7 @@ class SouthParkEsIE(SouthParkIE): 'description': 'Cartman Consigue Una Sonda Anal', }, 'playlist_count': 4, + 'skip': 'Geo-restricted', }] From 746a695b362cb602625ed7357294bb18de133883 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 16:42:35 +0800 Subject: [PATCH 1533/3599] [myvidster] Update _TESTS (closes #10473) --- youtube_dl/extractor/myvidster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dl/extractor/myvidster.py index 731c24542..2117d302d 100644 --- a/youtube_dl/extractor/myvidster.py +++ b/youtube_dl/extractor/myvidster.py @@ -13,7 +13,7 @@ class MyVidsterIE(InfoExtractor): 'id': '3685814', 'title': 
'md5:7d8427d6d02c4fbcef50fe269980c749', 'upload_date': '20141027', - 'uploader_id': 'utkualp', + 'uploader': 'utkualp', 'ext': 'mp4', 'age_limit': 18, }, From 05d4612947d6dbfaedb8f2a00daa5f29d85f73df Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 16:58:16 +0800 Subject: [PATCH 1534/3599] [movingimage] Adapt to the new domain name and fix extraction Closes #10466 --- ChangeLog | 6 +++++ youtube_dl/extractor/extractors.py | 2 +- .../extractor/{ssa.py => movingimage.py} | 26 +++++++------------ 3 files changed, 17 insertions(+), 17 deletions(-) rename youtube_dl/extractor/{ssa.py => movingimage.py} (65%) diff --git a/ChangeLog b/ChangeLog index 0f8076d96..877e8112e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [movingimage] Fix for the new site name (#10466) + + version 2016.08.31 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 21efa96b2..8d0688f53 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -486,6 +486,7 @@ from .motherless import MotherlessIE from .motorsport import MotorsportIE from .movieclips import MovieClipsIE from .moviezine import MoviezineIE +from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVIE, @@ -806,7 +807,6 @@ from .srgssr import ( SRGSSRPlayIE, ) from .srmediathek import SRMediathekIE -from .ssa import SSAIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamable import StreamableIE diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/movingimage.py similarity index 65% rename from youtube_dl/extractor/ssa.py rename to youtube_dl/extractor/movingimage.py index 54d1843f2..bb789c32e 100644 --- a/youtube_dl/extractor/ssa.py +++ b/youtube_dl/extractor/movingimage.py @@ -7,22 +7,19 @@ from ..utils import ( ) -class SSAIE(InfoExtractor): - _VALID_URL = 
r'https?://ssa\.nls\.uk/film/(?P<id>\d+)' +class MovingImageIE(InfoExtractor): + _VALID_URL = r'https?://movingimage\.nls\.uk/film/(?P<id>\d+)' _TEST = { - 'url': 'http://ssa.nls.uk/film/3561', + 'url': 'http://movingimage.nls.uk/film/3561', + 'md5': '4caa05c2b38453e6f862197571a7be2f', 'info_dict': { 'id': '3561', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'SHETLAND WOOL', 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04', 'duration': 900, 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # rtmp download - 'skip_download': True, - }, } def _real_extract(self, url): @@ -30,10 +27,9 @@ class SSAIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - streamer = self._search_regex( - r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer') - play_path = self._search_regex( - r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0] + formats = self._extract_m3u8_formats( + self._html_search_regex(r'file\s*:\s*"([^"]+)"', webpage, 'm3u8 manifest URL'), + video_id, ext='mp4', entry_protocol='m3u8_native') def search_field(field_name, fatal=False): return self._search_regex( @@ -44,13 +40,11 @@ class SSAIE(InfoExtractor): description = unescapeHTML(search_field('Description')) duration = parse_duration(search_field('Running time')) thumbnail = self._search_regex( - r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False) + r"image\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) return { 'id': video_id, - 'url': streamer, - 'play_path': play_path, - 'ext': 'flv', + 'formats': formats, 'title': title, 'description': description, 'duration': duration, From 4c8ab6fd715249290feab89bbc86eb803b459993 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 1 Sep 2016 17:04:41 +0800 Subject: [PATCH 1535/3599] [thvideo] Remove extractor. Website down. Closes #10464 According to a screenshot in http://tieba.baidu.com/p/4691302183, thvideo.tv is shut down "temporarily". 
I see no clues that it will be up again, so I remove it here. --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 4 -- youtube_dl/extractor/thvideo.py | 84 ------------------------------ 3 files changed, 1 insertion(+), 88 deletions(-) delete mode 100644 youtube_dl/extractor/thvideo.py diff --git a/ChangeLog b/ChangeLog index 877e8112e..2e75c003d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +- [thvideo] Remove extractor (#10464) * [movingimage] Fix for the new site name (#10466) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d0688f53..459d776b3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -869,10 +869,6 @@ from .tnaflix import ( MovieFapIE, ) from .toggle import ToggleIE -from .thvideo import ( - THVideoIE, - THVideoPlaylistIE -) from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/thvideo.py b/youtube_dl/extractor/thvideo.py deleted file mode 100644 index 406f4a826..000000000 --- a/youtube_dl/extractor/thvideo.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - unified_strdate -) - - -class THVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thvideo\.tv/(?:v/th|mobile\.php\?cid=)(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://thvideo.tv/v/th1987/', - 'md5': 'fa107b1f73817e325e9433505a70db50', - 'info_dict': { - 'id': '1987', - 'ext': 'mp4', - 'title': '【动画】秘封活动记录 ~ The Sealed Esoteric History.分镜稿预览', - 'display_id': 'th1987', - 'thumbnail': 'http://thvideo.tv/uploadfile/2014/0722/20140722013459856.jpg', - 'description': '社团京都幻想剧团的第一个东方二次同人动画作品「秘封活动记录 ~ The Sealed Esoteric History.」 本视频是该动画第一期的分镜草稿...', - 'upload_date': '20140722' - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - # 
extract download link from mobile player page - webpage_player = self._download_webpage( - 'http://thvideo.tv/mobile.php?cid=%s-0' % (video_id), - video_id, note='Downloading video source page') - video_url = self._html_search_regex( - r'<source src="(.*?)" type', webpage_player, 'video url') - - # extract video info from main page - webpage = self._download_webpage( - 'http://thvideo.tv/v/th%s' % (video_id), video_id) - title = self._og_search_title(webpage) - display_id = 'th%s' % video_id - thumbnail = self._og_search_thumbnail(webpage) - description = self._og_search_description(webpage) - upload_date = unified_strdate(self._html_search_regex( - r'span itemprop="datePublished" content="(.*?)">', webpage, - 'upload date', fatal=False)) - - return { - 'id': video_id, - 'ext': 'mp4', - 'url': video_url, - 'title': title, - 'display_id': display_id, - 'thumbnail': thumbnail, - 'description': description, - 'upload_date': upload_date - } - - -class THVideoPlaylistIE(InfoExtractor): - _VALID_URL = r'http?://(?:www\.)?thvideo\.tv/mylist(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://thvideo.tv/mylist2', - 'info_dict': { - 'id': '2', - 'title': '幻想万華鏡', - }, - 'playlist_mincount': 23, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - list_title = self._html_search_regex( - r'<h1 class="show_title">(.*?)<b id', webpage, 'playlist title', - fatal=False) - - entries = [ - self.url_result('http://thvideo.tv/v/th' + id, 'THVideo') - for id in re.findall(r'<dd><a href="http://thvideo.tv/v/th(\d+)/" target=', webpage)] - - return self.playlist_result(entries, playlist_id, list_title) From f096ec262544babf6ea23347160c1c550e4e157e Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 1 Sep 2016 13:34:12 +0100 Subject: [PATCH 1536/3599] [curiositystream] Add new extractor --- youtube_dl/extractor/curiositystream.py | 128 ++++++++++++++++++++++++ 
youtube_dl/extractor/extractors.py | 4 + 2 files changed, 132 insertions(+) create mode 100644 youtube_dl/extractor/curiositystream.py diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py new file mode 100644 index 000000000..7105e3c4c --- /dev/null +++ b/youtube_dl/extractor/curiositystream.py @@ -0,0 +1,128 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + urlencode_postdata, + compat_str, + ExtractorError, +) + + +class CuriosityStreamBaseIE(InfoExtractor): + _NETRC_MACHINE = 'curiositystream' + _auth_token = None + _API_BASE_URL = 'https://api.curiositystream.com/v1/' + + def _handle_errors(self, result): + error = result.get('error', {}).get('message') + if error: + if isinstance(error, dict): + error = ', '.join(error.values()) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) + + def _call_api(self, path, video_id): + headers = {} + if self._auth_token: + headers['X-Auth-Token'] = self._auth_token + result = self._download_json( + self._API_BASE_URL + path, video_id, headers=headers) + self._handle_errors(result) + return result['data'] + + def _real_initialize(self): + if not self._auth_token: + user = self._downloader.cache.load('curiositystream', 'user') or {} + self._auth_token = user.get('auth_token') + if not self._auth_token: + (email, password) = self._get_login_info() + if email is None: + return + result = self._download_json( + self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'email': email, + 'password': password, + })) + self._handle_errors(result) + self._auth_token = result['message']['auth_token'] + self._downloader.cache.store( + 'curiositystream', 'user', { + 'auth_token': self._auth_token, + }) + + def _extract_media_info(self, media): + video_id = compat_str(media['id']) + limelight_media_id = media['limelight_media_id'] + title = media['title'] + + subtitles = {} + 
for closed_caption in media.get('closed_captions', []): + sub_url = closed_caption.get('file') + if not sub_url: + continue + lang = closed_caption.get('code') or closed_caption.get('language') or 'en' + subtitles.setdefault(lang, []).append({ + 'url': sub_url, + }) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:' + limelight_media_id, + 'title': title, + 'description': media.get('description'), + 'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'), + 'duration': int_or_none(media.get('duration')), + 'tags': media.get('tags'), + 'subtitles': subtitles, + 'ie_key': 'LimelightMedia', + } + + +class CuriosityStreamIE(CuriosityStreamBaseIE): + IE_NAME = 'curiositystream' + _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/video/2', + 'md5': 'a0074c190e6cddaf86900b28d3e9ee7a', + 'info_dict': { + 'id': '2', + 'ext': 'mp4', + 'title': 'How Did You Develop The Internet?', + 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + 'timestamp': 1448388615, + 'upload_date': '20151124', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('media/' + video_id, video_id) + return self._extract_media_info(media) + + +class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): + IE_NAME = 'curiositystream:collection' + _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)' + _TEST = { + 'url': 'https://app.curiositystream.com/collection/2', + 'info_dict': { + 'id': '2', + 'title': 'Curious Minds: The Internet', + 'description': 'How is the internet shaping our lives in the 21st Century?', + }, + 'playlist_mincount': 17, + } + + def _real_extract(self, url): + collection_id = self._match_id(url) + collection = self._call_api( + 'collections/' + collection_id, collection_id) + entries = [] + for media in 
collection.get('media', []): + entries.append(self._extract_media_info(media)) + return self.playlist_result( + entries, collection_id, + collection.get('title'), collection.get('description')) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 459d776b3..0c2436b67 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -194,6 +194,10 @@ from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE +from .curiositystream import ( + CuriosityStreamIE, + CuriosityStreamCollectionIE, +) from .cwtv import CWTVIE from .dailymail import DailyMailIE from .dailymotion import ( From 9250181f37cf0289c02d18ab91203c6181f9cc71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 21:37:25 +0700 Subject: [PATCH 1537/3599] [extractor/common] Restore NAME usage from EXT-X-MEDIA tag for formats codes in _extract_m3u8_formats (Closes #10522) --- youtube_dl/extractor/common.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da0af29ec..36d43fd50 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1202,6 +1202,7 @@ class InfoExtractor(object): 'preference': preference, }] last_info = None + last_media = None for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) @@ -1224,6 +1225,10 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }) + else: + # When there is no URI in EXT-X-MEDIA let this tag's + # data be used by regular URI lines below + last_media = media elif line.startswith('#') or not line.strip(): continue else: @@ -1234,13 +1239,14 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) + last_media_name = 
last_media.get('NAME') if last_media else None + # Despite specification does not mention NAME attribute for + # EXT-X-STREAM-INF it still sometimes may be present + stream_name = last_info.get('NAME') or last_media_name # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. if not live: - # Despite specification does not mention NAME attribute for - # EXT-X-STREAM-INF it still sometimes may be present - stream_name = last_info.get('NAME') format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), From e816c9d158629ef054c1cc77eecf83043d06fe8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 22:18:16 +0700 Subject: [PATCH 1538/3599] [extractor/common] Simplify _extract_m3u8_formats --- youtube_dl/extractor/common.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 36d43fd50..a9c7a8d16 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1201,8 +1201,8 @@ class InfoExtractor(object): 'protocol': entry_protocol, 'preference': preference, }] - last_info = None - last_media = None + last_info = {} + last_media = {} for line in m3u8_doc.splitlines(): if line.startswith('#EXT-X-STREAM-INF:'): last_info = parse_m3u8_attributes(line) @@ -1232,17 +1232,13 @@ class InfoExtractor(object): elif line.startswith('#') or not line.strip(): continue else: - if last_info is None: - formats.append({'url': format_url(line)}) - continue tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000) format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media else None # Despite specification does not mention NAME attribute for # EXT-X-STREAM-INF it still sometimes may be 
present - stream_name = last_info.get('NAME') or last_media_name + stream_name = last_info.get('NAME') or last_media.get('NAME') # Bandwidth of live streams may differ over time thus making # format_id unpredictable. So it's better to keep provided # format_id intact. @@ -1275,6 +1271,7 @@ class InfoExtractor(object): f.update(parse_codecs(last_info.get('CODECS'))) formats.append(f) last_info = {} + last_media = {} return formats @staticmethod From f6af0f888b03e8c072b86c04492cc84c966c9f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 23:15:01 +0700 Subject: [PATCH 1539/3599] [youporn] Fix categories and tags extraction (Closes #10521) --- youtube_dl/extractor/youporn.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 0df2d76ee..0265a64a7 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -35,7 +35,7 @@ class YouPornIE(InfoExtractor): 'age_limit': 18, }, }, { - # Anonymous User uploader + # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', 'info_dict': { 'id': '561726', @@ -44,7 +44,7 @@ class YouPornIE(InfoExtractor): 'title': 'Big Tits Awesome Brunette On amazing webcam show', 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4', 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'Anonymous User', + 'uploader': 'Unknown', 'upload_date': '20111125', 'average_rating': int, 'view_count': int, @@ -140,17 +140,17 @@ class YouPornIE(InfoExtractor): r'>All [Cc]omments? 
\(([\d,.]+)\)', webpage, 'comment count', fatal=False)) - def extract_tag_box(title): - tag_box = self._search_regex( - (r'<div[^>]+class=["\']tagBoxTitle["\'][^>]*>\s*%s\b.*?</div>\s*' - '<div[^>]+class=["\']tagBoxContent["\']>(.+?)</div>') % re.escape(title), - webpage, '%s tag box' % title, default=None) + def extract_tag_box(regex, title): + tag_box = self._search_regex(regex, webpage, title, default=None) if not tag_box: return [] return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box) - categories = extract_tag_box('Category') - tags = extract_tag_box('Tags') + categories = extract_tag_box( + r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories') + tags = extract_tag_box( + r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>', + 'tags') return { 'id': video_id, From 8fb6af6bba201c9f750aadb7b092704195c7f8e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 23:32:28 +0700 Subject: [PATCH 1540/3599] [exfm] Remove extractor (Closes #10482) --- youtube_dl/extractor/exfm.py | 58 ------------------------------ youtube_dl/extractor/extractors.py | 1 - 2 files changed, 59 deletions(-) delete mode 100644 youtube_dl/extractor/exfm.py diff --git a/youtube_dl/extractor/exfm.py b/youtube_dl/extractor/exfm.py deleted file mode 100644 index 09ed4f2b5..000000000 --- a/youtube_dl/extractor/exfm.py +++ /dev/null @@ -1,58 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class ExfmIE(InfoExtractor): - IE_NAME = 'exfm' - IE_DESC = 'ex.fm' - _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' - _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' - _TESTS = [ - { - 'url': 'http://ex.fm/song/eh359', - 'md5': 'e45513df5631e6d760970b14cc0c11e7', - 'info_dict': { - 'id': '44216187', - 'ext': 'mp3', - 'title': 'Test House "Love Is Not Enough" (Extended Mix) DeadJournalist Exclusive', - 'uploader': 'deadjournalist', 
- 'upload_date': '20120424', - 'description': 'Test House \"Love Is Not Enough\" (Extended Mix) DeadJournalist Exclusive', - }, - 'note': 'Soundcloud song', - 'skip': 'The site is down too often', - }, - { - 'url': 'http://ex.fm/song/wddt8', - 'md5': '966bd70741ac5b8570d8e45bfaed3643', - 'info_dict': { - 'id': 'wddt8', - 'ext': 'mp3', - 'title': 'Safe and Sound', - 'uploader': 'Capital Cities', - }, - 'skip': 'The site is down too often', - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') - info_url = 'http://ex.fm/api/v3/song/%s' % song_id - info = self._download_json(info_url, song_id)['song'] - song_url = info['url'] - if re.match(self._SOUNDCLOUD_URL, song_url) is not None: - self.to_screen('Soundcloud song detected') - return self.url_result(song_url.replace('/stream', ''), 'Soundcloud') - return { - 'id': song_id, - 'url': song_url, - 'ext': 'mp3', - 'title': info['title'], - 'thumbnail': info['image']['large'], - 'uploader': info['artist'], - 'view_count': info['loved_count'], - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0c2436b67..7b59d5db2 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -261,7 +261,6 @@ from .espn import ESPNIE from .esri import EsriVideoIE from .europa import EuropaIE from .everyonesmixtape import EveryonesMixtapeIE -from .exfm import ExfmIE from .expotv import ExpoTVIE from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE From af95ee94b4554449db175ae44060a66c89bd96ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 1 Sep 2016 23:38:49 +0700 Subject: [PATCH 1541/3599] [glide] Fix extraction (Closes #10478) --- youtube_dl/extractor/glide.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 62ff84835..50f698803 100644 --- 
a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -14,10 +14,8 @@ class GlideIE(InfoExtractor): 'info_dict': { 'id': 'UZF8zlmuQbe4mr+7dCiQ0w==', 'ext': 'mp4', - 'title': 'Damon Timm\'s Glide message', + 'title': "Damon's Glide message", 'thumbnail': 're:^https?://.*?\.cloudfront\.net/.*\.jpg$', - 'uploader': 'Damon Timm', - 'upload_date': '20140919', } } @@ -27,7 +25,8 @@ class GlideIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex( - r'<title>(.+?)', webpage, 'title') + r'(.+?)', webpage, + 'title', default=None) or self._og_search_title(webpage) video_url = self._proto_relative_url(self._search_regex( r']+src=(["\'])(?P.+?)\1', webpage, 'video URL', default=None, @@ -36,18 +35,10 @@ class GlideIE(InfoExtractor): r']+id=["\']video-thumbnail["\'][^>]+src=(["\'])(?P.+?)\1', webpage, 'thumbnail url', default=None, group='url')) or self._og_search_thumbnail(webpage) - uploader = self._search_regex( - r']+class=["\']info-name["\'][^>]*>([^<]+)', - webpage, 'uploader', fatal=False) - upload_date = unified_strdate(self._search_regex( - r']+class="info-date"[^>]*>([^<]+)', - webpage, 'upload date', fatal=False)) return { 'id': video_id, 'title': title, 'url': video_url, 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, } From 8276d3b87a54f43ca2f47b7709a6557ea979327c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:46:15 +0700 Subject: [PATCH 1542/3599] [thestar] Fix extraction (Closes #10465) --- youtube_dl/extractor/thestar.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/thestar.py b/youtube_dl/extractor/thestar.py index ba1380abc..c3f118894 100644 --- a/youtube_dl/extractor/thestar.py +++ b/youtube_dl/extractor/thestar.py @@ -2,8 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .brightcove import BrightcoveLegacyIE -from ..compat import 
compat_parse_qs class TheStarIE(InfoExtractor): @@ -30,6 +28,9 @@ class TheStarIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) - brightcove_id = compat_parse_qs(brightcove_legacy_url)['@videoPlayer'][0] - return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) + brightcove_id = self._search_regex( + r'mainartBrightcoveVideoId["\']?\s*:\s*["\']?(\d+)', + webpage, 'brightcove id') + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 'BrightcoveNew', brightcove_id) From f97ec8bcb95b45d9a657392cd24eabfadb4053e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 1 Sep 2016 23:46:58 +0700 Subject: [PATCH 1543/3599] [glide] Remove unused import --- youtube_dl/extractor/glide.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/glide.py b/youtube_dl/extractor/glide.py index 50f698803..f0d951396 100644 --- a/youtube_dl/extractor/glide.py +++ b/youtube_dl/extractor/glide.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate class GlideIE(InfoExtractor): From 4191779dcda8a80faf6e53579e011b63ee5c3878 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 19:07:41 +0100 Subject: [PATCH 1544/3599] [nytimes] improve extraction --- youtube_dl/extractor/nytimes.py | 93 +++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 681683e86..142c34256 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -1,26 +1,37 @@ from __future__ import unicode_literals +import hmac +import hashlib +import base64 + from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, 
parse_iso8601, + mimetype2ext, + determine_ext, ) class NYTimesBaseIE(InfoExtractor): + _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v' + def _extract_video_from_id(self, video_id): - video_data = self._download_json( - 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, - video_id, 'Downloading video JSON') + # Authorization generation algorithm is reverse engineered from `signer` in + # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js + path = '/svc/video/api/v3/video/' + video_id + hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest() + video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={ + 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(), + 'X-NYTV': 'vhs', + }, fatal=False) + if not video_data: + video_data = self._download_json( + 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id, + video_id, 'Downloading video JSON') title = video_data['headline'] - description = video_data.get('summary') - duration = float_or_none(video_data.get('duration'), 1000) - - uploader = video_data.get('byline') - publication_date = video_data.get('publication_date') - timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None def get_file_size(file_size): if isinstance(file_size, int): @@ -28,35 +39,59 @@ class NYTimesBaseIE(InfoExtractor): elif isinstance(file_size, dict): return int(file_size.get('value', 0)) else: - return 0 + return None - formats = [ - { - 'url': video['url'], - 'format_id': video.get('type'), - 'vcodec': video.get('video_codec'), - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - 'filesize': get_file_size(video.get('fileSize')), - } for video in video_data['renditions'] if video.get('url') - ] + urls = [] + formats = [] + for video in video_data.get('renditions', []): + video_url = video.get('url') + format_id = video.get('type') + if not video_url or 
format_id == 'thumbs' or video_url in urls: + continue + urls.append(video_url) + ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id or 'hls', fatal=False)) + elif ext == 'mpd': + continue + # formats.extend(self._extract_mpd_formats( + # video_url, video_id, format_id or 'dash', fatal=False)) + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'vcodec': video.get('videoencoding') or video.get('video_codec'), + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')), + 'tbr': int_or_none(video.get('bitrate'), 1000), + 'ext': ext, + }) self._sort_formats(formats) - thumbnails = [ - { - 'url': 'http://www.nytimes.com/%s' % image['url'], + thumbnails = [] + for image in video_data.get('images', []): + image_url = image.get('url') + if not image_url: + continue + thumbnails.append({ + 'url': 'http://www.nytimes.com/' + image_url, 'width': int_or_none(image.get('width')), 'height': int_or_none(image.get('height')), - } for image in video_data.get('images', []) if image.get('url') - ] + }) + + publication_date = video_data.get('publication_date') + timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None return { 'id': video_id, 'title': title, - 'description': description, + 'description': video_data.get('summary'), 'timestamp': timestamp, - 'uploader': uploader, - 'duration': duration, + 'uploader': video_data.get('byline'), + 'duration': float_or_none(video_data.get('duration'), 1000), 'formats': formats, 'thumbnails': thumbnails, } @@ -67,7 +102,7 @@ class NYTimesIE(NYTimesBaseIE): _TESTS = [{ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', - 'md5': '18a525a510f942ada2720db5f31644c0', + 'md5': 
'd665342765db043f7e225cff19df0f2d', 'info_dict': { 'id': '100000002847155', 'ext': 'mov', From b207d5ebd4eab80e07673aba9696d240d1009bcf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 19:46:58 +0100 Subject: [PATCH 1545/3599] [curiositystream] don't cache auth token --- youtube_dl/extractor/curiositystream.py | 28 +++++++++---------------- 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py index 7105e3c4c..e3c99468c 100644 --- a/youtube_dl/extractor/curiositystream.py +++ b/youtube_dl/extractor/curiositystream.py @@ -33,24 +33,16 @@ class CuriosityStreamBaseIE(InfoExtractor): return result['data'] def _real_initialize(self): - if not self._auth_token: - user = self._downloader.cache.load('curiositystream', 'user') or {} - self._auth_token = user.get('auth_token') - if not self._auth_token: - (email, password) = self._get_login_info() - if email is None: - return - result = self._download_json( - self._API_BASE_URL + 'login', None, data=urlencode_postdata({ - 'email': email, - 'password': password, - })) - self._handle_errors(result) - self._auth_token = result['message']['auth_token'] - self._downloader.cache.store( - 'curiositystream', 'user', { - 'auth_token': self._auth_token, - }) + (email, password) = self._get_login_info() + if email is None: + return + result = self._download_json( + self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'email': email, + 'password': password, + })) + self._handle_errors(result) + self._auth_token = result['message']['auth_token'] def _extract_media_info(self, media): video_id = compat_str(media['id']) From 6150502e4709b6b2ebc226c9c38fa346b9358699 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 1 Sep 2016 22:14:40 +0100 Subject: [PATCH 1546/3599] [adobepass] check for authz_token expiration(#10527) --- youtube_dl/extractor/adobepass.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 
deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9e3a3e362..68ec37e00 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -37,6 +37,10 @@ class AdobePassIE(InfoExtractor): return self._search_regex( '<%s>(.+?)' % (tag, tag), xml_str, tag) + def is_expired(token, date_ele): + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) + return token_expires and token_expires <= int(time.time()) + mvpd_headers = { 'ap_42': 'anonymous', 'ap_11': 'Linux i686', @@ -47,11 +51,8 @@ class AdobePassIE(InfoExtractor): guid = xml_text(resource, 'guid') requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') - if authn_token: - token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) - if token_expires and token_expires <= int(time.time()): - authn_token = None - requestor_info = {} + if authn_token and is_expired(authn_token, 'simpleTokenExpires'): + authn_token = None if not authn_token: # TODO add support for other TV Providers mso_id = 'DTV' @@ -98,6 +99,8 @@ class AdobePassIE(InfoExtractor): self._downloader.cache.store('mvpd', requestor_id, requestor_info) authz_token = requestor_info.get(guid) + if authz_token and is_expired(authz_token, 'simpleTokenTTL'): + authz_token = None if not authz_token: authorize = self._download_webpage( self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, From 2c3e0af93e00d7e2e20283be12541aaebabfa1bf Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 2 Sep 2016 09:53:04 +0100 Subject: [PATCH 1547/3599] [go] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/go.py | 101 +++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/go.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 
7b59d5db2..2bcd5a0cd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -318,6 +318,7 @@ from .globo import ( GloboIE, GloboArticleIE, ) +from .go import GoIE from .godtube import GodTubeIE from .godtv import GodTVIE from .golem import GolemIE diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py new file mode 100644 index 000000000..6a437c54d --- /dev/null +++ b/youtube_dl/extractor/go.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + determine_ext, + parse_age_limit, +) + + +class GoIE(InfoExtractor): + _BRANDS = { + 'abc': '001', + 'freeform': '002', + 'watchdisneychannel': '004', + 'watchdisneyjunior': '008', + 'watchdisneyxd': '009', + } + _VALID_URL = r'https?://(?:(?P%s)\.)?go\.com/.*?vdka(?P\w+)' % '|'.join(_BRANDS.keys()) + _TESTS = [{ + 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', + 'info_dict': { + 'id': '0_g86w5onx', + 'ext': 'mp4', + 'title': 'Sneak Peek: Language Arts', + 'description': 'md5:7dcdab3b2d17e5217c953256af964e9c', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601', + 'only_matching': True, + }] + + def _real_extract(self, url): + sub_domain, video_id = re.match(self._VALID_URL, url).groups() + video_data = self._download_json( + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), + video_id)['video'][0] + title = video_data['title'] + + formats = [] + for asset in video_data.get('assets', {}).get('asset', []): + asset_url = asset.get('value') + if not asset_url: + continue + format_id = asset.get('format') + ext = determine_ext(asset_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4', m3u8_id=format_id or 
'hls', fatal=False)) + else: + formats.append({ + 'format_id': format_id, + 'url': asset_url, + 'ext': ext, + }) + self._sort_formats(formats) + + subtitles = {} + for cc in video_data.get('closedcaption', {}).get('src', []): + cc_url = cc.get('value') + if not cc_url: + continue + ext = determine_ext(cc_url) + if ext == 'xml': + ext = 'ttml' + subtitles.setdefault(cc.get('lang'), []).append({ + 'url': cc_url, + 'ext': ext, + }) + + thumbnails = [] + for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []): + thumbnail_url = thumbnail.get('value') + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('longdescription') or video_data.get('description'), + 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000), + 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')), + 'episode_number': int_or_none(video_data.get('episodenumber')), + 'series': video_data.get('show', {}).get('title'), + 'season_number': int_or_none(video_data.get('season', {}).get('num')), + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + } From 349fc5c705d6b81ae53d698972f40b1125bee13e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 21:13:50 +0700 Subject: [PATCH 1548/3599] [facebook:plugins:video] Add extractor (Closes #10530) --- youtube_dl/extractor/extractors.py | 5 ++++- youtube_dl/extractor/facebook.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2bcd5a0cd..bc616223e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -264,7 +264,10 @@ from .everyonesmixtape import EveryonesMixtapeIE from .expotv import 
ExpoTVIE from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE -from .facebook import FacebookIE +from .facebook import ( + FacebookIE, + FacebookPluginsVideoIE, +) from .faz import FazIE from .fc2 import FC2IE from .fczenit import FczenitIE diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 228b0b6d7..3a220e995 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -351,3 +351,32 @@ class FacebookIE(InfoExtractor): self._VIDEO_PAGE_TEMPLATE % video_id, video_id, fatal_if_no_video=True) return info_dict + + +class FacebookPluginsVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?Phttps.+)' + + _TESTS = [{ + 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560', + 'md5': '5954e92cdfe51fe5782ae9bda7058a07', + 'info_dict': { + 'id': '10154383743583686', + 'ext': 'mp4', + 'title': 'What to do during the haze?', + 'uploader': 'Gov.sg', + 'upload_date': '20160826', + 'timestamp': 1472184808, + }, + 'add_ie': [FacebookIE.ie_key()], + }, { + 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104', + 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560', + 'only_matching': True, + }] + + def _real_extract(self, url): + return self.url_result( + compat_urllib_parse_unquote(self._match_id(url)), + FacebookIE.ie_key()) From 5e9e3d0f6bf2055c557f360758d6d7eb146edcba Mon Sep 17 00:00:00 2001 From: Sebastian Blunt Date: Fri, 2 Sep 2016 14:48:56 +0200 Subject: [PATCH 1549/3599] [drtv] Add support for dr.dk/nyheder It's the same video player, the only difference is that the video player is loaded differently, and certain metadata (title and description) is not 
available under dr.dk/mu, so make it by default get that from some of the html meta tags. Skip the dr.dk/tv test dr.dk/tv videos are only available for between 7 and 90 days due to Danish law, and in certain cases may be readded. Skip this test as it is no longer available. --- youtube_dl/extractor/drtv.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 2d74ff855..e210cb610 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -5,13 +5,14 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, parse_iso8601, + remove_end, ) class DRTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' + _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' - _TEST = { + _TESTS = [{ 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', 'info_dict': { @@ -23,7 +24,20 @@ class DRTVIE(InfoExtractor): 'upload_date': '20150322', 'duration': 1455, }, - } + 'skip': 'Video is no longer available', + }, { + 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', + 'md5': '2ada5074f9e79afc0d324a8e9784d850', + 'info_dict': { + 'id': 'christiania-pusher-street-ryddes-drdkrjpo', + 'ext': 'mp4', + 'title': 'LIVE Christianias rydning af Pusher Street er i gang', + 'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.', + 'timestamp': 1472800279, + 'upload_date': '20160902', + 'duration': 131.4, + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -35,7 +49,8 @@ class DRTVIE(InfoExtractor): 'Video %s is not available' % video_id, expected=True) video_id = self._search_regex( - r'data-(?:material-identifier|episode-slug)="([^"]+)"', + 
(r'data-(?:material-identifier|episode-slug)="([^"]+)"', + r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), webpage, 'video id') programcard = self._download_json( @@ -43,8 +58,9 @@ class DRTVIE(InfoExtractor): video_id, 'Downloading video JSON') data = programcard['Data'][0] - title = data['Title'] - description = data['Description'] + title = remove_end(self._og_search_title(webpage), ' | TV | DR') or data['Title'] + description = self._og_search_description(webpage) or data['Description'] + timestamp = parse_iso8601(data['CreatedTime']) thumbnail = None From 6562d34a8cbdb93de77a8042f7409ebe31e3e3e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 22:57:48 +0700 Subject: [PATCH 1550/3599] [utils] Improve mimetype2ext --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index d16ea7f77..405c5d351 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -39,6 +39,7 @@ from youtube_dl.utils import ( is_html, js_to_json, limit_length, + mimetype2ext, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -625,6 +626,14 @@ class TestUtil(unittest.TestCase): limit_length('foo bar baz asd', 12).startswith('foo bar')) self.assertTrue('...' 
in limit_length('foo bar baz asd', 12)) + def test_mimetype2ext(self): + self.assertEqual(mimetype2ext(None), None) + self.assertEqual(mimetype2ext('video/x-flv'), 'flv') + self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') + self.assertEqual(mimetype2ext('text/vtt'), 'vtt') + self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') + self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1091f17f3..904f23fd7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2148,7 +2148,7 @@ def mimetype2ext(mt): return ext _, _, res = mt.rpartition('/') - res = res.lower() + res = res.split(';')[0].strip().lower() return { '3gpp': '3gp', From 6066d03db02b9c545435b2b8faffe2e0f6c66702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:02:15 +0700 Subject: [PATCH 1551/3599] [drtv] Modernize and make more robust --- youtube_dl/extractor/drtv.py | 53 ++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index e210cb610..7122449a3 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -4,6 +4,9 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + float_or_none, + mimetype2ext, parse_iso8601, remove_end, ) @@ -58,10 +61,12 @@ class DRTVIE(InfoExtractor): video_id, 'Downloading video JSON') data = programcard['Data'][0] - title = remove_end(self._og_search_title(webpage), ' | TV | DR') or data['Title'] - description = self._og_search_description(webpage) or data['Description'] + title = remove_end(self._og_search_title( + webpage, default=None), ' | TV | DR') or data['Title'] + description = 
self._og_search_description( + webpage, default=None) or data.get('Description') - timestamp = parse_iso8601(data['CreatedTime']) + timestamp = parse_iso8601(data.get('CreatedTime')) thumbnail = None duration = None @@ -72,16 +77,18 @@ class DRTVIE(InfoExtractor): subtitles = {} for asset in data['Assets']: - if asset['Kind'] == 'Image': - thumbnail = asset['Uri'] - elif asset['Kind'] == 'VideoResource': - duration = asset['DurationInMilliseconds'] / 1000.0 - restricted_to_denmark = asset['RestrictedToDenmark'] - spoken_subtitles = asset['Target'] == 'SpokenSubtitles' - for link in asset['Links']: - uri = link['Uri'] - target = link['Target'] - format_id = target + if asset.get('Kind') == 'Image': + thumbnail = asset.get('Uri') + elif asset.get('Kind') == 'VideoResource': + duration = float_or_none(asset.get('DurationInMilliseconds'), 1000) + restricted_to_denmark = asset.get('RestrictedToDenmark') + spoken_subtitles = asset.get('Target') == 'SpokenSubtitles' + for link in asset.get('Links', []): + uri = link.get('Uri') + if not uri: + continue + target = link.get('Target') + format_id = target or '' preference = None if spoken_subtitles: preference = -1 @@ -92,8 +99,8 @@ class DRTVIE(InfoExtractor): video_id, preference, f4m_id=format_id)) elif target == 'HLS': formats.extend(self._extract_m3u8_formats( - uri, video_id, 'mp4', preference=preference, - m3u8_id=format_id)) + uri, video_id, 'mp4', entry_protocol='m3u8_native', + preference=preference, m3u8_id=format_id)) else: bitrate = link.get('Bitrate') if bitrate: @@ -101,7 +108,7 @@ class DRTVIE(InfoExtractor): formats.append({ 'url': uri, 'format_id': format_id, - 'tbr': bitrate, + 'tbr': int_or_none(bitrate), 'ext': link.get('FileFormat'), }) subtitles_list = asset.get('SubtitlesList') @@ -110,12 +117,18 @@ class DRTVIE(InfoExtractor): 'Danish': 'da', } for subs in subtitles_list: - lang = subs['Language'] - subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] + if not subs.get('Uri'): + 
continue + lang = subs.get('Language') or 'da' + subtitles.setdefault(LANGS.get(lang, lang), []).append({ + 'url': subs['Uri'], + 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt' + }) if not formats and restricted_to_denmark: - raise ExtractorError( - 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) + self.raise_geo_restricted( + 'Unfortunately, DR is not allowed to show this program outside Denmark.', + expected=True) self._sort_formats(formats) From dacb3a864a8c89edb312cd28c3de1605a5467d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:43:20 +0700 Subject: [PATCH 1552/3599] [youtube:playlist] Fallback to video extraction for video/playlist URLs when playlist is broken (Closes #10537) --- youtube_dl/extractor/youtube.py | 56 +++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d5d5b7334..ea98fbf69 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1841,6 +1841,28 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', }, 'playlist_mincout': 21, + }, { + # Playlist URL that does not actually serve a playlist + 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', + 'info_dict': { + 'id': 'FqZTN594JQw', + 'ext': 'webm', + 'title': "Smiley's People 01 detective, Adventure Series, Action", + 'uploader': 'STREEM', + 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', + 'upload_date': '20150526', + 'license': 'Standard YouTube License', + 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', + 'categories': ['People & Blogs'], + 'tags': list, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [YoutubeIE.ie_key()], }] def 
_real_initialize(self): @@ -1901,9 +1923,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): playlist_title = self._html_search_regex( r'(?s)

]*>\s*(.*?)\s*

', - page, 'title') + page, 'title', default=None) - return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) + has_videos = True + + if not playlist_title: + try: + # Some playlist URLs don't actually serve a playlist (e.g. + # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4) + next(self._entries(page, playlist_id)) + except StopIteration: + has_videos = False + + return has_videos, self.playlist_result( + self._entries(page, playlist_id), playlist_id, playlist_title) def _check_download_just_video(self, url, playlist_id): # Check if it's a video-specific URL @@ -1912,9 +1945,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): video_id = query_dict['v'][0] if self._downloader.params.get('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - return self.url_result(video_id, 'Youtube', video_id=video_id) + return video_id, self.url_result(video_id, 'Youtube', video_id=video_id) else: self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + return video_id, None + return None, None def _real_extract(self, url): # Extract playlist id @@ -1923,7 +1958,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): raise ExtractorError('Invalid URL: %s' % url) playlist_id = mobj.group(1) or mobj.group(2) - video = self._check_download_just_video(url, playlist_id) + video_id, video = self._check_download_just_video(url, playlist_id) if video: return video @@ -1931,7 +1966,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - return self._extract_playlist(playlist_id) + has_videos, playlist = self._extract_playlist(playlist_id) + if has_videos or not video_id: + return playlist + + # Some playlist URLs don't actually serve a playlist (see + # https://github.com/rg3/youtube-dl/issues/10537). 
+ # Fallback to plain video extraction if there is a video id + # along with playlist id. + return self.url_result(video_id, 'Youtube', video_id=video_id) class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @@ -2312,7 +2355,8 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): video = self._check_download_just_video(url, 'WL') if video: return video - return self._extract_playlist('WL') + _, playlist = self._extract_playlist('WL') + return playlist class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): From c2b2c7e1386056698ee1b0de5427ea90abf8e9c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:50:42 +0700 Subject: [PATCH 1553/3599] [utils] Add quicktime to mimetype2ext --- youtube_dl/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 904f23fd7..ed199c4ad 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2168,6 +2168,7 @@ def mimetype2ext(mt): 'f4m+xml': 'f4m', 'hds+xml': 'f4m', 'vnd.ms-sstr+xml': 'ism', + 'quicktime': 'mov', }.get(res, res) From 3fcce30289a475901728af7a8dbe85304105b8ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 2 Sep 2016 23:53:17 +0700 Subject: [PATCH 1554/3599] [drtv] Update tests --- youtube_dl/extractor/drtv.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 7122449a3..88d096b30 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -16,21 +16,23 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder)/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _TESTS = [{ - 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', - 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', + 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', + 'md5': '25e659cccc9a2ed956110a299fdf5983', 'info_dict': { - 'id': 'panisk-paske-5', 
+ 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', - 'title': 'Panisk Påske (5)', - 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', - 'timestamp': 1426984612, - 'upload_date': '20150322', - 'duration': 1455, + 'title': 'Klassen - Dårlig taber (10)', + 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', + 'timestamp': 1471991907, + 'upload_date': '20160823', + 'duration': 606.84, + }, + 'params': { + 'skip_download': True, }, - 'skip': 'Video is no longer available', }, { 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', - 'md5': '2ada5074f9e79afc0d324a8e9784d850', + 'md5': '2c37175c718155930f939ef59952474a', 'info_dict': { 'id': 'christiania-pusher-street-ryddes-drdkrjpo', 'ext': 'mp4', @@ -39,7 +41,7 @@ class DRTVIE(InfoExtractor): 'timestamp': 1472800279, 'upload_date': '20160902', 'duration': 131.4, - } + }, }] def _real_extract(self, url): From 6496ccb41398971373a2f7162a0684dd12f0b56e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:17:15 +0700 Subject: [PATCH 1555/3599] [youtube] Add support for rental videos' previews (Closes #10532) --- youtube_dl/extractor/youtube.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ea98fbf69..4c8edef8d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -844,6 +844,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 'only_matching': True, + }, + { + # Rental video preview + 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', + 'info_dict': { + 'id': 'uGpuVWrhIzE', + 'ext': 'mp4', + 'title': 'Piku - Trailer', + 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', + 'upload_date': '20150811', + 'uploader': 'FlixMatrix', + 'uploader_id': 'FlixMatrixKaravan', + 'uploader_url': 
're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', + 'license': 'Standard YouTube License', + }, + 'params': { + 'skip_download': True, + }, } ] @@ -1254,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) add_dash_mpd(video_info) + # Rental video is not rented but preview is available (e.g. + # https://www.youtube.com/watch?v=yYr8q0y5Jfg, + # https://github.com/rg3/youtube-dl/issues/10532) + if not video_info and args.get('ypc_vid'): + return self.url_result( + args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): From 3a7d35b982fac19ca47b87358001379fafbd5731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:42:33 +0700 Subject: [PATCH 1556/3599] Credit @C4K3 for #10536 --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index b9a602c12..c4bef040a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -182,3 +182,4 @@ Rob van Bekkum Petr Zvoníček Pratyush Singh Aleksander Nitecki +Sebastian Blunt From 4b3a6076586a38450fa9633480d175a13e33dac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:45:17 +0700 Subject: [PATCH 1557/3599] [ChangeLog] Actualize --- ChangeLog | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/ChangeLog b/ChangeLog index 2e75c003d..eb05fe77e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,32 @@ version +Core +* Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in + _extract_m3u8_formats (#10522) +* Handle semicolon in mimetype2ext + + Extractors ++ [youtube] Add support for rental videos' previews (#10532) +* [youtube:playlist] Fallback to video extraction for video/playlist URLs when + no playlist is actually 
served (#10537) ++ [drtv] Add support for dr.dk/nyheder (#10536) ++ [facebook:plugins:video] Add extractor (#10530) ++ [go] Add extractor for *.go.com sites +* [adobepass] Check for authz_token expiration (#10527) +* [nytimes] improve extraction +* [thestar] Fix extraction (#10465) +* [glide] Fix extraction (#10478) +- [exfm] Remove extractor (#10482) +* [youporn] Fix categories and tags extraction (#10521) ++ [curiositystream] Add extractor for app.curiositystream.com - [thvideo] Remove extractor (#10464) * [movingimage] Fix for the new site name (#10466) ++ [cbs] Add support for once formats (#10515) +* [limelight] Skip ism snd duplicate manifests ++ [porncom] Extract categories and tags (#10510) ++ [facebook] Extract timestamp (#10508) ++ [yahoo] Extract more formats version 2016.08.31 From 86c3bbbcede6efa175f5a93e02511fe32585521f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 3 Sep 2016 01:46:41 +0700 Subject: [PATCH 1558/3599] release 2016.09.03 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++---- youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 2caca5115..fc18e733b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.31*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.31** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. 
Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.03** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.08.31 +[debug] youtube-dl version 2016.09.03 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index eb05fe77e..68dbeb696 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.03 Core * Restore usage of NAME attribute from EXT-X-MEDIA tag for formats codes in diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 42bf291e2..015332bca 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -171,6 +171,8 @@ - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** + - **curiositystream** + - **curiositystream:collection** - **CWTV** - **DailyMail** - **dailymotion** @@ -223,11 +225,11 @@ - **EsriVideo** - **Europa** - **EveryonesMixtape** - - **exfm**: ex.fm - **ExpoTV** - **ExtremeTube** - **EyedoTV** - **facebook** + - **FacebookPluginsVideo** - **faz.net** - **fc2** - **Fczenit** @@ -271,6 +273,7 @@ - **Glide**: Glide mobile video messages (glide.me) - **Globo** - **GloboArticle** + - **Go** - **GodTube** - **GodTV** - **Golem** @@ -406,6 +409,7 @@ - **MovieClips** - **MovieFap** - **Moviezine** + - **MovingImage** - **MPORA** - **MSN** - **mtg**: MTG services @@ -659,7 +663,6 @@ - **sr:mediathek**: 
Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - - **SSA** - **stanfordoc**: Stanford Open ClassRoom - **Steam** - **Stitcher** @@ -702,8 +705,6 @@ - **TheStar** - **ThisAmericanLife** - **ThisAV** - - **THVideo** - - **THVideoPlaylist** - **tinypic**: tinypic.com videos - **tlc.de** - **TMZ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fe442dd88..5be8c0122 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.08.31' +__version__ = '2016.09.03' From dedb1770295d214225a3a31b5f99da877cf01eee Mon Sep 17 00:00:00 2001 From: Christian Pointner Date: Sat, 3 Sep 2016 01:50:26 +0200 Subject: [PATCH 1559/3599] Fix parsing of HTML5 media elements This fixes an error in _parse_html5_media_entries in case an audio or video tag directly uses a src attribute insted of elements in it's body. --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a9c7a8d16..a82968162 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1749,7 +1749,7 @@ class InfoExtractor(object): media_attributes = extract_attributes(media_tag) src = media_attributes.get('src') if src: - _, formats = _media_formats(src) + _, formats = _media_formats(src, media_type) media_info['formats'].extend(formats) media_info['thumbnail'] = media_attributes.get('poster') if media_content: From cf0efe96366259a5f0f07ae79280bfa17dc6f6e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 17:25:03 +0800 Subject: [PATCH 1560/3599] [fc2:embed] New extractor for Flash player URLs Closes #10512 --- ChangeLog | 6 ++++ youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/fc2.py | 58 ++++++++++++++++++++++++++---- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/ChangeLog 
b/ChangeLog index 68dbeb696..065fc83a8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [fc2] Recognize Flash player URLs (#10512) + + version 2016.09.03 Core diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bc616223e..d851e5f36 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -269,7 +269,10 @@ from .facebook import ( FacebookPluginsVideoIE, ) from .faz import FazIE -from .fc2 import FC2IE +from .fc2 import ( + FC2IE, + FC2EmbedIE, +) from .fczenit import FczenitIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index c7d69ff1f..b9e58d4df 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -1,10 +1,12 @@ -#! -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import hashlib +import re from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_urllib_request, compat_urlparse, ) @@ -16,7 +18,7 @@ from ..utils import ( class FC2IE(InfoExtractor): - _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P[^/]+)' + _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P[^/]+)' IE_NAME = 'fc2' _NETRC_MACHINE = 'fc2' _TESTS = [{ @@ -75,12 +77,17 @@ class FC2IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) self._login() - webpage = self._download_webpage(url, video_id) - self._downloader.cookiejar.clear_session_cookies() # must clear - self._login() + webpage = None + if not url.startswith('fc2:'): + webpage = self._download_webpage(url, video_id) + self._downloader.cookiejar.clear_session_cookies() # must clear + self._login() - title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) + title = 'FC2 video %s' % video_id + thumbnail = None + if webpage is not None: + title = self._og_search_title(webpage) + 
thumbnail = self._og_search_thumbnail(webpage) refer = url.replace('/content/', '/a/content/') if '/a/content/' not in url else url mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() @@ -113,3 +120,40 @@ class FC2IE(InfoExtractor): 'ext': 'flv', 'thumbnail': thumbnail, } + + +class FC2EmbedIE(InfoExtractor): + _VALID_URL = r'https?://video\.fc2\.com/flv2\.swf\?(?P.+)' + IE_NAME = 'fc2:embed' + + _TEST = { + 'url': 'http://video.fc2.com/flv2.swf?t=201404182936758512407645&i=20130316kwishtfitaknmcgd76kjd864hso93htfjcnaogz629mcgfs6rbfk0hsycma7shkf85937cbchfygd74&i=201403223kCqB3Ez&d=2625&sj=11&lang=ja&rel=1&from=11&cmt=1&tk=TlRBM09EQTNNekU9&tl=プリズン・ブレイク%20S1-01%20マイケル%20【吹替】', + 'md5': 'b8aae5334cb691bdb1193a88a6ab5d5a', + 'info_dict': { + 'id': '201403223kCqB3Ez', + 'ext': 'flv', + 'title': 'プリズン・ブレイク S1-01 マイケル 【吹替】', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + query = compat_parse_qs(mobj.group('query')) + + video_id = query['i'][-1] + title = query.get('tl', ['FC2 video %s' % video_id])[0] + + sj = query.get('sj', [None])[0] + thumbnail = None + if sj: + # See thumbnailImagePath() in ServerConst.as of flv2.swf + thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % ( + sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))) + + return { + '_type': 'url_transparent', + 'url': 'fc2:%s' % video_id, + 'title': title, + 'thumbnail': thumbnail, + } From cdc783510bb575b2318b1d7d42fb98f0c0f0df18 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:16:19 +0800 Subject: [PATCH 1561/3599] [foxnews:insider] Add new extractor Closes #10445 --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 5 +++- youtube_dl/extractor/foxnews.py | 48 +++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 065fc83a8..199983674 100644 --- a/ChangeLog +++ b/ChangeLog 
@@ -1,6 +1,7 @@ version Extractors ++ [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d851e5f36..8c6ee0503 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -287,7 +287,10 @@ from .formula1 import Formula1IE from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE -from .foxnews import FoxNewsIE +from .foxnews import ( + FoxNewsIE, + FoxNewsInsiderIE, +) from .foxsports import FoxSportsIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index b04da2415..5c7acd795 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -3,11 +3,12 @@ from __future__ import unicode_literals import re from .amp import AMPIE +from .common import InfoExtractor class FoxNewsIE(AMPIE): IE_DESC = 'Fox News and Fox Business Video' - _VALID_URL = r'https?://(?Pvideo\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' + _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ { 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', @@ -49,6 +50,11 @@ class FoxNewsIE(AMPIE): 'url': 'http://video.foxbusiness.com/v/4442309889001', 'only_matching': True, }, + { + # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words + 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', + 'only_matching': True, + }, ] def 
_real_extract(self, url): @@ -58,3 +64,43 @@ class FoxNewsIE(AMPIE): 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) info['id'] = video_id return info + + +class FoxNewsInsiderIE(InfoExtractor): + _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' + IE_NAME = 'foxnews:insider' + + _TEST = { + 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', + 'md5': 'a10c755e582d28120c62749b4feb4c0c', + 'info_dict': { + 'id': '5099377331001', + 'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words', + 'ext': 'mp4', + 'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive', + 'description': 'Is campus censorship getting out of control?', + 'timestamp': 1472168725, + 'upload_date': '20160825', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'add_ie': [FoxNewsIE.ie_key()], + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL') + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + return { + '_type': 'url_transparent', + 'ie_key': FoxNewsIE.ie_key(), + 'url': embed_url, + 'display_id': display_id, + 'title': title, + 'description': description, + } From ed2bfe93aaa11f49f7b2b92b581abb6aa385dfbf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:22:00 +0800 Subject: [PATCH 1562/3599] [fc2:embed] Add ie_key --- youtube_dl/extractor/fc2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index b9e58d4df..c032d4d02 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -153,6 +153,7 @@ class FC2EmbedIE(InfoExtractor): return { '_type': 'url_transparent', + 'ie_key': FC2IE.ie_key(), 'url': 'fc2:%s' % video_id, 'title': title, 'thumbnail': thumbnail, From 
45aab4d30b7c3fc03c9be9680550cba88bd85b5c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 18:37:36 +0800 Subject: [PATCH 1563/3599] [youjizz] Fix extraction. The site has moved to HTML5 Closes #10437 --- ChangeLog | 1 + youtube_dl/extractor/youjizz.py | 43 +++++++-------------------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index 199983674..2809e55d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 31e2f9263..b50f34e9b 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -1,21 +1,16 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class YouJizzIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P[0-9]+)\.html(?:$|[?#])' _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', - 'md5': '07e15fa469ba384c7693fd246905547c', + 'md5': '78fc1901148284c69af12640e01c6310', 'info_dict': { 'id': '2189178', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Zeichentrick 1', 'age_limit': 18, } @@ -27,38 +22,18 @@ class YouJizzIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + # YouJizz's HTML5 player has invalid HTML + webpage = webpage.replace('"controls', '" controls') age_limit = self._rta_search(webpage) video_title = self._html_search_regex( r'\s*(.*)\s*', webpage, 'title') - embed_page_url = self._search_regex( - r'(https?://www.youjizz.com/videos/embed/[0-9]+)', - webpage, 'embed page') - webpage = self._download_webpage( - embed_page_url, video_id, note='downloading embed page') + info_dict = 
self._parse_html5_media_entries(url, webpage, video_id)[0] - # Get the video URL - m_playlist = re.search(r'so.addVariable\("playlist", ?"(?P.+?)"\);', webpage) - if m_playlist is not None: - playlist_url = m_playlist.group('playlist') - playlist_page = self._download_webpage(playlist_url, video_id, - 'Downloading playlist page') - m_levels = list(re.finditer(r'[^"]+)"\)\);', - webpage, 'video URL') - - return { + info_dict.update({ 'id': video_id, - 'url': video_url, 'title': video_title, - 'ext': 'flv', - 'format': 'flv', - 'player_url': embed_page_url, 'age_limit': age_limit, - } + }) + + return info_dict From 9603b6601208333bc49e0c69199f0e652a7aaea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:52:18 +0700 Subject: [PATCH 1564/3599] Introduce --skip-unavailable-fragments --- youtube_dl/__init__.py | 1 + youtube_dl/downloader/fragment.py | 10 ++++++++-- youtube_dl/options.py | 10 +++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a9730292c..42128272a 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -318,6 +318,7 @@ def _real_main(argv=None): 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, 'fragment_retries': opts.fragment_retries, + 'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index ba903ae10..b4a798f8f 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -22,14 +22,20 @@ class FragmentFD(FileDownloader): Available options: - fragment_retries: Number of times to retry a fragment for HTTP error (DASH only) + fragment_retries: Number of times to retry a fragment for HTTP error (DASH + and hlsnative only) + skip_unavailable_fragments: + Skip unavailable fragments (DASH and 
hlsnative only) """ def report_retry_fragment(self, fragment_name, count, retries): self.to_screen( - '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...' + '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' % (fragment_name, count, self.format_retries(retries))) + def report_skip_fragment(self, fragment_name): + self.to_screen('[download] Skipping fragment %s...' % fragment_name) + def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) self._start_frag_download(ctx) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5d62deef4..56f312f57 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -423,7 +423,15 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--fragment-retries', dest='fragment_retries', metavar='RETRIES', default=10, - help='Number of retries for a fragment (default is %default), or "infinite" (DASH only)') + help='Number of retries for a fragment (default is %default), or "infinite" (DASH and hlsnative only)') + downloader.add_option( + '--skip-unavailable-fragments', + action='store_true', dest='skip_unavailable_fragments', default=True, + help='Skip unavailable fragments (DASH and hlsnative only)') + general.add_option( + '--abort-on-unavailable-fragment', + action='store_false', dest='skip_unavailable_fragments', + help='Abort downloading when some fragment is not available') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', From 25afc2a7830e281e849609202b4f70728664bdb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:55:55 +0700 Subject: [PATCH 1565/3599] [downloader/dash:hls] Respect --fragment-retries and --skip-unavailable-fragments (Closes #10165, closes #10448) --- youtube_dl/downloader/dash.py | 12 +++++----- youtube_dl/downloader/hls.py | 41 +++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 12 deletions(-) diff 
--git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 8bbab9dbc..cbcee324d 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -38,6 +38,7 @@ class DashSegmentsFD(FragmentFD): segments_filenames = [] fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) def append_url_to_file(target_url, tmp_filename, segment_name): target_filename = '%s-%s' % (tmp_filename, segment_name) @@ -52,19 +53,20 @@ class DashSegmentsFD(FragmentFD): down.close() segments_filenames.append(target_sanitized) break - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately # retried with the same request data this usually succeeds (1-2 attemps # is usually enough) thus allowing to download the whole file successfully. - # So, we will retry all fragments that fail with 404 HTTP error for now. - if err.code != 404: - raise - # Retry fragment + # To be future-proof we will retry all fragments that fail with any + # HTTP error. 
count += 1 if count <= fragment_retries: self.report_retry_fragment(segment_name, count, fragment_retries) if count > fragment_retries: + if skip_unavailable_fragments: + self.report_skip_fragment(segment_name) + return self.report_error('giving up after %s fragment retries' % fragment_retries) return False diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index baaff44d5..7412620a5 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -13,6 +13,7 @@ from .fragment import FragmentFD from .external import FFmpegFD from ..compat import ( + compat_urllib_error, compat_urlparse, compat_struct_pack, ) @@ -83,6 +84,10 @@ class HlsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + test = self.params.get('test', False) + extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: @@ -99,15 +104,37 @@ class HlsFD(FragmentFD): line if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) - frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + frag_name = 'Frag%d' % i + frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name) if extra_query: frag_url = update_url_query(frag_url, extra_query) - success = ctx['dl'].download(frag_filename, {'url': frag_url}) - if not success: + count = 0 + while count <= fragment_retries: + try: + success = ctx['dl'].download(frag_filename, {'url': frag_url}) + if not success: + return False + down, frag_sanitized = sanitize_open(frag_filename, 'rb') + frag_content = down.read() + down.close() + break + except compat_urllib_error.HTTPError: + # Unavailable (possibly temporary) fragments may be served. + # First we try to retry then either skip or abort. + # See https://github.com/rg3/youtube-dl/issues/10165, + # https://github.com/rg3/youtube-dl/issues/10448). 
+ count += 1 + if count <= fragment_retries: + self.report_retry_fragment(frag_name, count, fragment_retries) + if count > fragment_retries: + if skip_unavailable_fragments: + i += 1 + media_sequence += 1 + self.report_skip_fragment(frag_name) + continue + self.report_error( + 'giving up after %s fragment retries' % fragment_retries) return False - down, frag_sanitized = sanitize_open(frag_filename, 'rb') - frag_content = down.read() - down.close() if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) frag_content = AES.new( @@ -115,7 +142,7 @@ class HlsFD(FragmentFD): ctx['dest_stream'].write(frag_content) frags_filenames.append(frag_sanitized) # We only download the first fragment during the test - if self.params.get('test', False): + if test: break i += 1 media_sequence += 1 From 2e99cd30c3108fd8da6a9f9fadfa89852c8d8826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 27 Aug 2016 04:57:59 +0700 Subject: [PATCH 1566/3599] [downloader/dash:hls] Report exact fragment error on retry --- youtube_dl/downloader/dash.py | 4 ++-- youtube_dl/downloader/fragment.py | 5 +++-- youtube_dl/downloader/hls.py | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index cbcee324d..e087cf142 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD): down.close() segments_filenames.append(target_sanitized) break - except compat_urllib_error.HTTPError: + except compat_urllib_error.HTTPError as err: # YouTube may often return 404 HTTP error for a fragment causing the # whole download to fail. However if the same fragment is immediately # retried with the same request data this usually succeeds (1-2 attemps @@ -62,7 +62,7 @@ class DashSegmentsFD(FragmentFD): # HTTP error. 
count += 1 if count <= fragment_retries: - self.report_retry_fragment(segment_name, count, fragment_retries) + self.report_retry_fragment(err, segment_name, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: self.report_skip_fragment(segment_name) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index b4a798f8f..84aacf7db 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -6,6 +6,7 @@ import time from .common import FileDownloader from .http import HttpFD from ..utils import ( + error_to_compat_str, encodeFilename, sanitize_open, ) @@ -28,10 +29,10 @@ class FragmentFD(FileDownloader): Skip unavailable fragments (DASH and hlsnative only) """ - def report_retry_fragment(self, fragment_name, count, retries): + def report_retry_fragment(self, err, fragment_name, count, retries): self.to_screen( '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' - % (fragment_name, count, self.format_retries(retries))) + % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries))) def report_skip_fragment(self, fragment_name): self.to_screen('[download] Skipping fragment %s...' % fragment_name) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7412620a5..5d70abf62 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -118,14 +118,14 @@ class HlsFD(FragmentFD): frag_content = down.read() down.close() break - except compat_urllib_error.HTTPError: + except compat_urllib_error.HTTPError as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. # See https://github.com/rg3/youtube-dl/issues/10165, # https://github.com/rg3/youtube-dl/issues/10448). 
count += 1 if count <= fragment_retries: - self.report_retry_fragment(frag_name, count, fragment_retries) + self.report_retry_fragment(err, frag_name, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: i += 1 From 4a69fa04e0074a3d5938ffb03decff9cc33f5d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 30 Aug 2016 22:28:14 +0700 Subject: [PATCH 1567/3599] [downloader/dash] Abort download immediately after giving up on some fragment --- youtube_dl/downloader/dash.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index e087cf142..efeae02a3 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -66,14 +66,17 @@ class DashSegmentsFD(FragmentFD): if count > fragment_retries: if skip_unavailable_fragments: self.report_skip_fragment(segment_name) - return + return True self.report_error('giving up after %s fragment retries' % fragment_retries) return False + return True if initialization_url: - append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init') + if not append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init'): + return False for i, segment_url in enumerate(segment_urls): - append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i) + if not append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i): + return False self._finish_frag_download(ctx) From 7e5dc339de14547aa7b489e88b4c456ec613ba9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 00:29:01 +0700 Subject: [PATCH 1568/3599] [youtube:watchlater] Fix extraction (Closes #10544) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4c8edef8d..0bc85af74 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2376,7 +2376,7 @@ 
class YoutubeWatchLaterIE(YoutubePlaylistIE): }] def _real_extract(self, url): - video = self._check_download_just_video(url, 'WL') + _, video = self._check_download_just_video(url, 'WL') if video: return video _, playlist = self._extract_playlist('WL') From 091624f9da491ef3a98e63367bf4ffd9836dafde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 03:39:13 +0700 Subject: [PATCH 1569/3599] [vimple] Extend _VALID_URL (Closes #10547) --- youtube_dl/extractor/vimple.py | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 92321d66e..7fd9b777b 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -28,23 +28,24 @@ class SprutoBaseIE(InfoExtractor): class VimpleIE(SprutoBaseIE): IE_DESC = 'Vimple - one-click video hosting' - _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P[\da-f-]{32,36})' - _TESTS = [ - { - 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', - 'md5': '2e750a330ed211d3fd41821c6ad9a279', - 'info_dict': { - 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', - 'ext': 'mp4', - 'title': 'Sunset', - 'duration': 20, - 'thumbnail': 're:https?://.*?\.jpg', - }, - }, { - 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', - 'only_matching': True, - } - ] + _VALID_URL = r'https?://(?:player\.vimple\.(?:ru|co)/iframe|vimple\.(?:ru|co))/(?P[\da-f-]{32,36})' + _TESTS = [{ + 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', + 'md5': '2e750a330ed211d3fd41821c6ad9a279', + 'info_dict': { + 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', + 'ext': 'mp4', + 'title': 'Sunset', + 'duration': 20, + 'thumbnail': 're:https?://.*?\.jpg', + }, + }, { + 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', + 'only_matching': True, + }, { + 'url': 'http://vimple.co/04506a053f124483b8fb05ed73899f19', + 'only_matching': True, 
+ }] def _real_extract(self, url): video_id = self._match_id(url) From 37c7490ac62d4aacbf9103bf6760d20f21984a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 04:59:46 +0700 Subject: [PATCH 1570/3599] [espn] Extend _VALID_URL (Closes #10549) --- youtube_dl/extractor/espn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 66c08bec4..6d10f8e68 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -5,7 +5,7 @@ from ..utils import remove_end class ESPNIE(InfoExtractor): - _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P[^/]+)' + _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P[^/]+)' _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', 'md5': '60e5d097a523e767d06479335d1bdc58', @@ -47,6 +47,9 @@ class ESPNIE(InfoExtractor): }, { 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', 'only_matching': True, + }, { + 'url': 'http://www.espn.com/video/clip?id=10365079', + 'only_matching': True, }] def _real_extract(self, url): From 622638512b8241c39837b634e75c44cf9105a299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 4 Sep 2016 16:25:59 +0800 Subject: [PATCH 1571/3599] [rottentomatoes] Fix extraction Closes #10467 --- ChangeLog | 1 + youtube_dl/extractor/rottentomatoes.py | 30 +++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2809e55d7..e6a2d24e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [rottentomatoes] Fix extraction (#10467) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index f9cd48790..df39ed3f2 100644 --- 
a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse -from .internetvideoarchive import InternetVideoArchiveIE +from ..utils import js_to_json class RottenTomatoesIE(InfoExtractor): @@ -11,21 +10,36 @@ class RottenTomatoesIE(InfoExtractor): _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', 'info_dict': { - 'id': '613340', + 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', + 'thumbnail': 're:^https?://.*\.jpg$', }, } def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - og_video = self._og_search_video_url(webpage) - query = compat_urlparse.urlparse(og_video).query + + params = self._parse_json( + self._search_regex(r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters'), + video_id, transform_source=lambda s: js_to_json(s.replace('window.location.href', '""'))) + + formats = [] + if params.get('urlHLS'): + formats.extend(self._extract_m3u8_formats( + params['urlHLS'], video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + if params.get('urlMP4'): + formats.append({ + 'url': params['urlMP4'], + 'format_id': 'mp4', + }) + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'url': InternetVideoArchiveIE._build_xml_url(query), - 'ie_key': InternetVideoArchiveIE.ie_key(), + 'id': video_id, 'title': self._og_search_title(webpage), + 'formats': formats, + 'thumbnail': params.get('thumbnailImg'), } From b29cd56591f1ef001d9f30bdff87789815f1fa0c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 4 Sep 2016 17:01:39 +0800 Subject: [PATCH 1572/3599] [pornovoisines] Fix extraction (closes #10469) --- ChangeLog | 1 + youtube_dl/extractor/pornovoisines.py | 80 +++++++++++++++------------ 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog 
index e6a2d24e1..616b55803 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 6b51e5c54..58f557e39 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import random from .common import InfoExtractor from ..utils import ( @@ -13,61 +12,69 @@ from ..utils import ( class PornoVoisinesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/showvideo/(?P\d+)/(?P[^/]+)' - - _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ - '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' - - _SERVER_NUMBERS = (1, 2) + _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P\d+)/(?P[^/.]+)' _TEST = { - 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', - 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', + 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', + 'md5': '6f8aca6a058592ab49fe701c8ba8317b', 'info_dict': { - 'id': '1285', + 'id': '919', 'display_id': 'recherche-appartement', 'ext': 'mp4', 'title': 'Recherche appartement', - 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', + 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140925', 'duration': 120, 'view_count': int, 'average_rating': float, - 'categories': ['Débutantes', 'Scénario', 'Sodomie'], + 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], 'age_limit': 18, + 'subtitles': { + 'fr': [{ + 'ext': 'vtt', + }] + }, } } - @classmethod - def build_video_url(cls, num): - return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) - def 
_real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') display_id = mobj.group('display_id') + settings_url = self._download_json( + 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, + video_id, note='Getting settings URL')['video_settings_url'] + settings = self._download_json(settings_url, video_id)['data'] + + formats = [] + for kind, data in settings['variants'].items(): + if kind == 'HLS': + formats.extend(self._extract_m3u8_formats( + data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + elif kind == 'MP4': + for item in data: + formats.append({ + 'url': item['url'], + 'height': item.get('height'), + 'bitrate': item.get('bitrate'), + }) + self._sort_formats(formats) + webpage = self._download_webpage(url, video_id) - video_url = self.build_video_url(video_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) - title = self._html_search_regex( - r'

(.+?)

', webpage, 'title', flags=re.DOTALL) - description = self._html_search_regex( - r'
(.+?)
', - webpage, 'description', fatal=False, flags=re.DOTALL) - - thumbnail = self._search_regex( - r'
\s*]+class=([\'"])thumb\1[^>]*src=([\'"])(?P[^"]+)\2', + webpage, 'thumbnail', fatal=False, group='url') upload_date = unified_strdate(self._search_regex( - r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) - duration = int_or_none(self._search_regex( - 'Durée (\d+)', webpage, 'duration', fatal=False)) + r'Le\s*([\d/]+)', webpage, 'upload date', fatal=False)) + duration = settings.get('main', {}).get('duration') view_count = int_or_none(self._search_regex( r'(\d+) vues', webpage, 'view count', fatal=False)) average_rating = self._search_regex( @@ -75,15 +82,19 @@ class PornoVoisinesIE(InfoExtractor): if average_rating: average_rating = float_or_none(average_rating.replace(',', '.')) - categories = self._html_search_meta( - 'keywords', webpage, 'categories', fatal=False) + categories = self._html_search_regex( + r'(?s)Catégories\s*:\s*(.+?)', webpage, 'categories', fatal=False) if categories: categories = [category.strip() for category in categories.split(',')] + subtitles = {'fr': [{ + 'url': subtitle, + } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} + return { 'id': video_id, 'display_id': display_id, - 'url': video_url, + 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, @@ -93,4 +104,5 @@ class PornoVoisinesIE(InfoExtractor): 'average_rating': average_rating, 'categories': categories, 'age_limit': 18, + 'subtitles': subtitles, } From 919cf1a62f022c61cfa65498e8c1b1cc0d21046e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 3 Sep 2016 23:00:52 +0800 Subject: [PATCH 1573/3599] [downloader/dash] Abort if the first segment fails Closes #10497, Closes #10542 --- ChangeLog | 4 ++++ youtube_dl/downloader/dash.py | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 616b55803..1d277b562 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core +* If the first segment of DASH fails, abort the whole download process 
to + prevent throttling (#10497) + Extractors * [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index efeae02a3..41fc9cfc2 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -40,7 +40,8 @@ class DashSegmentsFD(FragmentFD): fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - def append_url_to_file(target_url, tmp_filename, segment_name): + def process_segment(segment, tmp_filename, fatal): + target_url, segment_name = segment target_filename = '%s-%s' % (tmp_filename, segment_name) count = 0 while count <= fragment_retries: @@ -64,18 +65,23 @@ class DashSegmentsFD(FragmentFD): if count <= fragment_retries: self.report_retry_fragment(err, segment_name, count, fragment_retries) if count > fragment_retries: - if skip_unavailable_fragments: + if not fatal: self.report_skip_fragment(segment_name) return True self.report_error('giving up after %s fragment retries' % fragment_retries) return False return True - if initialization_url: - if not append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init'): - return False - for i, segment_url in enumerate(segment_urls): - if not append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i): + segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] + segments_to_download.extend([ + (segment_url, 'Seg%d' % i) + for i, segment_url in enumerate(segment_urls)]) + + for i, segment in enumerate(segments_to_download): + # In DASH, the first segment contains necessary headers to + # generate a valid MP4 file, so always abort for the first segment + fatal = i == 0 or not skip_unavailable_fragments + if not process_segment(segment, ctx['tmpfilename'], fatal): return False self._finish_frag_download(ctx) From 0def758782c273e0a1c9984f895638845796715b Mon Sep 17 00:00:00 2001 
From: Remita Amine Date: Sun, 4 Sep 2016 11:42:15 +0100 Subject: [PATCH 1574/3599] [internetvideoarchive] extract all formats --- youtube_dl/extractor/common.py | 14 +++++++------- youtube_dl/extractor/internetvideoarchive.py | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a82968162..6edd5a769 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1163,13 +1163,6 @@ class InfoExtractor(object): m3u8_id=None, note=None, errnote=None, fatal=True, live=False): - formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] - - format_url = lambda u: ( - u - if re.match(r'^https?://', u) - else compat_urlparse.urljoin(m3u8_url, u)) - res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', @@ -1180,6 +1173,13 @@ class InfoExtractor(object): m3u8_doc, urlh = res m3u8_url = urlh.geturl() + formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)] + + format_url = lambda u: ( + u + if re.match(r'^https?://', u) + else compat_urlparse.urljoin(m3u8_url, u)) + # We should try extracting formats only from master playlists [1], i.e. # playlists that describe available qualities. 
On the other hand media # playlists [2] should be returned as is since they contain just the media diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py index 45add007f..76cc5ec3e 100644 --- a/youtube_dl/extractor/internetvideoarchive.py +++ b/youtube_dl/extractor/internetvideoarchive.py @@ -48,13 +48,23 @@ class InternetVideoArchiveIE(InfoExtractor): # There are multiple videos in the playlist whlie only the first one # matches the video played in browsers video_info = configuration['playlist'][0] + title = video_info['title'] formats = [] for source in video_info['sources']: file_url = source['file'] if determine_ext(file_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - file_url, video_id, ext='mp4', m3u8_id='hls')) + m3u8_formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + file_url = m3u8_formats[0]['url'] + formats.extend(self._extract_f4m_formats( + file_url.replace('.m3u8', '.f4m'), + video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_mpd_formats( + file_url.replace('.m3u8', '.mpd'), + video_id, mpd_id='dash', fatal=False)) else: a_format = { 'url': file_url, @@ -70,7 +80,6 @@ class InternetVideoArchiveIE(InfoExtractor): self._sort_formats(formats) - title = video_info['title'] description = video_info.get('description') thumbnail = video_info.get('image') else: From 100bd86a68b5ee84669d162c9bcda31616f6596a Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 11:44:13 +0100 Subject: [PATCH 1575/3599] [rottentomatoes] delegate extraction to InternetVideoArchiveIE --- youtube_dl/extractor/rottentomatoes.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index df39ed3f2..23abf7a27 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ 
b/youtube_dl/extractor/rottentomatoes.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import js_to_json +from .internetvideoarchive import InternetVideoArchiveIE class RottenTomatoesIE(InfoExtractor): @@ -13,6 +13,7 @@ class RottenTomatoesIE(InfoExtractor): 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', 'thumbnail': 're:^https?://.*\.jpg$', }, } @@ -20,26 +21,12 @@ class RottenTomatoesIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - - params = self._parse_json( - self._search_regex(r'(?s)RTVideo\(({.+?})\);', webpage, 'player parameters'), - video_id, transform_source=lambda s: js_to_json(s.replace('window.location.href', '""'))) - - formats = [] - if params.get('urlHLS'): - formats.extend(self._extract_m3u8_formats( - params['urlHLS'], video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - if params.get('urlMP4'): - formats.append({ - 'url': params['urlMP4'], - 'format_id': 'mp4', - }) - self._sort_formats(formats) + iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id') return { + '_type': 'url_transparent', + 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id, + 'ie_key': InternetVideoArchiveIE.ie_key(), 'id': video_id, 'title': self._og_search_title(webpage), - 'formats': formats, - 'thumbnail': params.get('thumbnailImg'), } From feaa5ad787cdc28e4b6979f1c7798134b1bee723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:12:34 +0700 Subject: [PATCH 1576/3599] [youtube:playlist] Extend _VALID_URL --- youtube_dl/extractor/youtube.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 
5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0bc85af74..8fc26bd02 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -264,7 +264,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) )? # all until now is optional -> you can pass the naked ID ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID - (?!.*?&list=) # combined list/video URLs are handled by the playlist IE + (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE (?(1).+)? # if we found the ID, everything can follow $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' @@ -1778,11 +1778,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): _VALID_URL = r"""(?x)(?: (?:https?://)? (?:\w+\.)? - youtube\.com/ (?: - (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) - \? (?:.*?[&;])*? (?:p|a|list)= - | p/ + youtube\.com/ + (?: + (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) + \? (?:.*?[&;])*? 
(?:p|a|list)= + | p/ + )| + youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist= ) ( (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,} @@ -1887,6 +1890,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'skip_download': True, }, 'add_ie': [YoutubeIE.ie_key()], + }, { + 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21', + 'only_matching': True, }] def _real_initialize(self): From 433af6ad3002424ecb316e23946722d54010dbe1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 4 Sep 2016 14:18:41 +0100 Subject: [PATCH 1577/3599] [theplatform] fix player regex(closes #10546) --- youtube_dl/extractor/theplatform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 23067e8c6..6febf805b 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -96,7 +96,7 @@ class ThePlatformBaseIE(OnceIE): class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P[^/]+)/ - (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? + (?:(?:(?:[^/]+/)+select/)?(?Pmedia/(?:guid/\d+/)?)?|(?P(?:[^/\?]+/(?:swf|config)|onsite)/select/))? 
|theplatform:)(?P[^/\?&]+)''' _TESTS = [{ @@ -116,6 +116,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): # rtmp download 'skip_download': True, }, + 'skip': '404 Not Found', }, { # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', From d9606d9b6cb44ee7600abf63333db4b88532a391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:51:48 +0700 Subject: [PATCH 1578/3599] release 2016.09.04 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- README.md | 7 ++++++- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index fc18e733b..1ddb3ef85 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.03*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.03** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.03 +[debug] youtube-dl version 2016.09.04 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 1d277b562..a26f5d4aa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.04 Core * If the first segment of DASH fails, abort the whole download process to diff --git a/README.md b/README.md index 87465aa5e..207b633db 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ which means you can modify it, redistribute it or use it however you like. --mark-watched Mark videos watched (YouTube only) --no-mark-watched Do not mark videos watched (YouTube only) --no-color Do not emit color codes in output + --abort-on-unavailable-fragment Abort downloading when some fragment is not + available ## Network Options: --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. @@ -173,7 +175,10 @@ which means you can modify it, redistribute it or use it however you like. -R, --retries RETRIES Number of retries (default is 10), or "infinite". 
--fragment-retries RETRIES Number of retries for a fragment (default - is 10), or "infinite" (DASH only) + is 10), or "infinite" (DASH and hlsnative + only) + --skip-unavailable-fragments Skip unavailable fragments (DASH and + hlsnative only) --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024) --no-resize-buffer Do not automatically adjust the buffer diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 015332bca..9e21016f7 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -232,6 +232,7 @@ - **FacebookPluginsVideo** - **faz.net** - **fc2** + - **fc2:embed** - **Fczenit** - **features.aol.com** - **fernsehkritik.tv** @@ -245,6 +246,7 @@ - **FOX** - **Foxgay** - **FoxNews**: Fox News and Fox Business Video + - **foxnews:insider** - **FoxSports** - **france2.fr:generation-quoi** - **FranceCulture** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5be8c0122..3d12a47e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.03' +__version__ = '2016.09.04' From 8112bfeabae792754f51e0c012ed34c4dc521bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:57:18 +0700 Subject: [PATCH 1579/3599] [ChangeLog] Actualize --- ChangeLog | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index a26f5d4aa..a542496a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,26 @@ -version 2016.09.04 +version Core -* If the first segment of DASH fails, abort the whole download process to - prevent throttling (#10497) +* In DASH downloader if the first segment fails, abort the whole download + process to prevent throttling (#10497) ++ Add support for --skip-unavailable-fragments and --fragment retries in + hlsnative downloader (#10165, #10448). 
++ Add support for --skip-unavailable-fragments in DASH downloader ++ Introduce --skip-unavailable-fragments option for fragment based downloaders + that allows to skip fragments unavailable due to a HTTP error +* Fix extraction of video/audio entries with src attribute in + _parse_html5_media_entries (#10540) Extractors +* [theplatform] Relax URL regular expression (#10546) +* [youtube:playlist] Extend URL regular expression +* [rottentomatoes] Delegate extraction to internetvideoarchive extractor +* [internetvideoarchive] Extract all formats * [pornvoisines] Fix extraction (#10469) * [rottentomatoes] Fix extraction (#10467) +* [espn] Extend URL regular expression (#10549) +* [vimple] Extend URL regular expression (#10547) +* [youtube:watchlater] Fix extraction (#10544) * [youjizz] Fix extraction (#10437) + [foxnews] Add support for FoxNews Insider (#10445) + [fc2] Recognize Flash player URLs (#10512) @@ -19,7 +33,6 @@ Core _extract_m3u8_formats (#10522) * Handle semicolon in mimetype2ext - Extractors + [youtube] Add support for rental videos' previews (#10532) * [youtube:playlist] Fallback to video extraction for video/playlist URLs when From 48094901086534533ca89283067f2ab732857654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 4 Sep 2016 20:58:28 +0700 Subject: [PATCH 1580/3599] release 2016.09.04.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 1ddb3ef85..c03092442 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.04.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.04.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.04 +[debug] youtube-dl version 2016.09.04.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index a542496a3..d392513ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.04.1 Core * In DASH downloader if the first segment fails, abort the whole download diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3d12a47e8..b2ea6dac6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.04' +__version__ = '2016.09.04.1' From 78e762d23c48f85c61a8afcae29307912000a7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Thu, 1 Sep 2016 17:31:08 +0200 Subject: [PATCH 1581/3599] Add new extractor for TV Noe (Czech Christian TV). 
Fixes #10520 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvnoe.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/tvnoe.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8c6ee0503..e47adc26c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -916,6 +916,7 @@ from .tvc import ( ) from .tvigle import TvigleIE from .tvland import TVLandIE +from .tvnoe import TVNoeIE from .tvp import ( TVPEmbedIE, TVPIE, diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py new file mode 100644 index 000000000..d50261ddd --- /dev/null +++ b/youtube_dl/extractor/tvnoe.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .jwplatform import JWPlatformBaseIE +from ..utils import clean_html, get_element_by_class, js_to_json + + +class TVNoeIE(JWPlatformBaseIE): + _VALID_URL = r'https?://(www\.)?tvnoe\.cz/video/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.tvnoe.cz/video/10362', + 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', + 'info_dict': { + 'id': '10362', + 'ext': 'mp4', + 'series': 'Noční univerzita', + 'title': 'prof. Tomáš Halík, Th.D. 
- ' + + 'Návrat náboženství a střet civilizací', + 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + iframe_url = self._search_regex(r']+src="([^"]+)"', + webpage, 'iframe src attribute') + + ifs_page = self._download_webpage(iframe_url, video_id) + jwplayer_data = self._parse_json(self._find_jwplayer_data(ifs_page), + video_id, transform_source=js_to_json) + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, base_url=iframe_url) + + info_dict.update({ + 'id': video_id, + 'title': clean_html( + get_element_by_class('field-name-field-podnazev', webpage)), + 'description': clean_html(get_element_by_class('field-name-body', + webpage)), + 'series': clean_html(get_element_by_class('title', webpage)) + }) + return info_dict From 9127e1533d294eb672d783d1eeed15aeb9b2cbe1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 5 Sep 2016 13:37:36 +0800 Subject: [PATCH 1582/3599] [tvnoe] PEP8 and coding style --- youtube_dl/extractor/tvnoe.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dl/extractor/tvnoe.py index d50261ddd..1cd3e6a58 100644 --- a/youtube_dl/extractor/tvnoe.py +++ b/youtube_dl/extractor/tvnoe.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .jwplatform import JWPlatformBaseIE -from ..utils import clean_html, get_element_by_class, js_to_json +from ..utils import ( + clean_html, + get_element_by_class, + js_to_json, +) class TVNoeIE(JWPlatformBaseIE): @@ -14,8 +18,7 @@ class TVNoeIE(JWPlatformBaseIE): 'id': '10362', 'ext': 'mp4', 'series': 'Noční univerzita', - 'title': 'prof. Tomáš Halík, Th.D. - ' + - 'Návrat náboženství a střet civilizací', + 'title': 'prof. Tomáš Halík, Th.D. 
- Návrat náboženství a střet civilizací', 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', } } @@ -24,21 +27,23 @@ class TVNoeIE(JWPlatformBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - iframe_url = self._search_regex(r']+src="([^"]+)"', - webpage, 'iframe src attribute') + iframe_url = self._search_regex( + r']+src="([^"]+)"', webpage, 'iframe URL') ifs_page = self._download_webpage(iframe_url, video_id) - jwplayer_data = self._parse_json(self._find_jwplayer_data(ifs_page), - video_id, transform_source=js_to_json) + jwplayer_data = self._parse_json( + self._find_jwplayer_data(ifs_page), + video_id, transform_source=js_to_json) info_dict = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=iframe_url) info_dict.update({ 'id': video_id, - 'title': clean_html( - get_element_by_class('field-name-field-podnazev', webpage)), - 'description': clean_html(get_element_by_class('field-name-body', - webpage)), + 'title': clean_html(get_element_by_class( + 'field-name-field-podnazev', webpage)), + 'description': clean_html(get_element_by_class( + 'field-name-body', webpage)), 'series': clean_html(get_element_by_class('title', webpage)) }) + return info_dict From b49ad71ce1d985165e07fd0f59f80f677434ad84 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 5 Sep 2016 13:38:55 +0800 Subject: [PATCH 1583/3599] [ChangeLog] Update for #10524 --- ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ChangeLog b/ChangeLog index d392513ce..0be9b0fbb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [tvnoe] New extractor (#10524) + + version 2016.09.04.1 Core From 95be19d436d1938d104310e194e85ea5a10c3353 Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Sun, 4 Sep 2016 23:23:40 +0800 Subject: [PATCH 1584/3599] [miaopai] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/miaopai.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 
45 insertions(+) create mode 100644 youtube_dl/extractor/miaopai.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8c6ee0503..d511b04bc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -471,6 +471,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE +from .miaopai import MiaoPaiIE from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, diff --git a/youtube_dl/extractor/miaopai.py b/youtube_dl/extractor/miaopai.py new file mode 100644 index 000000000..c36b441b8 --- /dev/null +++ b/youtube_dl/extractor/miaopai.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import sanitized_Request + + +class MiaoPaiIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P[-A-Za-z0-9~_]+).htm' + _TEST = { + 'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm', + 'md5': '095ed3f1cd96b821add957bdc29f845b', + 'info_dict': { + 'id': 'n~0hO7sfV1nBEw4Y29-Hqg__', + 'ext': 'mp4', + 'title': '西游记音乐会的秒拍视频', + 'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg', + } + } + + _USER_AGENT_IPAD = 'User-Agent:Mozilla/5.0 ' \ + '(iPad; CPU OS 9_1 like Mac OS X) ' \ + 'AppleWebKit/601.1.46 (KHTML, like Gecko) ' \ + 'Version/9.0 Mobile/13B143 Safari/601.1' + + def _real_extract(self, url): + video_id = self._match_id(url) + request = sanitized_Request(url) + request.add_header('User-Agent', self._USER_AGENT_IPAD) + webpage = self._download_webpage(request, video_id) + + title = self._html_search_regex(r'([^<]*)', + webpage, + 'title') + regex = r"""
]*data-url=['"]([^'"]*\.jpg)['"]""" + thumbnail = self._html_search_regex(regex, webpage, '') + regex = r"""

', - webpage, 'title', default=None) or self._og_search_title(webpage) + webpage, 'title', default=None) or self._og_search_title( + webpage)).strip() video_id = self._html_search_regex( r'data-video=(["\'])(?P.+?)\1', webpage, 'video id', group='id') data = self._download_json( - 'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id) + 'https://mediazone.vrt.be/api/v1/%s/assets/%s' + % (site_id, video_id), display_id) formats = [] for target in data['targetUrls']: From c6129feb7f8313941a4d2044fa4b45ceaa0a91c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Sep 2016 23:20:45 +0700 Subject: [PATCH 1624/3599] [ketnet] Add extractor (Closes #10343) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ketnet.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/ketnet.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b7b630e9d..38dc33674 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -407,6 +407,7 @@ from .kankan import KankanIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE +from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py new file mode 100644 index 000000000..aaf3f807a --- /dev/null +++ b/youtube_dl/extractor/ketnet.py @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class KetnetIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', + 'md5': 'd907f7b1814ef0fa285c0475d9994ed7', + 'info_dict': { + 'id': 'zomerse-filmpjes', + 'ext': 'mp4', + 'title': 'Gluur mee op de filmset en op 
Pennenzakkenrock', + 'description': 'Gluur mee met Ghost Rockers op de filmset', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', + 'only_matching': True, + }, { + 'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + config = self._parse_json( + self._search_regex( + r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage, + 'player config'), + video_id) + + title = config['title'] + + formats = self._extract_m3u8_formats( + config['source']['hls'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'series': config.get('program'), + 'episode': config.get('episode'), + 'formats': formats, + } From 6c3affcb18f7eabf7d428e5efe474e0547ab25cb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 10 Sep 2016 20:09:09 +0800 Subject: [PATCH 1625/3599] [newgrounds] Fix uploader extraction Closes #10584 Also change test URLs to HTTPS, as proposed by @stepshal in #10593. 
Closes #10593 --- ChangeLog | 6 ++++++ youtube_dl/extractor/newgrounds.py | 23 ++++++++--------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index d84f447ba..fafe445cb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [newgrounds] Fix uploader extraction (#10584) + + version 2016.09.08 Extractors diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index 705940323..9bea610c8 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -1,15 +1,12 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor class NewgroundsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://www.newgrounds.com/audio/listen/549479', + 'url': 'https://www.newgrounds.com/audio/listen/549479', 'md5': 'fe6033d297591288fa1c1f780386f07a', 'info_dict': { 'id': '549479', @@ -18,7 +15,7 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'Burn7', } }, { - 'url': 'http://www.newgrounds.com/portal/view/673111', + 'url': 'https://www.newgrounds.com/portal/view/673111', 'md5': '3394735822aab2478c31b1004fe5e5bc', 'info_dict': { 'id': '673111', @@ -29,24 +26,20 @@ class NewgroundsIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - music_id = mobj.group('id') - webpage = self._download_webpage(url, music_id) + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) title = self._html_search_regex( r'([^>]+)', webpage, 'title') uploader = self._html_search_regex( - [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'], - webpage, 'uploader') + r'Author\s*]+>([^<]+)', webpage, 'uploader', fatal=False) - music_url_json_string = self._html_search_regex( - r'({"url":"[^"]+"),', webpage, 'music url') + '}' - music_url_json = json.loads(music_url_json_string) 
- music_url = music_url_json['url'] + music_url = self._parse_json(self._search_regex( + r'"url":("[^"]+"),', webpage, ''), media_id) return { - 'id': music_id, + 'id': media_id, 'title': title, 'url': music_url, 'uploader': uploader, From b29f842e0eb095248ff39d1fa28c5b4941793246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 20:46:45 +0700 Subject: [PATCH 1626/3599] [canalplus] Add support for c8.fr (Closes #10577) --- youtube_dl/extractor/canalplus.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 61463f249..69e8f4f57 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?c8\.fr| (?:www\.)?d17\.tv| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P[^/?#&]+))?(?:\?.*\bvid=(?P\d+))?| @@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'c8': 'd8', 'd17': 'd17', 'itele': 'itele', } From 84a18e9b908eb0b770f03603200026a06f4f08b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 22:01:49 +0700 Subject: [PATCH 1627/3599] [polskieradio:category] Improve extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/polskieradio.py | 158 ++++++++++++++------------- 2 files changed, 84 insertions(+), 79 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6a142996f..96f3d3fcb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -671,7 +671,10 @@ from .pluralsight import ( ) from .podomatic import PodomaticIE from .pokemon import PokemonIE -from .polskieradio import PolskieRadioIE, PolskieRadioProgrammeIE +from .polskieradio import ( + PolskieRadioIE, + PolskieRadioCategoryIE, +) from .porn91 import 
Porn91IE from .porncom import PornComIE from .pornhd import PornHdIE diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index c51d3d9be..5ff173774 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -10,90 +11,13 @@ from ..compat import ( compat_urlparse ) from ..utils import ( + extract_attributes, int_or_none, strip_or_none, unified_timestamp, ) -class PolskieRadioProgrammeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(,[^/]+)?/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', - 'info_dict': { - 'id': '5102', - 'title': 'HISTORIA ŻYWA', - }, - 'playlist_mincount': 34, - }, { - 'url': 'http://www.polskieradio.pl/7/4807', - 'info_dict': { - 'id': '4807', - 'title': 'Vademecum 1050. rocznicy Chrztu Polski' - }, - 'playlist_mincount': 5 - }, { - 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', - 'only_matching': True - }, { - 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', - 'info_dict': { - 'id': '4143', - 'title': 'Kierunek Kraków', - }, - 'playlist_mincount': 61 - }, { - 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', - 'only_matching': True - }] - - def _get_entries_from_page_content(self, base_url, content): - entries = [] - - articles = re.findall( - r'
\s+', - content) - for article_id, article_url, _, article_title in articles: - resolved_article_url = compat_urlparse.urljoin(base_url, article_url) - entries.append(self.url_result( - resolved_article_url, - ie='PolskieRadio', - video_id=article_id, - video_title=article_title)) - - return entries - - @classmethod - def suitable(cls, url): - return False if PolskieRadioIE.suitable(url) else super(PolskieRadioProgrammeIE, cls).suitable(url) - - def _real_extract(self, url): - programme_id = self._match_id(url) - webpage = self._download_webpage(url, programme_id) - - title = self._html_search_regex( - r'(.+?)', - webpage, 'title', fatal=False) - description = None - - entries = self._get_entries_from_page_content(url, webpage) - - pages = re.findall(r' 1: - page_url_root = next(url for _, url, _ in pages if len(url) > 0) - for page_number in range(2, page_count + 1): - page_url = page_url_root + str(page_number) - resolved_page_url = compat_urlparse.urljoin(url, page_url) - page_content = self._download_webpage( - resolved_page_url, programme_id, - note="Downloading page number %d" % page_number) - entries.extend(self._get_entries_from_page_content(url, page_content)) - - return self.playlist_result(entries, programme_id, title, description) - - class PolskieRadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' _TESTS = [{ @@ -176,3 +100,81 @@ class PolskieRadioIE(InfoExtractor): description = strip_or_none(self._og_search_description(webpage)) return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioCategoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + 'info_dict': { + 'id': '5102', + 'title': 'HISTORIA ŻYWA', + }, + 'playlist_mincount': 38, + }, { + 'url': 'http://www.polskieradio.pl/7/4807', + 'info_dict': { + 'id': '4807', + 'title': 'Vademecum 1050. 
rocznicy Chrztu Polski' + }, + 'playlist_mincount': 5 + }, { + 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', + 'only_matching': True + }, { + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', + 'info_dict': { + 'id': '214', + 'title': 'Muzyka', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) + + def _entries(self, url, page, category_id): + content = page + for page_num in itertools.count(2): + for a_entry, entry_id in re.findall( + r'(?s)]+>.*?(]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?
', + content): + entry = extract_attributes(a_entry) + href = entry.get('href') + if not href: + continue + yield self.url_result( + compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), + entry_id, entry.get('title')) + mobj = re.search( + r']+class=["\']next["\'][^>]*>\s*]+href=(["\'])(?P(?:(?!\1).)+)\1', + content) + if not mobj: + break + next_url = compat_urlparse.urljoin(url, mobj.group('url')) + content = self._download_webpage( + next_url, category_id, 'Downloading page %s' % page_num) + + def _real_extract(self, url): + category_id = self._match_id(url) + webpage = self._download_webpage(url, category_id) + title = self._html_search_regex( + r'([^<]+) - [^<]+ - [^<]+', + webpage, 'title', fatal=False) + return self.playlist_result( + self._entries(url, webpage, category_id), + category_id, title) From 732424375017a033f5b398b0f3dc2c6d47f3d3fd Mon Sep 17 00:00:00 2001 From: Scott Leggett Date: Mon, 5 Sep 2016 22:41:08 +1000 Subject: [PATCH 1628/3599] [9now] Fix extraction --- youtube_dl/extractor/ninenow.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index faa577237..907b42609 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,7 +44,14 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip') + current_key = ( + page_data.get('episode', {}).get('currentEpisodeKey') or + page_data.get('clip', {}).get('currentClipKey') + ) + common_data = ( + page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or + page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') + ) video_data = common_data['video'] if video_data.get('drm'): From 56c0ead4d3b9f365f0562678504879be8e79b89c Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 00:42:13 +0700 Subject: [PATCH 1629/3599] [9now] Improve video data extraction (Closes #10561) --- youtube_dl/extractor/ninenow.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index 907b42609..351bea7ba 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,14 +44,20 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - current_key = ( - page_data.get('episode', {}).get('currentEpisodeKey') or - page_data.get('clip', {}).get('currentClipKey') - ) - common_data = ( - page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or - page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') - ) + + for kind in ('episode', 'clip'): + current_key = page_data.get(kind, {}).get( + 'current%sKey' % kind.capitalize()) + if not current_key: + continue + cache = page_data.get(kind, {}).get('%sCache' % kind, {}) + if not cache: + continue + common_data = (cache.get(current_key) or list(cache.values())[0])[kind] + break + else: + raise ExtractorError('Unable to find video data') + video_data = common_data['video'] if video_data.get('drm'): From 2512b17493fced6b469d9610c1ad5c5af52870f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 01:27:20 +0700 Subject: [PATCH 1630/3599] [lrt] Fix audio extraction (Closes #10566) --- youtube_dl/extractor/lrt.py | 46 ++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 1072405b3..f5c997ef4 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -1,8 +1,11 @@ # coding: utf-8 from __future__ import 
unicode_literals +import re + from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, parse_duration, remove_end, @@ -12,8 +15,10 @@ from ..utils import ( class LRTIE(InfoExtractor): IE_NAME = 'lrt.lt' _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P[0-9]+)' - _TEST = { + _TESTS = [{ + # m3u8 download 'url': 'http://www.lrt.lt/mediateka/irasas/54391/', + 'md5': 'fe44cf7e4ab3198055f2c598fc175cb0', 'info_dict': { 'id': '54391', 'ext': 'mp4', @@ -23,20 +28,45 @@ class LRTIE(InfoExtractor): 'view_count': int, 'like_count': int, }, - 'params': { - 'skip_download': True, # m3u8 download + }, { + # direct mp3 download + 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/', + 'md5': '389da8ca3cad0f51d12bed0c844f6a0a', + 'info_dict': { + 'id': '1013074524', + 'ext': 'mp3', + 'title': 'Kita tema 2016-09-05 15:05', + 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5', + 'duration': 3008, + 'view_count': int, + 'like_count': int, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = remove_end(self._og_search_title(webpage), ' - LRT') - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', - webpage, 'm3u8 url', group='url') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + + formats = [] + for _, file_url in re.findall( + r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage): + ext = determine_ext(file_url) + if ext not in ('m3u8', 'mp3'): + continue + # mp3 served as m3u8 produces stuttered media file + if ext == 'm3u8' and '.mp3' in file_url: + continue + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + fatal=False)) + elif ext == 'mp3': + formats.append({ + 'url': file_url, + 'vcodec': 'none', + }) self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) From 
1e35999c1e4637174e2532c457431315b5e186d9 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 10 Sep 2016 19:43:09 +0100 Subject: [PATCH 1631/3599] [tfo] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tfo.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/tfo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 96f3d3fcb..124e909fb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -870,6 +870,7 @@ from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE +from .tfo import TFOIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py new file mode 100644 index 000000000..6f1eeac57 --- /dev/null +++ b/youtube_dl/extractor/tfo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + ExtractorError, + int_or_none, +) + + +class TFOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P\d+)' + _TEST = { + 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', + 'md5': '47c987d0515561114cf03d1226a9d4c7', + 'info_dict': { + 'id': '100463871', + 'ext': 'mp4', + 'title': 'Video Game Hackathon', + 'description': 'md5:558afeba217c6c8d96c60e5421795c07', + 'upload_date': '20160212', + 'timestamp': 1455310233, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id) + infos = self._download_json( + 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({ + 'product_id': video_id, + }).encode(), headers={ + 'X-tfo-session': 
self._get_cookies('http://www.tfo.org/')['tfo-session'].value, + }) + if infos.get('success') == 0: + raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True) + video_data = infos['data'] + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:' + video_data['llid'], + 'title': video_data['title'], + 'description': video_data.get('description'), + 'series': video_data.get('collection'), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'duration': int_or_none(video_data.get('duration')), + 'ie_key': 'LimelightMedia', + } From 001a5fd3d75b311102264cf3920c6aa5b2322e51 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 03:02:00 +0800 Subject: [PATCH 1632/3599] [iwara] Fix extraction after relaunch Closes #10462, closes #3215 --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/iwara.py | 77 ++++++++++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 36 -------------- 4 files changed, 79 insertions(+), 37 deletions(-) create mode 100644 youtube_dl/extractor/iwara.py delete mode 100644 youtube_dl/extractor/trollvids.py diff --git a/ChangeLog b/ChangeLog index fafe445cb..387dc7bf6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 124e909fb..2e795260e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -395,6 +395,7 @@ from .ivi import ( IviCompilationIE ) from .ivideon import IvideonIE +from .iwara import IwaraIE from .izlesene import IzleseneIE from .jeuxvideo import JeuxVideoIE from .jove import JoveIE @@ -899,7 +900,6 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE 
from .trilulilu import TriluliluIE -from .trollvids import TrollvidsIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py new file mode 100644 index 000000000..8d7e7f472 --- /dev/null +++ b/youtube_dl/extractor/iwara.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +from ..utils import remove_end + + +class IwaraIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 'info_dict': { + 'id': 'amVwUl1EHpAD9RD', + 'ext': 'mp4', + 'title': '【MMD R-18】ガールフレンド carry_me_off', + 'age_limit': 18, + }, + }, { + 'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO', + 'md5': '7e5f1f359cd51a027ba4a7b7710a50f0', + 'info_dict': { + 'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc', + 'ext': 'mp4', + 'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4', + 'age_limit': 18, + }, + 'add_ie': ['GoogleDrive'], + }, { + 'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq', + 'md5': '1d85f1e5217d2791626cff5ec83bb189', + 'info_dict': { + 'id': '6liAP9s2Ojc', + 'ext': 'mp4', + 'age_limit': 0, + 'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)', + 'description': 'md5:590c12c0df1443d833fbebe05da8c47a', + 'upload_date': '20160910', + 'uploader': 'aMMDsork', + 'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A', + }, + 'add_ie': ['Youtube'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname + # ecchi is 'sexy' in Japanese + age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 + + entries = self._parse_html5_media_entries(url, webpage, video_id) + 
+ if not entries: + iframe_url = self._html_search_regex( + r']+src=([\'"])(?P[^\'"]+)\1', + webpage, 'iframe URL', group='url') + return { + '_type': 'url_transparent', + 'url': iframe_url, + 'age_limit': age_limit, + } + + title = remove_end(self._html_search_regex( + r'([^<]+)', webpage, 'title'), ' | Iwara') + + info_dict = entries[0] + info_dict.update({ + 'id': video_id, + 'title': title, + 'age_limit': age_limit, + }) + + return info_dict diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py deleted file mode 100644 index 657705623..000000000 --- a/youtube_dl/extractor/trollvids.py +++ /dev/null @@ -1,36 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .nuevo import NuevoBaseIE - - -class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P\d+)/(?P[^/?#&]+)' - IE_NAME = 'trollvids' - _TEST = { - 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', - 'info_dict': { - 'id': '2349002', - 'ext': 'mp4', - 'title': '【MMD R-18】ガールフレンド carry_me_off', - 'age_limit': 18, - 'duration': 216.78, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - info = self._extract_nuevo( - 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id, - video_id) - info.update({ - 'display_id': display_id, - 'age_limit': 18 - }) - return info From bfcda07a2710738c32f63fdb4e09e177acc53df3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 04:06:00 +0800 Subject: [PATCH 1633/3599] [abc:iview] Skip the test. 
They are removed soon --- youtube_dl/extractor/abc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index c7b6df7d0..3792bd232 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -100,6 +100,7 @@ class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P[^/?#]+)' + # ABC iview programs are normally available for 14 days only. _TESTS = [{ 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', 'md5': '979d10b2939101f0d27a06b79edad536', @@ -112,6 +113,7 @@ class ABCIViewIE(InfoExtractor): 'uploader_id': 'abc1', 'timestamp': 1471719600, }, + 'skip': 'Video gone', }] def _real_extract(self, url): From 2cb93afcd8a8a1f086a97ef3791fa033ddc1610a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 14:59:14 +0700 Subject: [PATCH 1634/3599] [viafree] Improve video id extraction (Closes #10615) --- youtube_dl/extractor/tvplay.py | 36 +++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c0fec2594..5548ff2ac 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,6 +16,7 @@ from ..utils import ( parse_iso8601, qualities, try_get, + js_to_json, update_url_query, ) @@ -367,6 +368,10 @@ class ViafreeIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TVPlayIE.ie_key()], + }, { + # Different og:image URL schema + 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'only_matching': True, @@ -384,14 +389,35 @@ class ViafreeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + data = self._parse_json( + self._search_regex( + 
r'(?s)window\.App\s*=\s*({.+?})\s*;\s* Date: Sun, 11 Sep 2016 18:32:45 +0800 Subject: [PATCH 1635/3599] [foxnews] Support Fox News Articles (closes #10598) --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/foxnews.py | 40 +++++++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 387dc7bf6..a73a35e88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2e795260e..e9027fb69 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( + FoxNewsVideoIE, FoxNewsIE, FoxNewsInsiderIE, ) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 5c7acd795..3e9a6a08c 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,7 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsIE(AMPIE): +class FoxNewsVideoIE(AMPIE): + IE_NAME = 'foxnews:video' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -66,6 +67,35 @@ class FoxNewsIE(AMPIE): return info +class FoxNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' + IE_NAME = 'foxnews' + + _TEST = { + 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', + 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', + 'info_dict': { + 'id': '5116295019001', + 'ext': 'mp4', + 'title': 'Trump and Clinton asked to defend 
positions on Iraq War', + 'description': 'Veterans react on \'The Kelly File\'', + 'timestamp': 1473299755, + 'upload_date': '20160908', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-video-id=([\'"])(?P[^\'"]+)\1', + webpage, 'video ID', group='id') + return self.url_result( + 'http://video.foxnews.com/v/' + video_id, + FoxNewsVideoIE.ie_key()) + + class FoxNewsInsiderIE(InfoExtractor): _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' IE_NAME = 'foxnews:insider' @@ -83,7 +113,11 @@ class FoxNewsInsiderIE(InfoExtractor): 'upload_date': '20160825', 'thumbnail': 're:^https?://.*\.jpg$', }, - 'add_ie': [FoxNewsIE.ie_key()], + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': [FoxNewsVideoIE.ie_key()], } def _real_extract(self, url): @@ -98,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsIE.ie_key(), + 'ie_key': FoxNewsVideoIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From f01115c933bdf6a3d741bb2f306d26b4df943a40 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:36:59 +0800 Subject: [PATCH 1636/3599] [openload] Temporary fix (#10408) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 03baf8e32..76316ca2f 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -60,7 +60,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 1 + j += 3 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From fea74acad8e8ebc1fda1d24a10c085c6771a71be Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:53:05 
+0800 Subject: [PATCH 1637/3599] [foxnews] Revert to old extractor names --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/foxnews.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e9027fb69..a3cd9c289 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,8 +292,8 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( - FoxNewsVideoIE, FoxNewsIE, + FoxNewsArticleIE, FoxNewsInsiderIE, ) from .foxsports import FoxSportsIE diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 3e9a6a08c..229bcb175 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,8 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsVideoIE(AMPIE): - IE_NAME = 'foxnews:video' +class FoxNewsIE(AMPIE): + IE_NAME = 'foxnews' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -67,9 +67,9 @@ class FoxNewsVideoIE(AMPIE): return info -class FoxNewsIE(InfoExtractor): +class FoxNewsArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' - IE_NAME = 'foxnews' + IE_NAME = 'foxnews:article' _TEST = { 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', @@ -93,7 +93,7 @@ class FoxNewsIE(InfoExtractor): webpage, 'video ID', group='id') return self.url_result( 'http://video.foxnews.com/v/' + video_id, - FoxNewsVideoIE.ie_key()) + FoxNewsIE.ie_key()) class FoxNewsInsiderIE(InfoExtractor): @@ -117,7 +117,7 @@ class FoxNewsInsiderIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': [FoxNewsVideoIE.ie_key()], + 'add_ie': [FoxNewsIE.ie_key()], } 
def _real_extract(self, url): @@ -132,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsVideoIE.ie_key(), + 'ie_key': FoxNewsIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From 6bb05b32a990b8fb961971fcb8110d292cf953e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 19:22:51 +0800 Subject: [PATCH 1638/3599] [pornhub] Extract categories and tags (closes #10499) --- ChangeLog | 1 + youtube_dl/extractor/pornhub.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index a73a35e88..5d6609987 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [pornhub] Extract categories and tags (#10499) + [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 20976c101..0724efc09 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, sanitized_Request, str_to_int, @@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, }, { # non-ASCII title @@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, 'params': { 'skip_download': True, @@ -183,6 +188,15 @@ class PornHubIE(InfoExtractor): }) self._sort_formats(formats) + page_params = self._parse_json(self._search_regex( + r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P{[^}]+})', + webpage, 'page parameters', group='data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + tags = categories = None + if page_params: + tags = 
page_params.get('tags', '').split(',') + categories = page_params.get('categories', '').split(',') + return { 'id': video_id, 'uploader': video_uploader, @@ -195,6 +209,8 @@ class PornHubIE(InfoExtractor): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, + 'tags': tags, + 'categories': categories, } From 6599c72527ca8434589c010c48164494ab4c2469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:50:36 +0700 Subject: [PATCH 1639/3599] [tube8] Extract categories and tags (Closes #10579) --- youtube_dl/extractor/tube8.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 4053f6c21..e937b2396 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from ..utils import ( int_or_none, str_to_int, @@ -21,7 +23,13 @@ class Tube8IE(KeezMoviesIE): 'title': 'Kasia music video', 'age_limit': 18, 'duration': 230, + 'categories': ['Teen'], + 'tags': ['dancing'], + }, + 'params': { + 'proxy': '127.0.0.1:8118', } + }, { 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, @@ -51,6 +59,17 @@ class Tube8IE(KeezMoviesIE): r'(\d+)', webpage, 'comment count', fatal=False)) + category = self._search_regex( + r'Category:\s*\s*]+href=[^>]+>([^<]+)', + webpage, 'category', fatal=False) + categories = [category] if category else None + + tags_str = self._search_regex( + r'(?s)Tags:\s*(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None + info.update({ 'description': description, 'uploader': uploader, @@ -58,6 +77,8 @@ class Tube8IE(KeezMoviesIE): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, + 'categories': categories, + 'tags': tags, }) return info From bc9186c8822db456dae93d053a34e60b7887405a Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:51:12 +0700 Subject: [PATCH 1640/3599] [tvplay] Remove unused import --- youtube_dl/extractor/tvplay.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 5548ff2ac..58ffc0e6f 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,7 +16,6 @@ from ..utils import ( parse_iso8601, qualities, try_get, - js_to_json, update_url_query, ) From 1c81476cbb167776e7b1454bf135fb7ebf62547f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:20:09 +0700 Subject: [PATCH 1641/3599] release 2016.09.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++++-- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a983bf432..d7195712b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.08 +[debug] youtube-dl version 2016.09.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5d6609987..21d9f6275 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11 Extractors + [pornhub] Extract categories and tags (#10499) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e6be746a8..7a7b268d3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -247,7 +247,8 @@ - **Formula1** - **FOX** - **Foxgay** - - **FoxNews**: Fox News and Fox Business Video + - **foxnews**: Fox News and Fox Business Video + - **foxnews:article** - **foxnews:insider** - **FoxSports** - **france2.fr:generation-quoi** @@ -326,6 +327,7 @@ - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV + - **Iwara** - **Izlesene** - **JeuxVideo** - **Jove** @@ -339,6 +341,7 @@ - **KarriereVideos** - **keek** - **KeezMovies** + - **Ketnet** - **KhanAcademy** - **KickStarter** - **KonserthusetPlay** @@ -540,6 +543,7 @@ - **podomatic** - **Pokemon** - **PolskieRadio** + - **PolskieRadioCategory** - **PornCom** - **PornHd** - **PornHub**: PornHub and Thumbzilla @@ -701,9 +705,11 @@ - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** - 
**TeleMB** + - **TeleQuebec** - **TeleTask** - **Telewebion** - **TF1** + - **TFO** - **TheIntercept** - **ThePlatform** - **ThePlatformFeed** @@ -725,7 +731,6 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** - - **trollvids** - **TruTV** - **Tube8** - **TubiTv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 941ffb3f6..5f572391c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.08' +__version__ = '2016.09.11' From eb87d4545a58be369723eddf5433b4198d64d367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:29:25 +0700 Subject: [PATCH 1642/3599] [devscripts/release.sh] Add ChangeLog reminder prompt --- devscripts/release.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index ca6ae1b49..981d37ca7 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi +read -p "Is ChangeLog up to date? (y/n) " -n 1 +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; + /bin/echo -e "\n### First of all, testing..." 
make clean if $skip_tests ; then From d667ab7fad8d04a318b54e95d7a764e1667d80bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:30:18 +0700 Subject: [PATCH 1643/3599] [ChangeLog] Actualize --- ChangeLog | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 21d9f6275..9183f29e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,22 @@ -version 2016.09.11 +version Extractors ++ [tube8] Extract categories and tags (#10579) + [pornhub] Extract categories and tags (#10499) -+ [foxnews] Support Fox News articles (#10598) +* [openload] Temporary fix (#10408) ++ [foxnews] Add support Fox News articles (#10598) +* [viafree] Improve video id extraction (#10615) * [iwara] Fix extraction after relaunch (#10462, #3215) ++ [tfo] Add extractor for tfo.org +* [lrt] Fix audio extraction (#10566) +* [9now] Fix extraction (#10561) ++ [canalplus] Add support for c8.fr (#10577) * [newgrounds] Fix uploader extraction (#10584) ++ [polskieradio:category] Add support for category lists (#10576) ++ [ketnet] Add extractor for ketnet.be (#10343) ++ [canvas] Add support for een.be (#10605) ++ [telequebec] Add extractor for telequebec.tv (#1999) +* [parliamentliveuk] Fix extraction (#9137) version 2016.09.08 From fc150cba1d6763ab115319c5726b5081b0f49106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:32:01 +0700 Subject: [PATCH 1644/3599] [devscripts/release.sh] Add missing fi --- devscripts/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 981d37ca7..1af61aa0b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -61,7 +61,7 @@ if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi read -p "Is ChangeLog up to date? (y/n) " -n 1 -if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then exit 1; +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From 0307d6fba6d3b793acac5785b2cee39e3dfbffcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:33:20 +0700 Subject: [PATCH 1645/3599] release 2016.09.11.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d7195712b..e87fed573 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11 +[debug] youtube-dl version 2016.09.11.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 9183f29e8..669544815 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11.1 Extractors + [tube8] Extract categories and tags (#10579) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f572391c..903aede58 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11' +__version__ = '2016.09.11.1' From ee7e672eb0eca7a916845b359511262935f9ef1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:44:22 +0700 Subject: [PATCH 1646/3599] [tube8] Remove proxy settings from test --- youtube_dl/extractor/tube8.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index e937b2396..1853a1104 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -26,10 +26,6 @@ class Tube8IE(KeezMoviesIE): 'categories': ['Teen'], 'tags': ['dancing'], }, - 'params': { - 'proxy': '127.0.0.1:8118', - } - }, { 'url': 
'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, From be457302267b456412fb9848bcb8ce36874d8d7e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 12 Sep 2016 02:55:15 +0800 Subject: [PATCH 1647/3599] [nbc] Add new extractor for NBC Olympics (#10295, #10361) --- ChangeLog | 6 +++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nbc.py | 40 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/ChangeLog b/ChangeLog index 669544815..46eea0626 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [nbc] Add support for NBC Olympics (#10361) + + version 2016.09.11.1 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a3cd9c289..522691de1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -534,6 +534,7 @@ from .nbc import ( CSNNEIE, NBCIE, NBCNewsIE, + NBCOlympicsIE, NBCSportsIE, NBCSportsVPlayerIE, ) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f694e210b..f37bf2f30 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -335,3 +335,43 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, 'ie_key': 'ThePlatformFeed', } + + +class NBCOlympicsIE(InfoExtractor): + _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P[a-z-]+)' + + _TEST = { + # Geo-restricted to US + 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'md5': '54fecf846d05429fbaa18af557ee523a', + 'info_dict': { + 'id': 'WjTBzDXx5AUq', + 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'ext': 'mp4', + 'title': 'Rose\'s son Leo was in tears after his dad won gold', + 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf 
has already had on his children.', + 'timestamp': 1471274964, + 'upload_date': '20160815', + 'uploader': 'NBCU-SPORTS', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + + iframe_url = drupal_settings['vod']['iframe_url'] + theplatform_url = iframe_url.replace( + 'vplayer.nbcolympics.com', 'player.theplatform.com') + + return { + '_type': 'url_transparent', + 'url': theplatform_url, + 'ie_key': ThePlatformIE.ie_key(), + 'display_id': display_id, + } From 546edb2efabb18f9eb0eecb2f8719fcb777e99a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:01:31 +0700 Subject: [PATCH 1648/3599] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index a5e6a4233..4112f53bb 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. 
From d002e919863c910e52c623ee544e93fe41af4665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:48:45 +0700 Subject: [PATCH 1649/3599] [vimeo:ondemand] Pass Referer along with embed URL (#10624) --- youtube_dl/extractor/vimeo.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 7e854f326..50aacc6ac 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -350,6 +350,10 @@ class VimeoIE(VimeoBaseInfoExtractor): } ] + @staticmethod + def _smuggle_referrer(url, referrer_url): + return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) + @staticmethod def _extract_vimeo_url(url, webpage): # Look for embedded (iframe) Vimeo player @@ -357,8 +361,7 @@ class VimeoIE(VimeoBaseInfoExtractor): r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) if mobj: player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'http_headers': {'Referer': url}}) - return surl + return VimeoIE._smuggle_referrer(player_url, url) # Look for embedded (swf embed) Vimeo player mobj = re.search( r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) @@ -585,6 +588,20 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', }, + }, { + # requires Referer to be passed along with og:video:url + 'url': 'https://vimeo.com/ondemand/36938/126682985', + 'info_dict': { + 'id': '126682985', + 'ext': 'mp4', + 'title': 'Rävlock, rätt läte på rätt plats', + 'uploader': 'Lindroth & Norin', + 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user14430847', + 'uploader_id': 'user14430847', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://vimeo.com/ondemand/nazmaalik', 'only_matching': True, @@ -599,7 +616,12 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): def 
_real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key()) + return self.url_result( + # Some videos require Referer to be passed along with og:video:url + # similarly to generic vimeo embeds (e.g. + # https://vimeo.com/ondemand/36938/126682985). + VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url), + VimeoIE.ie_key()) class VimeoChannelIE(VimeoBaseInfoExtractor): From a5ff05df1af97613c979f85ab2f6f610f60be910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:49:31 +0700 Subject: [PATCH 1650/3599] [extractor/generic] Add vimeo embed that requires Referer passed --- youtube_dl/extractor/generic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 24b217715..2e46ca179 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1369,6 +1369,11 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Vimeo'], }, + { + # generic vimeo embed that requires original URL passed as Referer + 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/', + 'only_matching': True, + }, { 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video', 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', From e8bcd982ccee87e45a5cc8b116cc4452c81b0453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:33:00 +0700 Subject: [PATCH 1651/3599] [kaltura] Skip chun format --- youtube_dl/extractor/kaltura.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 6a8464998..22a06e4ae 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -262,6 +262,10 @@ class KalturaIE(InfoExtractor): # Continue if asset is not ready if f.get('status') != 2: continue + # Original 
format that's not available (e.g. kaltura:1926081:0_c03e1b5g) + # skip for now. + if f.get('fileExt') == 'chun': + continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) formats.append({ From 1d16035bb4ec516d25326ce5ff35affb4ff1f13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:43:45 +0700 Subject: [PATCH 1652/3599] [kaltura] Improve audio detection --- youtube_dl/extractor/kaltura.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 22a06e4ae..5a8403777 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -268,6 +268,10 @@ class KalturaIE(InfoExtractor): continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g + # -f mp4-56) + vcodec = 'none' if 'videoCodecId' not in f and f.get( + 'frameRate') == 0 else f.get('videoCodecId') formats.append({ 'format_id': '%(fileExt)s-%(bitrate)s' % f, 'ext': f.get('fileExt'), @@ -275,7 +279,7 @@ class KalturaIE(InfoExtractor): 'fps': int_or_none(f.get('frameRate')), 'filesize_approx': int_or_none(f.get('size'), invscale=1024), 'container': f.get('containerFormat'), - 'vcodec': f.get('videoCodecId'), + 'vcodec': vcodec, 'height': int_or_none(f.get('height')), 'width': int_or_none(f.get('width')), 'url': video_url, From a6ccc3e518eabf61cc41575e52361d5ea79e3796 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:05:52 +0700 Subject: [PATCH 1653/3599] [safari] Improve ids regexes (#10617) --- youtube_dl/extractor/safari.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 08ddbe3c4..eabe41efe 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -103,13 +103,13 @@ class SafariIE(SafariBaseIE): webpage = 
self._download_webpage(url, video_id) reference_id = self._search_regex( - r'data-reference-id=(["\'])(?P.+?)\1', + r'data-reference-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura reference id', group='id') partner_id = self._search_regex( - r'data-partner-id=(["\'])(?P.+?)\1', + r'data-partner-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura widget id', group='id') ui_id = self._search_regex( - r'data-ui-id=(["\'])(?P.+?)\1', + r'data-ui-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura uiconf id', group='id') query = { From fcba157e8049350c5386cc3b850626320d9ff7eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:29:43 +0700 Subject: [PATCH 1654/3599] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 4112f53bb..ab9968129 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. 
From 7a7309219cae70e14f58e904591a77360bfbc985 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 12 Sep 2016 23:39:11 +0100 Subject: [PATCH 1655/3599] [adobepass] add an option to specify mso_id and support for ROGERS TV Provider(closes #10606) --- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 1 + youtube_dl/extractor/adobepass.py | 49 +++++++++++++++++++++++-------- youtube_dl/options.py | 4 +++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 805733fb7..f70d5f49a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,6 +131,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. + ap_mso_id Adobe Pass Multiple-system operator Identifier. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 42128272a..2b1b841c9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -293,6 +293,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, + 'ap_mso_id': opts.ap_mso_id, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 68ec37e00..454a6af8d 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -6,10 +6,12 @@ import time import xml.etree.ElementTree as etree from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( unescapeHTML, urlencode_postdata, unified_timestamp, + ExtractorError, ) @@ -41,6 +43,11 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, 
date_ele))) return token_expires and token_expires <= int(time.time()) + def raise_mvpd_required(): + raise ExtractorError('This video is only available for users of participating TV providers. ' + 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' + 'and --netrc to provide account credentials.', expected=True) + mvpd_headers = { 'ap_42': 'anonymous', 'ap_11': 'Linux i686', @@ -55,19 +62,26 @@ class AdobePassIE(InfoExtractor): authn_token = None if not authn_token: # TODO add support for other TV Providers - mso_id = 'DTV' + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() username, password = self._get_netrc_login_info(mso_id) if not username or not password: - return '' + return raise_mvpd_required() - def post_form(form_page, note, data={}): + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - return self._download_webpage( - post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) - provider_redirect_page = self._download_webpage( + provider_redirect_page_res = self._download_webpage_handle( self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, 'Downloading Provider Redirect Page', query={ 'noflash': 'true', @@ -77,13 +91,22 @@ class AdobePassIE(InfoExtractor): 'domain_name': 'adobe.com', 'redirect_url': url, }) - provider_login_page = post_form( - provider_redirect_page, 'Downloading Provider Login Page') - mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { 
- 'username': username, - 'password': password, - }) - post_form(mvpd_confirm_page, 'Confirming Login') + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + login_data = {} + if mso_id == 'DTV': + login_data = { + 'username': username, + 'password': password, + } + elif mso_id == 'Rogers': + login_data = { + 'UserName': username, + 'UserPassword': password, + } + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') session = self._download_webpage( self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 56f312f57..c4057ce59 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -350,6 +350,10 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') + authentication.add_option( + '--ap-mso-id', + dest='ap_mso_id', metavar='APMSOID', + help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 45396dd2ed3bc7ab9ac6f9b5a5f51179b629abb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:20:25 +0700 Subject: [PATCH 1656/3599] [nhk] Fix extraction (Closes #10633) --- youtube_dl/extractor/nhk.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 691bdfa4e..5c8cd76dc 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -1,14 +1,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ExtractorError class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P.+?)\.html' + _VALID_URL = 
r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P[^/]+/[^/?#&]+)' _TEST = { # Videos available only for a limited period of time. Visit # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. - 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', + 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815', 'info_dict': { 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'ext': 'flv', @@ -19,25 +20,25 @@ class NhkVodIE(InfoExtractor): }, 'skip': 'Videos available only for a limited period of time', } + _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + data = self._download_json(self._API_URL, video_id) - embed_code = self._search_regex( - r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala embed code', group='id') + try: + episode = next( + e for e in data['data']['episodes'] + if e.get('url') and video_id in e['url']) + except StopIteration: + raise ExtractorError('Unable to find episode') - title = self._search_regex( - r']+class=["\']episode-detail["\']>\s*([^<]+)', - webpage, 'title', default=None) - description = self._html_search_regex( - r'(?s)]+class=["\']description["\'][^>]*>(.+?)

', - webpage, 'description', default=None) - series = self._search_regex( - r']+class=["\']detail-top-player-title[^>]+>]+>([^<]+)', - webpage, 'series', default=None) + embed_code = episode['vod_id'] + + title = episode.get('sub_title_clean') or episode['sub_title'] + description = episode.get('description_clean') or episode.get('description') + series = episode.get('title_clean') or episode.get('title') return { '_type': 'url_transparent', From 8414c2da31a5ff3cc5ba84fdd537d714d04949f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:22:16 +0700 Subject: [PATCH 1657/3599] [adobepass] PEP 8 --- youtube_dl/extractor/adobepass.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 454a6af8d..50a208085 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -44,7 +44,8 @@ class AdobePassIE(InfoExtractor): return token_expires and token_expires <= int(time.time()) def raise_mvpd_required(): - raise ExtractorError('This video is only available for users of participating TV providers. ' + raise ExtractorError( + 'This video is only available for users of participating TV providers. 
' 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' 'and --netrc to provide account credentials.', expected=True) From 1b6712ab2378b2e8eb59f372fb51193f8d3bdc97 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 13 Sep 2016 22:16:01 +0100 Subject: [PATCH 1658/3599] [adobepass] add specific options for adobe pass authentication - add --ap-username and --ap-password option to specify TV provider username and password in the cmd line - add --ap-retries option to limit the number of retries - add --list-ap-msi-ids to list the supported TV Providers --- youtube_dl/YoutubeDL.py | 4 +- youtube_dl/__init__.py | 15 +++ youtube_dl/extractor/adobepass.py | 206 ++++++++++++++++-------------- youtube_dl/extractor/common.py | 10 +- youtube_dl/options.py | 24 +++- 5 files changed, 155 insertions(+), 104 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f70d5f49a..9c2c26280 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,7 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso_id Adobe Pass Multiple-system operator Identifier. + ap_mso_id: Adobe Pass Multiple-system operator Identifier. + ap_username: TV Provider username for authentication purposes. + ap_password: TV Provider password for authentication purposes. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. 
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2b1b841c9..052f20ee7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -34,12 +34,14 @@ from .utils import ( setproctitle, std_headers, write_string, + render_table, ) from .update import update_self from .downloader import ( FileDownloader, ) from .extractor import gen_extractors, list_extractors +from .extractor.adobepass import MSO_INFO from .YoutubeDL import YoutubeDL @@ -118,18 +120,26 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) + if opts.list_ap_mso_ids: + table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] + write_string('Supported TV Providers:\n' + render_table(['mso id', 'mso name'], table) + '\n', out=sys.stdout) + sys.exit(0) # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error('using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error('account username missing\n') + if opts.ap_password is not None and opts.ap_username is None: + parser.error('TV Provider account username missing\n') if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): parser.error('using output template conflicts with using title, video ID or auto number') if opts.usetitle and opts.useid: parser.error('using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = compat_getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') if opts.ratelimit is not None: numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) if numeric_limit is None: @@ -169,6 +179,8 
@@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) + if opts.ap_retries is not None: + opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -294,6 +306,9 @@ def _real_main(argv=None): 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, 'ap_mso_id': opts.ap_mso_id, + 'ap_username': opts.ap_username, + 'ap_password': opts.ap_password, + 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 50a208085..9add6c0f8 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -15,6 +15,20 @@ from ..utils import ( ) +MSO_INFO = { + 'DTV': { + 'name': 'DirecTV', + 'username_field': 'username', + 'password_field': 'password', + }, + 'Rogers': { + 'name': 'Rogers Cable', + 'username_field': 'UserName', + 'password_field': 'UserPassword', + }, +} + + class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' @@ -43,6 +57,18 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res + post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return 
self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + def raise_mvpd_required(): raise ExtractorError( 'This video is only available for users of participating TV providers. ' @@ -57,105 +83,95 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} - authn_token = requestor_info.get('authn_token') - if authn_token and is_expired(authn_token, 'simpleTokenExpires'): - authn_token = None - if not authn_token: - # TODO add support for other TV Providers - mso_id = self._downloader.params.get('ap_mso_id') - if not mso_id: - raise_mvpd_required() - username, password = self._get_netrc_login_info(mso_id) - if not username or not password: - return raise_mvpd_required() + retries = self._downloader.params.get('ap_retries', 3) + count = 0 + while count < retries: + requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + authn_token = requestor_info.get('authn_token') + if authn_token and is_expired(authn_token, 'simpleTokenExpires'): + authn_token = None + if not authn_token: + # TODO add support for other TV Providers + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() + if mso_id not in MSO_INFO: + raise ExtractorError( + 'Unsupported TV Provider, use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True) + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: + raise_mvpd_required() + mso_info = MSO_INFO[mso_id] - def post_form(form_page_res, note, data={}): - form_page, urlh = form_page_res - post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) - form_data = 
self._hidden_inputs(form_page) - form_data.update(data) - return self._download_webpage_handle( - post_url, video_id, note, data=urlencode_postdata(form_data), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, }) - - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { + mso_info['username_field']: username, + mso_info['password_field']: password, }) - provider_login_page_res = post_form( - provider_redirect_page_res, 'Downloading Provider Login Page') - login_data = {} - if mso_id == 'DTV': - login_data = { - 'username': username, - 'password': password, - } - elif mso_id == 'Rogers': - login_data = { - 'UserName': username, - 'UserPassword': password, - } - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) - if mso_id == 'DTV': - post_form(mvpd_confirm_page_res, 'Confirming Login') + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', 
data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + if '' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') def _scrub_eq(o): @@ -350,10 +350,28 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') - authentication.add_option( + + adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options') + adobe_pass.add_option( '--ap-mso-id', dest='ap_mso_id', metavar='APMSOID', - help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') + help='Adobe Pass Multiple-system operator Identifier') + adobe_pass.add_option( + '--ap-username', + dest='ap_username', metavar='APUSERNAME', + help='TV Provider Login with this account ID') + adobe_pass.add_option( + '--ap-password', + dest='ap_password', metavar='APPASSWORD', + help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + adobe_pass.add_option( + '--list-ap-mso-ids', + action='store_true', dest='list_ap_mso_ids', default=False, + help='List all supported TV Providers') + adobe_pass.add_option( + '--ap-retries', + dest='ap_retries', metavar='APRETRIES', default=3, + help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 4875ff68476ff7de9733c80effb652fc6ab07ea0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:01:31 +0800 Subject: [PATCH 1659/3599] [bilibili] Remove copyrighted test cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I can't find any English or Chinese material that claims BiliBili has bought legal redistribution permissions for copyrighted products from copyrighted holders. 
References for removed test cases: "刀语": https://en.wikipedia.org/wiki/Katanagatari, by White Fox "哆啦A梦": https://en.wikipedia.org/wiki/Doraemon, by Shin-Ei Animation "岳父岳母真难当": https://en.wikipedia.org/wiki/Serial_(Bad)_Weddings, by Les films du 24 "混沌武士": https://en.wikipedia.org/wiki/Samurai_Champloo, by Manglobe I shouldn't have added them to _TESTS --- youtube_dl/extractor/bilibili.py | 61 ++------------------------------ 1 file changed, 2 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8fa96d3a0..9f5c12ab9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -17,7 +17,7 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P\d+)' - _TESTS = [{ + _TEST = { 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { @@ -32,64 +32,7 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - }, { - 'url': 'http://www.bilibili.com/video/av1041170/', - 'info_dict': { - 'id': '1041170', - 'ext': 'mp4', - 'title': '【BD1080P】刀语【诸神&异域】', - 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', - 'duration': 3382.259, - 'timestamp': 1396530060, - 'upload_date': '20140403', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '枫叶逝去', - 'uploader_id': '520116', - }, - }, { - 'url': 'http://www.bilibili.com/video/av4808130/', - 'info_dict': { - 'id': '4808130', - 'ext': 'mp4', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'duration': 1493.995, - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }, { - # Missing upload time - 'url': 'http://www.bilibili.com/video/av1867637/', - 'info_dict': { - 'id': 
'1867637', - 'ext': 'mp4', - 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', - 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', - 'duration': 5760.0, - 'uploader': '黑夜为猫', - 'uploader_id': '610729', - 'thumbnail': 're:^https?://.+\.jpg', - }, - 'params': { - # Just to test metadata extraction - 'skip_download': True, - }, - 'expected_warnings': ['upload time'], - }, { - 'url': 'http://bangumi.bilibili.com/anime/v/40068', - 'md5': '08d539a0884f3deb7b698fb13ba69696', - 'info_dict': { - 'id': '40068', - 'ext': 'mp4', - 'duration': 1402.357, - 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - 'thumbnail': 're:^http?://.+\.jpg', - }, - }] + } _APP_KEY = '6f90a59ac58a4123' _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' From 86d68f906e21a6674f9f8676b22a47414b6c9fd2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:11:49 +0800 Subject: [PATCH 1660/3599] [bilibili] Fix extraction for videos without backup_url (#10647) --- ChangeLog | 1 + youtube_dl/extractor/bilibili.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 46eea0626..25c916eb2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 9f5c12ab9..2d174e6f9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -67,7 +67,7 @@ class BiliBiliIE(InfoExtractor): 'url': durl['url'], 'filesize': int_or_none(durl['size']), }] - for backup_url in durl['backup_url']: + for backup_url in durl.get('backup_url', []): formats.append({ 'url': backup_url, # backup URLs have lower priorities From 5712c0f42639cd183b0dfbc51482592e790e99d1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 14 Sep 
2016 16:36:42 +0100 Subject: [PATCH 1661/3599] [adobepass] remove unnecessary option --- youtube_dl/__init__.py | 3 --- youtube_dl/extractor/adobepass.py | 3 +-- youtube_dl/options.py | 4 ---- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 052f20ee7..cdff3df65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -179,8 +179,6 @@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) - if opts.ap_retries is not None: - opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -308,7 +306,6 @@ def _real_main(argv=None): 'ap_mso_id': opts.ap_mso_id, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, - 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9add6c0f8..913a817d2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -83,9 +83,8 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - retries = self._downloader.params.get('ap_retries', 3) count = 0 - while count < retries: + while count < 2: requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b99201a20..342ae3be3 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -368,10 +368,6 @@ def parseOpts(overrideArguments=None): '--list-ap-mso-ids', action='store_true', dest='list_ap_mso_ids', default=False, help='List all supported TV Providers') - 
adobe_pass.add_option( - '--ap-retries', - dest='ap_retries', metavar='APRETRIES', default=3, - help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From b690ea15ebe7549854962f02987a8faaa6d41f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Sep 2016 22:45:23 +0700 Subject: [PATCH 1662/3599] [viafree] Fix test --- youtube_dl/extractor/tvplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 58ffc0e6f..3eda0a399 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -369,7 +369,7 @@ class ViafreeIE(InfoExtractor): 'add_ie': [TVPlayIE.ie_key()], }, { # Different og:image URL schema - 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', From 925194022cd661747771e58bad41e5f7ae118999 Mon Sep 17 00:00:00 2001 From: stepshal Date: Thu, 8 Sep 2016 18:29:05 +0700 Subject: [PATCH 1663/3599] Improve some _VALID_URLs --- youtube_dl/extractor/abc.py | 2 +- youtube_dl/extractor/aljazeera.py | 2 +- youtube_dl/extractor/azubu.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/bpb.py | 2 +- youtube_dl/extractor/camdemy.py | 2 +- youtube_dl/extractor/cbssports.py | 2 +- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/chirbit.py | 2 +- youtube_dl/extractor/cmt.py | 2 +- youtube_dl/extractor/criterion.py | 2 +- youtube_dl/extractor/dctp.py | 2 +- youtube_dl/extractor/democracynow.py | 2 +- youtube_dl/extractor/engadget.py | 2 +- youtube_dl/extractor/expotv.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/gamestar.py | 2 
+- youtube_dl/extractor/googleplus.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hark.py | 2 +- youtube_dl/extractor/hotnewhiphop.py | 2 +- youtube_dl/extractor/imdb.py | 2 +- youtube_dl/extractor/karaoketv.py | 2 +- youtube_dl/extractor/kickstarter.py | 2 +- youtube_dl/extractor/kuwo.py | 8 ++++---- youtube_dl/extractor/litv.py | 2 +- youtube_dl/extractor/lynda.py | 2 +- youtube_dl/extractor/macgamestore.py | 2 +- youtube_dl/extractor/metacritic.py | 2 +- youtube_dl/extractor/mgtv.py | 2 +- youtube_dl/extractor/ministrygrid.py | 2 +- youtube_dl/extractor/mitele.py | 2 +- youtube_dl/extractor/motorsport.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/myspass.py | 2 +- youtube_dl/extractor/nbc.py | 6 +++--- youtube_dl/extractor/ndr.py | 8 ++++---- youtube_dl/extractor/nextmedia.py | 6 +++--- youtube_dl/extractor/niconico.py | 2 +- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/openload.py | 2 +- youtube_dl/extractor/periscope.py | 2 +- youtube_dl/extractor/playvid.py | 2 +- youtube_dl/extractor/qqmusic.py | 6 +++--- youtube_dl/extractor/rottentomatoes.py | 2 +- youtube_dl/extractor/roxwel.py | 2 +- youtube_dl/extractor/rtve.py | 6 +++--- youtube_dl/extractor/screenjunkies.py | 2 +- youtube_dl/extractor/senateisvp.py | 2 +- youtube_dl/extractor/slideshare.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/syfy.py | 2 +- youtube_dl/extractor/teachingchannel.py | 2 +- youtube_dl/extractor/telecinco.py | 2 +- youtube_dl/extractor/telewebion.py | 2 +- youtube_dl/extractor/theintercept.py | 2 +- youtube_dl/extractor/thescene.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/ustream.py | 4 ++-- youtube_dl/extractor/vevo.py | 4 ++-- youtube_dl/extractor/videodetective.py | 2 +- youtube_dl/extractor/weiqitv.py | 2 +- youtube_dl/extractor/yam.py | 2 +- youtube_dl/extractor/youtube.py | 12 ++++++------ 65 files changed, 86 insertions(+), 86 
deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 3792bd232..465249bbf 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -13,7 +13,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py index b081695d8..388e578d5 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' + _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index a813eb429..72e1bd59d 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -103,7 +103,7 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'https?://www.azubu.tv/(?P[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P[^/]+)$' _TEST = { 'url': 'http://www.azubu.tv/MarsTVMDLen', diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index deb9cc1c0..b17916137 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1028,7 +1028,7 @@ class BBCIE(BBCCoUkIE): class BBCCoUkArticleIE(InfoExtractor): - _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P[a-zA-Z0-9]+)' IE_NAME = 'bbc.co.uk:article' IE_DESC = 'BBC 
articles' diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 6ad45a1e6..9661ade4f 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -12,7 +12,7 @@ from ..utils import ( class BpbIE(InfoExtractor): IE_DESC = 'Bundeszentrale für politische Bildung' - _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P[0-9]+)/' _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 268c34392..d4e6fbdce 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor): class CamdemyFolderIE(InfoExtractor): - _VALID_URL = r'https?://www.camdemy.com/folder/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P\d+)' _TESTS = [{ # links with trailing slash 'url': 'http://www.camdemy.com/folder/450', diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index bf7915626..3a62c840b 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -4,7 +4,7 @@ from .cbs import CBSBaseIE class CBSSportsIE(CBSBaseIE): - _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 5a58d1777..87c2e7089 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -17,7 +17,7 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' + _VALID_URL = 
r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index b43518652..61aed0167 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -65,7 +65,7 @@ class ChirbitIE(InfoExtractor): class ChirbitProfileIE(InfoExtractor): IE_NAME = 'chirbit:profile' - _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P[^/]+)' _TEST = { 'url': 'http://chirbit.com/ScarletBeauty', 'info_dict': { diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index f24568dcc..ac3bdfe8f 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -6,7 +6,7 @@ from ..utils import ExtractorError class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' _TESTS = [{ diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index dedb810a0..ad32673a8 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' + _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P[0-9]+)-.+' _TEST = { 'url': 'http://www.criterion.com/films/184-le-samourai', 'md5': 'bc51beba55685509883a9a7830919ec3', diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 9099f5046..a47e04993 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -6,7 +6,7 @@ from ..compat import compat_str 
class DctpTvIE(InfoExtractor): - _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P.+?)/$' + _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'info_dict': { diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 65a98d789..bdfe638b4 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -13,7 +13,7 @@ from ..utils import ( class DemocracynowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P[^\?]*)' + _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P[^\?]*)' IE_NAME = 'democracynow' _TESTS = [{ 'url': 'http://www.democracynow.org/shows/2015/7/3', diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index a39e9010d..65635c18b 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class EngadgetIE(InfoExtractor): - _VALID_URL = r'https?://www.engadget.com/video/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P[^/?#]+)' _TESTS = [{ # video with 5min ID diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py index 971c918a4..ef11962f3 100644 --- a/youtube_dl/extractor/expotv.py +++ b/youtube_dl/extractor/expotv.py @@ -8,7 +8,7 @@ from ..utils import ( class ExpoTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' + _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' _TEST = { 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 1477708bb..0a70ca763 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -8,7 +8,7 @@ from .common import 
InfoExtractor class FreespeechIE(InfoExtractor): IE_NAME = 'freespeech.org' - _VALID_URL = r'https://www\.freespeech\.org/video/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)' _TEST = { 'add_ie': ['Youtube'], 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 341e72733..55a34604a 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -9,7 +9,7 @@ from ..utils import ( class GameStarIE(InfoExtractor): - _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', 'md5': '96974ecbb7fd8d0d20fca5a00810cea7', diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 731bacd67..427499b11 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -10,7 +10,7 @@ from ..utils import unified_strdate class GooglePlusIE(InfoExtractor): IE_DESC = 'Google Plus' - _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' + _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' IE_NAME = 'plus.google' _TEST = { 'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH', diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 0c015141f..a43abd154 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -11,7 +11,7 @@ from ..utils import ( class GoshgayIE(InfoExtractor): - _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)' + _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)' _TEST = { 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', 'md5': '4b6db9a0a333142eb9f15913142b0ed1', 
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index b6cc15b6f..749e9154f 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class HarkIE(InfoExtractor): - _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+' + _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+' _TEST = { 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', 'md5': '6783a58491b47b92c7c1af5a77d4cbee', diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 9db565209..34163725f 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -12,7 +12,7 @@ from ..utils import ( class HotNewHipHopIE(InfoExtractor): - _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' + _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _TEST = { 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96', diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 3a6a6f5ad..f0fc8d49a 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -94,7 +94,7 @@ class ImdbIE(InfoExtractor): class ImdbListIE(InfoExtractor): IE_NAME = 'imdb:list' IE_DESC = 'Internet Movie Database lists' - _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' + _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' _TEST = { 'url': 'http://www.imdb.com/list/JFs9NWw6XI0', 'info_dict': { diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index bad46005b..bfccf89b0 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class KaraoketvIE(InfoExtractor): - _VALID_URL = 
r'https?://www\.karaoketv\.co\.il/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F', 'info_dict': { diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index c61e78622..fbe499497 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -6,7 +6,7 @@ from ..utils import smuggle_url class KickStarterIE(InfoExtractor): - _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*' + _VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*' _TESTS = [{ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description', 'md5': 'c81addca81327ffa66c642b5d8b08cab', diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 0eeb9ffeb..ba621ca7b 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -59,7 +59,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { @@ -139,7 +139,7 @@ class KuwoIE(KuwoBaseIE): class KuwoAlbumIE(InfoExtractor): IE_NAME = 'kuwo:album' IE_DESC = '酷我音乐 - 专辑' - _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { @@ -200,7 +200,7 @@ class KuwoChartIE(InfoExtractor): class KuwoSingerIE(InfoExtractor): IE_NAME = 'kuwo:singer' IE_DESC = '酷我音乐 - 歌手' - _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)' _TESTS = [{ 'url': 
'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { @@ -296,7 +296,7 @@ class KuwoCategoryIE(InfoExtractor): class KuwoMvIE(KuwoBaseIE): IE_NAME = 'kuwo:mv' IE_DESC = '酷我音乐 - MV' - _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/mv/6480076/', 'info_dict': { diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index 05c6579f1..a3784e6c6 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -14,7 +14,7 @@ from ..utils import ( class LiTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' + _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index a98c4c530..299873ecc 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor): class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py index 3cd4a3a19..43db9929c 100644 --- a/youtube_dl/extractor/macgamestore.py +++ b/youtube_dl/extractor/macgamestore.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class MacGameStoreIE(InfoExtractor): IE_NAME = 'macgamestore' IE_DESC = 'MacGameStore trailers' - _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' 
_TEST = { 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index 444ec0310..7d468d78b 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -9,7 +9,7 @@ from ..utils import ( class MetacriticIE(InfoExtractor): - _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 27bdff8b2..e0bb5d208 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' IE_DESC = '芒果TV' _TESTS = [{ diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index e48eba3fa..10190d5f6 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -8,7 +8,7 @@ from ..utils import ( class MinistryGridIE(InfoExtractor): - _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers', diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index cd169f361..2294745d4 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -74,7 +74,7 @@ class MiTeleBaseIE(InfoExtractor): class MiTeleIE(MiTeleBaseIE): IE_DESC = 'mitele.es' - _VALID_URL = 
r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 370328b36..c9d1ab64d 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -9,7 +9,7 @@ from ..compat import ( class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', 'info_dict': { diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index f130b75c4..aa091a62c 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class MoviezineIE(InfoExtractor): - _VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)' _TEST = { 'url': 'http://www.moviezine.se/video/205866', diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 1ca7b1a9e..2afe535b5 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,7 +11,7 @@ from ..utils import ( class MySpassIE(InfoExtractor): - _VALID_URL = r'https?://www\.myspass\.de/.*' + _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' _TEST = { 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'md5': '0b49f4844a068f8b33f4b7c88405862b', diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f37bf2f30..7f1bd9229 100644 --- 
a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -13,7 +13,7 @@ from ..utils import ( class NBCIE(InfoExtractor): - _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' + _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' _TESTS = [ { @@ -138,7 +138,7 @@ class NBCSportsVPlayerIE(InfoExtractor): class NBCSportsIE(InfoExtractor): # Does not include https because its certificate is invalid - _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', @@ -161,7 +161,7 @@ class NBCSportsIE(InfoExtractor): class CSNNEIE(InfoExtractor): - _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter', diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 0cded6b5c..e3b0da2e9 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -23,7 +23,7 @@ class NDRBaseIE(InfoExtractor): class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', @@ -105,7 +105,7 @@ class NDRIE(NDRBaseIE): class NJoyIE(NDRBaseIE): IE_NAME = 'njoy' IE_DESC = 'N-JOY' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' + _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' _TESTS = [{ # httpVideo, same 
content id 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', @@ -238,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor): class NDREmbedIE(NDREmbedBaseIE): IE_NAME = 'ndr:embed' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' _TESTS = [{ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', @@ -332,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE): class NJoyEmbedIE(NDREmbedBaseIE): IE_NAME = 'njoy:embed' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' + _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' _TESTS = [{ # httpVideo 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index aae7aeeeb..a08e48c4b 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -7,7 +7,7 @@ from ..utils import parse_iso8601 class NextMediaIE(InfoExtractor): IE_DESC = '蘋果日報' - _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199', 'md5': 'dff9fad7009311c421176d1ac90bfe4f', @@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor): class NextMediaActionNewsIE(NextMediaIE): IE_DESC = '蘋果日報 - 動新聞' - _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' + _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' 
_TESTS = [{ 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460', 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index dd75a48af..6eaaa8416 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor): class NiconicoPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)' _TEST = { 'url': 'http://www.nicovideo.jp/mylist/27411728', diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index 4a41c0542..f2ccc53dc 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class OktoberfestTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' _TEST = { 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 76316ca2f..c261a7455 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -13,7 +13,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = 
r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 6c640089d..eb1aeba46 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -94,7 +94,7 @@ class PeriscopeIE(PeriscopeBaseIE): class PeriscopeUserIE(PeriscopeBaseIE): - _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$' IE_DESC = 'Periscope user videos' IE_NAME = 'periscope:user' diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index 78d219299..79c2db085 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -14,7 +14,7 @@ from ..utils import ( class PlayvidIE(InfoExtractor): - _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' + _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' _TESTS = [{ 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index ff0af9543..37cb9e2c9 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -18,7 +18,7 @@ from ..utils import ( class QQMusicIE(InfoExtractor): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', 'md5': '9ce1c1c8445f561506d2e3cfb0255705', @@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor): class QQMusicSingerIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:singer' IE_DESC = 'QQ音乐 - 歌手' - _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' + 
_VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' _TEST = { 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', 'info_dict': { @@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index 23abf7a27..1d404d20a 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -5,7 +5,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class RottenTomatoesIE(InfoExtractor): - _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dl/extractor/roxwel.py index 41638c1d0..65284643b 100644 --- a/youtube_dl/extractor/roxwel.py +++ b/youtube_dl/extractor/roxwel.py @@ -7,7 +7,7 @@ from ..utils import unified_strdate, determine_ext class RoxwelIE(InfoExtractor): - _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' _TEST = { 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 34f9c4a99..f1b92f6da 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -64,7 +64,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = 
r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -184,7 +184,7 @@ class RTVEInfantilIE(InfoExtractor): class RTVELiveIE(InfoExtractor): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' - _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'http://www.rtve.es/directo/la-1/', @@ -226,7 +226,7 @@ class RTVELiveIE(InfoExtractor): class RTVETelevisionIE(InfoExtractor): IE_NAME = 'rtve.es:television' - _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' _TEST = { 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py index dd0a6ba19..02e574cd8 100644 --- a/youtube_dl/extractor/screenjunkies.py +++ b/youtube_dl/extractor/screenjunkies.py @@ -11,7 +11,7 @@ from ..utils import ( class ScreenJunkiesIE(InfoExtractor): - _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:www\.)?screenjunkies\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915', 'md5': '5c2b686bec3d43de42bde9ec047536b0', diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index c5f474dd1..35540c082 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor): ['arch', '', 'http://ussenate-f.akamaihd.net/'] ] 
_IE_NAME = 'senate.gov' - _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'info_dict': { diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 4967c1b77..74a1dc672 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -14,7 +14,7 @@ from ..utils import ( class SlideshareIE(InfoExtractor): - _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' + _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' _TEST = { 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 74cb3a08a..b41d9f59f 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -103,7 +103,7 @@ class SpiegelIE(InfoExtractor): class SpiegelArticleIE(InfoExtractor): - _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' IE_NAME = 'Spiegel:Article' IE_DESC = 'Articles on spiegel.de' _TESTS = [{ diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index ab8bab5cd..def7e5a2c 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -8,7 +8,7 @@ from ..utils import ( class SyfyIE(AdobePassIE): - _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', 
'info_dict': { diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index d14d93e3a..e89759714 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -7,7 +7,7 @@ from .ooyala import OoyalaIE class TeachingChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)' + _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)' _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 2ecfd0405..d5abfc9e4 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -6,7 +6,7 @@ from .mitele import MiTeleBaseIE class TelecincoIE(MiTeleBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' - _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' _TESTS = [{ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py index 77916c601..7786b2813 100644 --- a/youtube_dl/extractor/telewebion.py +++ b/youtube_dl/extractor/telewebion.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class TelewebionIE(InfoExtractor): - _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)' _TEST = { 'url': 'http://www.telewebion.com/#!/episode/1263668/', diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index 8cb3c3669..ec6f4ecaa 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -11,7 +11,7 @@ from ..utils import ( class 
TheInterceptIE(InfoExtractor): - _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/', 'md5': '145f28b41d44aab2f87c0a4ac8ec95bd', diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index 3e4e14031..ce1326c03 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -7,7 +7,7 @@ from ..utils import qualities class TheSceneIE(InfoExtractor): - _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' + _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' _TEST = { 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 88eb83d74..ce4f91f46 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -13,7 +13,7 @@ from ..compat import ( class TlcDeIE(InfoExtractor): IE_NAME = 'tlc.de' - _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' + _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' 
_TEST = { 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89b869559..c2f507233 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -307,7 +307,7 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)' _TESTS = [] @classmethod diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 54605d863..a3dc9d33e 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -14,7 +14,7 @@ from ..utils import ( class UstreamIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' IE_NAME = 'ustream' _TESTS = [{ 'url': 'http://www.ustream.tv/recorded/20274954', @@ -117,7 +117,7 @@ class UstreamIE(InfoExtractor): class UstreamChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)' IE_NAME = 'ustream:channel' _TEST = { 'url': 'http://www.ustream.tv/channel/channeljapan', diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 388b4debe..783efda7d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -31,7 +31,7 @@ class VevoIE(VevoBaseIE): (currently used by MTVIE and MySpaceIE) ''' _VALID_URL = r'''(?x) - (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| + (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| vevo:) @@ -374,7 +374,7 @@ class VevoIE(VevoBaseIE): class 
VevoPlaylistIE(VevoBaseIE): - _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 2ed5d9643..a19411a05 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -6,7 +6,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class VideoDetectiveIE(InfoExtractor): - _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index 3dafbeec2..8e09156c2 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class WeiqiTVIE(InfoExtractor): IE_DESC = 'WQTV' - _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 63bbc0634..ef5535547 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -15,7 +15,7 @@ from ..utils import ( class YamIE(InfoExtractor): IE_DESC = '蕃薯藤yam天空部落' - _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)' + _VALID_URL = r'https?://mymedia\.yam\.com/m/(?P<id>\d+)' _TESTS = [{ # An audio hosted on Yam diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5082cb589..5ca903825 100644 --- 
a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2302,7 +2302,7 @@ class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor): class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com (multi-season) shows' - _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)' IE_NAME = 'youtube:show' _TESTS = [{ 'url': 'https://www.youtube.com/show/airdisasters', @@ -2371,7 +2371,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeWatchLaterIE(YoutubePlaylistIE): IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=WL', @@ -2392,7 +2392,7 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?' _LOGIN_REQUIRED = True def _real_extract(self, url): @@ -2403,21 +2403,21 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?' 
_FEED_NAME = 'recommended' _PLAYLIST_TITLE = 'Youtube Recommended videos' class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' _FEED_NAME = 'subscriptions' _PLAYLIST_TITLE = 'Youtube Subscriptions' class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory' _FEED_NAME = 'history' _PLAYLIST_TITLE = 'Youtube History' From 014b7e6b25be5583c772af054cd7a1e37a327088 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:07:05 +0100 Subject: [PATCH 1664/3599] [go] add support for free full episodes(#10439) --- youtube_dl/extractor/go.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 6a437c54d..7925c1e22 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -8,6 +8,8 @@ from ..utils import ( int_or_none, determine_ext, parse_age_limit, + urlencode_postdata, + ExtractorError, ) @@ -19,7 +21,7 @@ class GoIE(InfoExtractor): 'watchdisneyjunior': '008', 'watchdisneyxd': '009', } - _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys()) + _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys()) _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -38,9 +40,13 @@ class GoIE(InfoExtractor): }] def _real_extract(self, url): - sub_domain, 
video_id = re.match(self._VALID_URL, url).groups() + sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id') + brand = self._BRANDS[sub_domain] video_data = self._download_json( - 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), video_id)['video'][0] title = video_data['title'] @@ -52,6 +58,21 @@ class GoIE(InfoExtractor): format_id = asset.get('format') ext = determine_ext(asset_url) if ext == 'm3u8': + video_type = video_data.get('type') + if video_type == 'lf': + entitlement = self._download_json( + 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + video_id, data=urlencode_postdata({ + 'video_id': video_data['id'], + 'video_type': video_type, + 'brand': brand, + 'device': '001', + })) + errors = entitlement.get('errors', {}).get('errors', []) + if errors: + error_massege = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + asset_url += '?' 
+ entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) else: From 353f340e11d7fc4a0a4973ddd85bc93b1061a487 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:22:42 +0100 Subject: [PATCH 1665/3599] [go] fix typo --- youtube_dl/extractor/go.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 7925c1e22..c7776b186 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -70,8 +70,8 @@ class GoIE(InfoExtractor): })) errors = entitlement.get('errors', {}).get('errors', []) if errors: - error_massege = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' 
+ entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) From 6db354a9f4c62c3cc47918adc13e1e4b63146c80 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:53:04 +0800 Subject: [PATCH 1666/3599] [kuwo] Update _TESTS --- youtube_dl/extractor/kuwo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index ba621ca7b..081af86f6 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -82,7 +82,7 @@ class KuwoIE(KuwoBaseIE): 'upload_date': '20150518', }, 'params': { - 'format': 'mp3-320' + 'format': 'mp3-320', }, }, { 'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016', @@ -181,7 +181,7 @@ class KuwoChartIE(InfoExtractor): 'info_dict': { 'id': '香港中文龙虎榜', }, - 'playlist_mincount': 10, + 'playlist_mincount': 7, } def _real_extract(self, url): @@ -303,7 +303,7 @@ class KuwoMvIE(KuwoBaseIE): 'id': '6480076', 'ext': 'mp4', 'title': 'My HouseMV', - 'creator': 'PM02:00', + 'creator': '2PM', }, # In this video, music URLs (anti.s) are blocked outside China and # USA, while the MV URL (mvurl) is available globally, so force the MV From 961516bfd1f3b514859f03766d282824ba8a76f5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:56:15 +0800 Subject: [PATCH 1667/3599] [kwuo:song] Improve error detection (closes #10650) --- ChangeLog | 1 + youtube_dl/extractor/kuwo.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 25c916eb2..c3c8bf037 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [kwuo] Improve error detection (#10650) * [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 
081af86f6..63e10125e 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -91,10 +91,10 @@ class KuwoIE(KuwoBaseIE): def _real_extract(self, url): song_id = self._match_id(url) - webpage = self._download_webpage( + webpage, urlh = self._download_webpage_handle( url, song_id, note='Download song detail info', errnote='Unable to get song detail info') - if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: + if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: raise ExtractorError('this song has been offline because of copyright issues', expected=True) song_name = self._html_search_regex( From a942d6cb48994c5ff14ccef8773fb086a5544970 Mon Sep 17 00:00:00 2001 From: renalid <renaud.euvrard@MAC-1636.local> Date: Fri, 2 Sep 2016 18:31:52 +0200 Subject: [PATCH 1668/3599] [utils,franceinter] Add french months' names and fix extraction Update of the "FranceInter" radio extractor : webpages HTML structure had changed, the extractor didn't work. So I updated this extractor to get the mp3 URL and all details. 
--- youtube_dl/extractor/franceinter.py | 38 ++++++++++++++++------------- youtube_dl/utils.py | 13 ++++++++-- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 2369f868d..6dad8d712 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,20 +2,24 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + unified_timestamp, + month_by_name, +) class FranceInterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' + _TEST = { - 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', + 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'md5': '4764932e466e6f6c79c317d2e74f6884', 'info_dict': { - 'id': '793962', + 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo', - 'description': 'md5:7e93ddb4451e7530022792240a3049c7', - 'timestamp': 1387369800, + 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', + 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. 
Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', + 'timestamp': 1387324800, 'upload_date': '20131218', }, } @@ -25,17 +29,17 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - path = self._search_regex( - r'<a id="player".+?href="([^"]+)"', webpage, 'video url') - video_url = 'http://www.franceinter.fr/' + path + video_url = self._search_regex( + r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') - title = self._html_search_regex( - r'<span class="title-diffusion">(.+?)</span>', webpage, 'title') - description = self._html_search_regex( - r'<span class="description">(.*?)</span>', - webpage, 'description', fatal=False) - timestamp = int_or_none(self._search_regex( - r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) + extractdate = extractdate.split('-') + extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] + + timestamp = unified_timestamp(extractdate) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ed199c4ad..623ced625 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,6 +91,10 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] +FRENCH_MONTH_NAMES = [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'] + KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 'flv', 'f4v', 'f4a', 'f4b', @@ -1587,11 +1591,16 @@ def parse_count(s): return lookup_unit_table(_UNIT_TABLE, s) -def month_by_name(name): +def month_by_name(name, lang='en'): """ Return the number of a month by 
(locale-independently) English name """ + name_list = ENGLISH_MONTH_NAMES + + if lang == 'fr': + name_list = FRENCH_MONTH_NAMES + try: - return ENGLISH_MONTH_NAMES.index(name) + 1 + return name_list.index(name) + 1 except ValueError: return None From f6717dec8abe7c0d34e704732b53665a9415fa2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:13:55 +0700 Subject: [PATCH 1669/3599] [utils] Improve month_by_name and add tests --- test/test_utils.py | 11 +++++++++++ youtube_dl/utils.py | 16 ++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 405c5d351..4ebca8744 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -40,6 +40,7 @@ from youtube_dl.utils import ( js_to_json, limit_length, mimetype2ext, + month_by_name, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -634,6 +635,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + def test_month_by_name(self): + self.assertEqual(month_by_name(None), None) + self.assertEqual(month_by_name('December', 'en'), 12) + self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('December'), 12) + self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('Unknown', 'unknown'), None) + + def test_m + def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 623ced625..a4ef15908 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,9 +91,12 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] -FRENCH_MONTH_NAMES = [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 
'septembre', 'octobre', 'novembre', 'decembre'] +MONTH_NAMES = { + 'en': ENGLISH_MONTH_NAMES, + 'fr': [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], +} KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', @@ -1594,13 +1597,10 @@ def parse_count(s): def month_by_name(name, lang='en'): """ Return the number of a month by (locale-independently) English name """ - name_list = ENGLISH_MONTH_NAMES - - if lang == 'fr': - name_list = FRENCH_MONTH_NAMES + month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en']) try: - return name_list.index(name) + 1 + return month_names.index(name) + 1 except ValueError: return None From 3e4185c3965579c2cc10922384694c2465be4557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:57:01 +0700 Subject: [PATCH 1670/3599] [utils] Use native french month names --- test/test_utils.py | 6 ++---- youtube_dl/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 4ebca8744..9789d8611 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -638,13 +638,11 @@ class TestUtil(unittest.TestCase): def test_month_by_name(self): self.assertEqual(month_by_name(None), None) self.assertEqual(month_by_name('December', 'en'), 12) - self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('décembre', 'fr'), 12) self.assertEqual(month_by_name('December'), 12) - self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('décembre'), None) self.assertEqual(month_by_name('Unknown', 'unknown'), None) - def test_m - def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a4ef15908..69ca88c85 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -94,8 +94,8 
@@ ENGLISH_MONTH_NAMES = [ MONTH_NAMES = { 'en': ENGLISH_MONTH_NAMES, 'fr': [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], + 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], } KNOWN_EXTENSIONS = ( From 0002962f3feb86ec8c14429af7ecddc17815fa93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:59:13 +0700 Subject: [PATCH 1671/3599] [franceinter] Improve extraction (Closes #10538) --- youtube_dl/extractor/franceinter.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 6dad8d712..0d58f89c5 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,10 +2,8 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - unified_timestamp, - month_by_name, -) +from ..compat import compat_str +from ..utils import month_by_name class FranceInterIE(InfoExtractor): @@ -18,8 +16,7 @@ class FranceInterIE(InfoExtractor): 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. 
Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', - 'timestamp': 1387324800, + 'description': 'md5:7f2ce449894d1e585932273080fb410d', 'upload_date': '20131218', }, } @@ -30,22 +27,28 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') + r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', group='url') title = self._og_search_title(webpage) description = self._og_search_description(webpage) - extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) - extractdate = extractdate.split('-') - extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] - - timestamp = unified_timestamp(extractdate) + upload_date_str = self._search_regex( + r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', + webpage, 'upload date', fatal=False) + if upload_date_str: + upload_date_list = upload_date_str.split() + upload_date_list.reverse() + upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date = ''.join(upload_date_list) + else: + upload_date = None return { 'id': video_id, 'title': title, 'description': description, - 'timestamp': timestamp, + 'upload_date': upload_date, 'formats': [{ 'url': video_url, 'vcodec': 'none', From 797c636bcb02d1199015b753d26430eec13c4b2b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 18:58:47 +0100 Subject: [PATCH 1672/3599] [ap] improve adobe pass names and parse error handling --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 8 +++++--- youtube_dl/extractor/adobepass.py | 9 +++------ youtube_dl/options.py | 12 ++++++------ 4 files changed, 15 insertions(+), 16 deletions(-) diff --git 
a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9c2c26280..29d8517a3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,7 +131,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso_id: Adobe Pass Multiple-system operator Identifier. + ap_mso: Adobe Pass Multiple-system operator Identifier. ap_username: TV Provider username for authentication purposes. ap_password: TV Provider password for authentication purposes. usenetrc: Use netrc for authentication instead. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cdff3df65..5614ef0fb 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -120,9 +120,9 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) - if opts.list_ap_mso_ids: + if opts.ap_mso_list: table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] - write_string('Supported TV Providers:\n' + render_table(['mso id', 'mso name'], table) + '\n', out=sys.stdout) + write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) sys.exit(0) # Conflicting, missing and erroneous options @@ -165,6 +165,8 @@ def _real_main(argv=None): parser.error('max sleep interval must be greater than or equal to min sleep interval') else: opts.max_sleep_interval = opts.sleep_interval + if opts.ap_mso and opts.ap_mso not in MSO_INFO: + parser.error('Unsupported TV Provider, use --ap-mso-list to get a list of supported TV Providers') def parse_retries(retries): if retries in ('inf', 'infinite'): @@ -303,7 +305,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, - 'ap_mso_id': opts.ap_mso_id, + 'ap_mso': opts.ap_mso, 
'ap_username': opts.ap_username, 'ap_password': opts.ap_password, 'quiet': (opts.quiet or any_getting or any_printing), diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 913a817d2..8ef5a96ce 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -72,8 +72,8 @@ class AdobePassIE(InfoExtractor): def raise_mvpd_required(): raise ExtractorError( 'This video is only available for users of participating TV providers. ' - 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' - 'and --netrc to provide account credentials.', expected=True) + 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' + 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) mvpd_headers = { 'ap_42': 'anonymous', @@ -91,12 +91,9 @@ class AdobePassIE(InfoExtractor): authn_token = None if not authn_token: # TODO add support for other TV Providers - mso_id = self._downloader.params.get('ap_mso_id') + mso_id = self._downloader.params.get('ap_mso') if not mso_id: raise_mvpd_required() - if mso_id not in MSO_INFO: - raise ExtractorError( - 'Unsupported TV Provider, use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True) username, password = self._get_login_info('ap_username', 'ap_password', mso_id) if not username or not password: raise_mvpd_required() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 342ae3be3..46c326b3d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -353,20 +353,20 @@ def parseOpts(overrideArguments=None): adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options') adobe_pass.add_option( - '--ap-mso-id', - dest='ap_mso_id', metavar='APMSOID', + '--ap-mso', + dest='ap_mso', metavar='MSO', help='Adobe Pass Multiple-system operator Identifier') adobe_pass.add_option( '--ap-username', - dest='ap_username', metavar='APUSERNAME', + dest='ap_username', 
metavar='USERNAME', help='TV Provider Login with this account ID') adobe_pass.add_option( '--ap-password', - dest='ap_password', metavar='APPASSWORD', + dest='ap_password', metavar='PASSWORD', help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( - '--list-ap-mso-ids', - action='store_true', dest='list_ap_mso_ids', default=False, + '--ap-mso-list', + action='store_true', dest='ap_mso_list', default=False, help='List all supported TV Providers') video_format = optparse.OptionGroup(parser, 'Video Format Options') From 87148bb7110ed54ef50f0660dfe0a735cdede3ca Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 20:21:09 +0100 Subject: [PATCH 1673/3599] [adobepass] rename --ap-mso-list option to --ap-list-mso --- youtube_dl/__init__.py | 4 ++-- youtube_dl/options.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5614ef0fb..1cf3140a0 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -120,7 +120,7 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) - if opts.ap_mso_list: + if opts.ap_list_mso: table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout) sys.exit(0) @@ -166,7 +166,7 @@ def _real_main(argv=None): else: opts.max_sleep_interval = opts.sleep_interval if opts.ap_mso and opts.ap_mso not in MSO_INFO: - parser.error('Unsupported TV Provider, use --ap-mso-list to get a list of supported TV Providers') + parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') def parse_retries(retries): if retries in ('inf', 'infinite'): diff --git a/youtube_dl/options.py 
b/youtube_dl/options.py index 46c326b3d..b2e863119 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -365,8 +365,8 @@ def parseOpts(overrideArguments=None): dest='ap_password', metavar='PASSWORD', help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( - '--ap-mso-list', - action='store_true', dest='ap_mso_list', default=False, + '--ap-list-mso', + action='store_true', dest='ap_list_mso', default=False, help='List all supported TV Providers') video_format = optparse.OptionGroup(parser, 'Video Format Options') From c035dba19e815eca4a21f17918e96c2e2bd55d6b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 15 Sep 2016 08:12:12 +0100 Subject: [PATCH 1674/3599] [bellmedia] add support for more sites --- youtube_dl/extractor/{ctv.py => bellmedia.py} | 39 ++++++++++++++++--- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 35 insertions(+), 6 deletions(-) rename youtube_dl/extractor/{ctv.py => bellmedia.py} (54%) diff --git a/youtube_dl/extractor/ctv.py b/youtube_dl/extractor/bellmedia.py similarity index 54% rename from youtube_dl/extractor/ctv.py rename to youtube_dl/extractor/bellmedia.py index a1fe86316..32326ed9e 100644 --- a/youtube_dl/extractor/ctv.py +++ b/youtube_dl/extractor/bellmedia.py @@ -6,8 +6,25 @@ import re from .common import InfoExtractor -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>ctv|tsn|bnn|thecomedynetwork)\.ca/.*?(?:\bvid=|-vid|~|%7E)(?P<id>[0-9.]+)' +class BellMediaIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)? 
+ (?P<domain> + (?: + ctv| + tsn| + bnn| + thecomedynetwork| + discovery| + discoveryvelocity| + sciencechannel| + investigationdiscovery| + animalplanet| + bravo| + mtv| + space + )\.ca| + much\.com + )/.*?(?:\bvid=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6})''' _TESTS = [{ 'url': 'http://www.ctv.ca/video/player?vid=706966', 'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0', @@ -32,15 +49,27 @@ class CTVIE(InfoExtractor): }, { 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', + 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', + 'only_matching': True, }] + _DOMAINS = { + 'thecomedynetwork': 'comedy', + 'discoveryvelocity': 'discvel', + 'sciencechannel': 'discsci', + 'investigationdiscovery': 'invdisc', + 'animalplanet': 'aniplan', + } def _real_extract(self, url): domain, video_id = re.match(self._VALID_URL, url).groups() - if domain == 'thecomedynetwork': - domain = 'comedy' + domain = domain.split('.')[0] return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (domain, video_id), + 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), 'ie_key': 'NineCNineMedia', } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 522691de1..dd0579425 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -93,6 +93,7 @@ from .bbc import ( ) from .beeg import BeegIE from .behindkink import BehindKinkIE +from .bellmedia import BellMediaIE from .beatportpro import BeatportProIE from .bet import BetIE from .bigflix import BigflixIE @@ -195,7 +196,6 @@ from .crunchyroll import ( ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from 
.curiositystream import ( From 95be29e1c6b7a06ac444d5142582ebece79698ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 20:58:02 +0700 Subject: [PATCH 1675/3599] [twitch] Fix api calls (Closes #10654, closes #10660) --- youtube_dl/extractor/twitch.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 359a8859c..af6d890b0 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -32,6 +32,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'https://usher.ttvnw.net' _LOGIN_URL = 'http://www.twitch.tv/login' + _CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6' _NETRC_MACHINE = 'twitch' def _handle_error(self, response): @@ -44,15 +45,9 @@ class TwitchBaseIE(InfoExtractor): expected=True) def _call_api(self, path, item_id, note): - headers = { - 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', - 'X-Requested-With': 'XMLHttpRequest', - } - for cookie in self._downloader.cookiejar: - if cookie.name == 'api_token': - headers['Twitch-Api-Token'] = cookie.value response = self._download_json( - '%s/%s' % (self._API_BASE, path), item_id, note) + '%s/%s' % (self._API_BASE, path), item_id, note, + headers={'Client-ID': self._CLIENT_ID}) self._handle_error(response) return response From eb5b1fc0211e89f386c4f5563cc1d5d4edeb3c55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 21:53:35 +0700 Subject: [PATCH 1676/3599] [crunchyroll] Fix authentication (Closes #10655) --- youtube_dl/extractor/crunchyroll.py | 47 +++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 6d3abb52f..1b69bd0b6 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -34,22 
+34,51 @@ from ..aes import ( class CrunchyrollBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.crunchyroll.com/login' + _LOGIN_FORM = 'login_form' _NETRC_MACHINE = 'crunchyroll' def _login(self): (username, password) = self._get_login_info() if username is None: return - self.report_login() - login_url = 'https://www.crunchyroll.com/?a=formhandler' - data = urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'name': username, - 'password': password, + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form_str = self._search_regex( + r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, + login_page, 'login form', group='form') + + post_url = extract_attributes(login_form_str).get('action') + if not post_url: + post_url = self._LOGIN_URL + elif not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page) + + login_form.update({ + 'login_form[name]': username, + 'login_form[password]': password, }) - login_request = sanitized_Request(login_url, data) - login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - self._download_webpage(login_request, None, False, 'Wrong login info') + + response = self._download_webpage( + post_url, None, 'Logging in', 'Wrong login info', + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + # Successful login + if '<title>Redirecting' in response: + return + + error = self._html_search_regex( + '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + raise ExtractorError('Unable to log in') def _real_initialize(self): self._login() From c8498368549048a578d5f30773aaa9760454983c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: 
Thu, 15 Sep 2016 21:54:48 +0700 Subject: [PATCH 1677/3599] [utils] Improve _hidden_inputs --- youtube_dl/extractor/common.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ff19270ae..e413799f9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -888,16 +888,16 @@ class InfoExtractor(object): def _hidden_inputs(html): html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) hidden_inputs = {} - for input in re.findall(r'(?i)<input([^>]+)>', html): - if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): + for input in re.findall(r'(?i)(<input[^>]+>)', html): + attrs = extract_attributes(input) + if not input: continue - name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input) - if not name: + if attrs.get('type') not in ('hidden', 'submit'): continue - value = re.search(r'value=(["\'])(?P<value>.*?)\1', input) - if not value: - continue - hidden_inputs[name.group('value')] = value.group('value') + name = attrs.get('name') or attrs.get('id') + value = attrs.get('value') + if name and value is not None: + hidden_inputs[name] = value return hidden_inputs def _form_hidden_inputs(self, form_id, html): From 537f753399ed9fd07fcb9285a2a3330010394c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:17:17 +0700 Subject: [PATCH 1678/3599] [options] Improve Adobe Pass wording --- youtube_dl/options.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b2e863119..100d21310 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -355,19 +355,19 @@ def parseOpts(overrideArguments=None): adobe_pass.add_option( '--ap-mso', dest='ap_mso', metavar='MSO', - help='Adobe Pass Multiple-system operator Identifier') + help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a 
list of available MSOs') adobe_pass.add_option( '--ap-username', dest='ap_username', metavar='USERNAME', - help='TV Provider Login with this account ID') + help='Multiple-system operator account login') adobe_pass.add_option( '--ap-password', dest='ap_password', metavar='PASSWORD', - help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + help='Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.') adobe_pass.add_option( '--ap-list-mso', action='store_true', dest='ap_list_mso', default=False, - help='List all supported TV Providers') + help='List all supported multiple-system operators') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From d2522b86ac7d1eff1f00e21bcd976a2616b6a6d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:18:31 +0700 Subject: [PATCH 1679/3599] [options] Actually print Adobe Pass options sections in --help --- youtube_dl/options.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 100d21310..53497fbc6 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -831,6 +831,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(video_format) parser.add_option_group(subtitles) parser.add_option_group(authentication) + parser.add_option_group(adobe_pass) parser.add_option_group(postproc) if overrideArguments is not None: From 1da50aa34e9fa0fd927de8197dcf2884551dd800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:24:55 +0700 Subject: [PATCH 1680/3599] [YoutubeDL] Improve Adobe Pass options' wording --- youtube_dl/YoutubeDL.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 29d8517a3..442aa663b 100755 --- a/youtube_dl/YoutubeDL.py +++ 
b/youtube_dl/YoutubeDL.py @@ -131,9 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso: Adobe Pass Multiple-system operator Identifier. - ap_username: TV Provider username for authentication purposes. - ap_password: TV Provider password for authentication purposes. + ap_mso: Adobe Pass multiple-system operator identifier. + ap_username: Multiple-system operator account username. + ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. From 2133565cec3646680600d314b93e535f6fa52339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:26:37 +0700 Subject: [PATCH 1681/3599] [extractor/common] Simplify _get_login_info --- youtube_dl/extractor/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e413799f9..9627816b4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -689,8 +689,6 @@ class InfoExtractor(object): if self._downloader is None: return (None, None) - username = None - password = None downloader_params = self._downloader.params # Attempt to use provided username and password or .netrc data @@ -700,7 +698,7 @@ class InfoExtractor(object): else: username, password = self._get_netrc_login_info(netrc_machine) - return (username, password) + return username, password def _get_tfa_info(self, note='two-factor verification code'): """ From 32443dd346594d64b579af714f4828287492c464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:34:29 +0700 Subject: [PATCH 1682/3599] [extractor/common] Update _get_login_info's comment --- youtube_dl/extractor/common.py | 5 ++++- 1 file 
changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9627816b4..95ea3fca5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -683,7 +683,10 @@ class InfoExtractor(object): def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ Get the login info as (username, password) - It will look in the netrc file using the _NETRC_MACHINE value + First look for the manually specified credentials using username_option + and password_option as keys in params dictionary. If no such credentials + available look in the netrc file using the netrc_machine or _NETRC_MACHINE + value. If there's no info available, return (None, None) """ if self._downloader is None: From dcce092e0aa92799f1e3a51ce5aae611af4d70d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:35:12 +0700 Subject: [PATCH 1683/3599] [extractor/common] Simplify _get_netrc_login_info and carry long lines --- youtube_dl/extractor/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 95ea3fca5..4f738b9fc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -674,11 +674,13 @@ class InfoExtractor(object): username = info[0] password = info[2] else: - raise netrc.NetrcParseError('No authenticators for %s' % netrc_machine) + raise netrc.NetrcParseError( + 'No authenticators for %s' % netrc_machine) except (IOError, netrc.NetrcParseError) as err: - self._downloader.report_warning('parsing .netrc: %s' % error_to_compat_str(err)) + self._downloader.report_warning( + 'parsing .netrc: %s' % error_to_compat_str(err)) - return (username, password) + return username, password def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ From 
1dec2c8a0e00e8ed53ddd030347ce9225df9964e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 22:47:45 +0700 Subject: [PATCH 1684/3599] [adobepass] Change mvpd cache section name In order to better emphasize it's relation to Adobe Pass --- youtube_dl/extractor/adobepass.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 8ef5a96ce..01932e5e6 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -32,6 +32,7 @@ MSO_INFO = { class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + _MVPD_CACHE = 'ap-mvpd' @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): @@ -85,7 +86,7 @@ class AdobePassIE(InfoExtractor): guid = xml_text(resource, 'guid') count = 0 while count < 2: - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None @@ -125,12 +126,12 @@ class AdobePassIE(InfoExtractor): 'requestor_id': requestor_id, }), headers=mvpd_headers) if '<pendingLogout' in session: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authn_token = unescapeHTML(xml_text(session, 'authnToken')) requestor_info['authn_token'] = authn_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) authz_token = requestor_info.get(guid) if authz_token and is_expired(authz_token, 'simpleTokenTTL'): @@ -146,12 +147,12 @@ class 
AdobePassIE(InfoExtractor): 'userMeta': '1', }), headers=mvpd_headers) if '<pendingLogout' in authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) requestor_info[guid] = authz_token - self._downloader.cache.store('mvpd', requestor_id, requestor_info) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info) mvpd_headers.update({ 'ap_19': xml_text(authn_token, 'simpleSamlNameID'), @@ -167,7 +168,7 @@ class AdobePassIE(InfoExtractor): 'hashed_guid': 'false', }), headers=mvpd_headers) if '<pendingLogout' in short_authorize: - self._downloader.cache.store('mvpd', requestor_id, {}) + self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 continue return short_authorize From 490b755769a364ca0624390453e36321d5182d3e Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Wed, 14 Sep 2016 23:03:26 +0700 Subject: [PATCH 1685/3599] Improve some id regexes --- youtube_dl/extractor/canvas.py | 2 +- youtube_dl/extractor/nfl.py | 2 +- youtube_dl/extractor/npo.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index ef0691dcd..d183d5d52 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -71,7 +71,7 @@ class CanvasIE(InfoExtractor): webpage)).strip() video_id = self._html_search_regex( - r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'video id', group='id') + r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id') data = self._download_json( 'https://mediazone.vrt.be/api/v1/%s/assets/%s' diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 200874d68..3930d16f1 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -165,7 +165,7 @@ class NFLIE(InfoExtractor): group='config')) # For 
articles, the id in the url is not the video id video_id = self._search_regex( - r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>.+?)\1', + r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=video_id, group='id') config = self._download_json(config_url, video_id, 'Downloading player config') url_template = NFLIE.prepend_host( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 87f5675c7..3293bdb17 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -429,7 +429,7 @@ class SchoolTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'data-mid=(["\'])(?P<id>.+?)\1', webpage, 'video_id', group='id') + r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video_id', group='id') return { '_type': 'url_transparent', 'ie_key': 'NPO', From e6bf3621e703a7cd0d62736a1765b0ccff5adfe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:31:16 +0700 Subject: [PATCH 1686/3599] [ChangeLog] Actualize --- ChangeLog | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c3c8bf037..cd1f2fdf1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,26 @@ version <unreleased> +Core +* Improve _hidden_inputs ++ Introduce improved explicit Adobe Pass support ++ Add --ap-mso to provide multiple-system operator identifier ++ Add --ap-username to provide MSO account username ++ Add --ap-password to provide MSO account password ++ Add --ap-list-mso to list all supported MSOs ++ Add support for Rogers Cable multiple-system operator (#10606) + Extractors -* [kwuo] Improve error detection (#10650) +* [crunchyroll] Fix authentication (#10655) +* [twitch] Fix API calls (#10654, #10660) ++ [bellmedia] Add support for more Bell Media Television sites 
+* [franceinter] Fix extraction (#10538, #2105) +* [kuwo] Improve error detection (#10650) ++ [go] Add support for free full episodes (#10439) * [bilibili] Fix extraction for specific videos (#10647) +* [nhk] Fix extraction (#10633) +* [kaltura] Improve audio detection +* [kaltura] Skip chun format ++ [vimeo:ondemand] Pass Referer along with embed URL (#10624) + [nbc] Add support for NBC Olympics (#10361) From f5e008d134f5e69920829cfd7a5ce5ae57d275c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 15 Sep 2016 23:46:11 +0700 Subject: [PATCH 1687/3599] release 2016.09.15 --- .github/ISSUE_TEMPLATE.md | 8 ++++---- ChangeLog | 2 +- README.md | 11 +++++++++++ docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e87fed573..61cea757c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11.1 +[debug] youtube-dl version 2016.09.15 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} @@ -55,4 +55,4 @@ $ youtube-dl -v <your command line> ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/ChangeLog b/ChangeLog index cd1f2fdf1..4583537ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.15 Core * Improve _hidden_inputs diff --git a/README.md b/README.md index 7543f81ac..4debe15fe 100644 --- a/README.md +++ b/README.md @@ -358,6 +358,17 @@ which means you can modify it, redistribute it or use it however you like. 
-n, --netrc Use .netrc authentication data --video-password PASSWORD Video password (vimeo, smotri, youku) +## Adobe Pass Options: + --ap-mso MSO Adobe Pass multiple-system operator (TV + provider) identifier, use --ap-list-mso for + a list of available MSOs + --ap-username USERNAME Multiple-system operator account login + --ap-password PASSWORD Multiple-system operator account password. + If this option is left out, youtube-dl will + ask interactively. + --ap-list-mso List all supported multiple-system + operators + ## Post-processing Options: -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 7a7b268d3..fcb618561 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -89,6 +89,7 @@ - **BeatportPro** - **Beeg** - **BehindKink** + - **BellMedia** - **Bet** - **Bigflix** - **Bild**: Bild.de @@ -169,7 +170,6 @@ - **CSNNE** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **culturebox.francetvinfo.fr** - **CultureUnplugged** @@ -445,6 +445,7 @@ - **NBA** - **NBC** - **NBCNews** + - **NBCOlympics** - **NBCSports** - **NBCSportsVPlayer** - **ndr**: NDR.de - Norddeutscher Rundfunk diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 903aede58..081fd6ef0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11.1' +__version__ = '2016.09.15' From 9d8985a165ebdc9fd8d72e7536253c42162b58a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 00:54:34 +0700 Subject: [PATCH 1688/3599] [tv4] Fix hls and hds formats (Closes #10659) --- youtube_dl/extractor/tv4.py | 49 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 343edf206..5d2d8f132 100644 --- 
a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -2,9 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, + int_or_none, parse_iso8601, + try_get, + update_url_query, ) @@ -65,36 +69,47 @@ class TV4IE(InfoExtractor): video_id = self._match_id(url) info = self._download_json( - 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') + 'http://www.tv4play.se/player/assets/%s.json' % video_id, + video_id, 'Downloading video info JSON') # If is_geo_restricted is true, it doesn't necessarily mean we can't download it - if info['is_geo_restricted']: + if info.get('is_geo_restricted'): self.report_warning('This content might not be available in your country due to licensing restrictions.') - if info['requires_subscription']: + if info.get('requires_subscription'): raise ExtractorError('This content requires subscription.', expected=True) - sources_data = self._download_json( - 'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') - sources = sources_data['playback'] + title = info['title'] formats = [] - for item in sources.get('items', {}).get('item', []): - ext, bitrate = item['mediaFormat'], item['bitrate'] - formats.append({ - 'format_id': '%s_%s' % (ext, bitrate), - 'tbr': bitrate, - 'ext': ext, - 'url': item['url'], - }) + # http formats are linked with unresolvable host + for kind in ('hls', ''): + data = self._download_json( + 'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id, + video_id, 'Downloading sources JSON', query={ + 'protocol': kind, + 'videoFormat': 'MP4+WEBVTTS+WEBVTT', + }) + item = try_get(data, lambda x: x['playback']['items']['item'], dict) + manifest_url = item.get('url') + if not isinstance(manifest_url, compat_str): + continue + if kind == 'hls': + formats.extend(self._extract_m3u8_formats( + 
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id=kind, fatal=False)) + else: + formats.extend(self._extract_f4m_formats( + update_url_query(manifest_url, {'hdcore': '3.8.0'}), + video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'], + 'title': title, 'formats': formats, 'description': info.get('description'), 'timestamp': parse_iso8601(info.get('broadcast_date_time')), - 'duration': info.get('duration'), + 'duration': int_or_none(info.get('duration')), 'thumbnail': info.get('image'), - 'is_live': sources.get('live'), + 'is_live': info.get('is_live') is True, } From 52dc8a9b3f1af7abda6652a75b906d70809c475d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 16 Sep 2016 22:02:59 +0700 Subject: [PATCH 1689/3599] [franceinter] Fix upload date extraction --- youtube_dl/extractor/franceinter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 0d58f89c5..1a1232ade 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -10,14 +10,14 @@ class FranceInterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' _TEST = { - 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', - 'md5': '4764932e466e6f6c79c317d2e74f6884', + 'url': 'https://www.franceinter.fr/emissions/la-tete-au-carre/la-tete-au-carre-14-septembre-2016', + 'md5': '4e3aeb58fe0e83d7b0581fa213c409d0', 'info_dict': { - 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', + 'id': 'la-tete-au-carre/la-tete-au-carre-14-septembre-2016', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'md5:7f2ce449894d1e585932273080fb410d', - 'upload_date': '20131218', + 'title': 'Et si 
les rêves pouvaient nous aider à agir dans notre vie quotidienne ?', + 'description': 'md5:a245dd62cf5bf51de915f8d9956d180a', + 'upload_date': '20160914', }, } @@ -39,7 +39,7 @@ class FranceInterIE(InfoExtractor): if upload_date_str: upload_date_list = upload_date_str.split() upload_date_list.reverse() - upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) upload_date = ''.join(upload_date_list) else: upload_date = None From 98b7506e96b5ac107a777d8bb8900623d832fba4 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 17:36:22 +0100 Subject: [PATCH 1690/3599] [toutv] add support for authentication(closes #10669) --- youtube_dl/extractor/radiocanada.py | 55 ++++++++++++++++---------- youtube_dl/extractor/toutv.py | 60 ++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 8ec402646..6751270ee 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -13,6 +13,7 @@ from ..utils import ( xpath_element, ExtractorError, determine_protocol, + unsmuggle_url, ) @@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor): } def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) app_code, video_id = re.match(self._VALID_URL, url).groups() - device_types = ['ipad', 'android'] + metadata = self._download_xml( + 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', + video_id, note='Downloading metadata XML', query={ + 'appCode': app_code, + 'idMedia': video_id, + }) + + def get_meta(name): + el = find_xpath_attr(metadata, './/Meta', 'name', name) + return el.text if el is not None else None + + if get_meta('protectionType'): + raise ExtractorError('This video is DRM protected.', expected=True) + + device_types = ['ipad'] if app_code != 'toutv': 
device_types.append('flash') + if not smuggled_data: + device_types.append('android') formats = [] # TODO: extract f4m formats # f4m formats can be extracted using flashhd device_type but they produce unplayable file for device_type in device_types: - v_data = self._download_xml( - 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', - video_id, note='Downloading %s XML' % device_type, query={ - 'appCode': app_code, - 'idMedia': video_id, - 'connectionType': 'broadband', - 'multibitrate': 'true', - 'deviceType': device_type, + validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' + query = { + 'appCode': app_code, + 'idMedia': video_id, + 'connectionType': 'broadband', + 'multibitrate': 'true', + 'deviceType': device_type, + } + if smuggled_data: + validation_url = 'https://services.radio-canada.ca/media/validation/v2/' + query.update(smuggled_data) + else: + query.update({ # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction 'paysJ391wsHjbOJwvCs26toz': 'CA', 'bypasslock': 'NZt5K62gRqfc', - }, fatal=False) + }) + v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) v_url = xpath_text(v_data, 'url') if not v_url: continue @@ -101,17 +125,6 @@ class RadioCanadaIE(InfoExtractor): f4m_id='hds', fatal=False)) self._sort_formats(formats) - metadata = self._download_xml( - 'http://api.radio-canada.ca/metaMedia/v1/index.ashx', - video_id, note='Downloading metadata XML', query={ - 'appCode': app_code, - 'idMedia': video_id, - }) - - def get_meta(name): - el = find_xpath_attr(metadata, './/Meta', 'name', name) - return el.text if el is not None else None - return { 'id': video_id, 'title': get_meta('Title'), diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 54c2d0aa6..d2d5c1171 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -2,12 +2,22 @@ from __future__ import unicode_literals 
from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + js_to_json, + ExtractorError, + urlencode_postdata, + extract_attributes, + smuggle_url, +) class TouTvIE(InfoExtractor): + _NETRC_MACHINE = 'toutv' IE_NAME = 'tou.tv' _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' + _access_token = None + _claims = None _TEST = { 'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', @@ -22,18 +32,64 @@ class TouTvIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'skip': '404 Not Found', } + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + return + state = 'http://ici.tou.tv//' + webpage = self._download_webpage(state, None, 'Downloading homepage') + toutvlogin = self._parse_json(self._search_regex( + r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) + authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' + login_webpage = self._download_webpage( + authorize_url, None, 'Downloading login page', query={ + 'client_id': toutvlogin['clientId'], + 'redirect_uri': 'https://ici.tou.tv/login/loginCallback', + 'response_type': 'token', + 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', + 'state': state, + }) + login_form = self._search_regex( + r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form') + form_data = self._hidden_inputs(login_form) + form_data.update({ + 'login-email': email, + 'login-password': password, + }) + post_url = extract_attributes(login_form).get('action') or authorize_url + _, urlh = self._download_webpage_handle( + post_url, None, 'Logging in', data=urlencode_postdata(form_data)) + self._access_token = self._search_regex( + r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', + urlh.geturl(), 'access token') + self._claims = self._download_json( + 
'https://services.radio-canada.ca/media/validation/v2/getClaims', + None, 'Extracting Claims', query={ + 'token': self._access_token, + 'access_token': self._access_token, + })['claims'] + def _real_extract(self, url): path = self._match_id(url) metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) + if metadata.get('IsDrm'): + raise ExtractorError('This video is DRM protected.', expected=True) video_id = metadata['IdMedia'] details = metadata['Details'] title = details['OriginalTitle'] + video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) + if self._access_token and self._claims: + video_url = smuggle_url(video_url, { + 'access_token': self._access_token, + 'claims': self._claims, + }) return { '_type': 'url_transparent', - 'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id), + 'url': video_url, 'id': video_id, 'title': title, 'thumbnail': details.get('ImageUrl'), From 6ad0219556cefe60239027633193cc9f1dc9fb1d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 19:30:38 +0100 Subject: [PATCH 1691/3599] [common] add helper method for Wowza Streaming Engine format extraction --- youtube_dl/extractor/common.py | 43 +++++++++++++++++++++++++++++ youtube_dl/extractor/vier.py | 4 +-- youtube_dl/extractor/vodplatform.py | 25 ++--------------- youtube_dl/extractor/vrt.py | 40 ++++++--------------------- 4 files changed, 55 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4f738b9fc..c00023458 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1795,6 +1795,49 @@ class InfoExtractor(object): m3u8_id='hls', fatal=False)) return formats + def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): + url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url) + url_base = 
self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url') + http_base_url = 'http' + url_base + formats = [] + if 'm3u8' not in skip_protocols: + formats.extend(self._extract_m3u8_formats( + http_base_url + '/playlist.m3u8', video_id, 'mp4', + m3u8_entry_protocol, m3u8_id='hls', fatal=False)) + if 'f4m' not in skip_protocols: + formats.extend(self._extract_f4m_formats( + http_base_url + '/manifest.f4m', + video_id, f4m_id='hds', fatal=False)) + if re.search(r'(?:/smil:|\.smil)', url_base): + if 'dash' not in skip_protocols: + formats.extend(self._extract_mpd_formats( + http_base_url + '/manifest.mpd', + video_id, mpd_id='dash', fatal=False)) + if 'smil' not in skip_protocols: + rtmp_formats = self._extract_smil_formats( + http_base_url + '/jwplayer.smil', + video_id, fatal=False) + for rtmp_format in rtmp_formats: + rtsp_format = rtmp_format.copy() + rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + del rtsp_format['play_path'] + del rtsp_format['ext'] + rtsp_format.update({ + 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), + 'protocol': 'rtsp', + }) + formats.extend([rtmp_format, rtsp_format]) + else: + for protocol in ('rtmp', 'rtsp'): + if protocol not in skip_protocols: + formats.append({ + 'url': protocol + url_base, + 'format_id': protocol, + 'protocol': protocol, + }) + return formats + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 6645c6186..dc142a245 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -48,8 +48,8 @@ class VierIE(InfoExtractor): [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], webpage, 'filename') - playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) - formats = 
self._extract_m3u8_formats(playlist_url, display_id, 'mp4') + playlist_url = 'http://vod.streamcloud.be/%s/_definst_/mp4:%s.mp4/playlist.m3u8' % (application, filename) + formats = self._extract_wowza_formats(playlist_url, display_id) self._sort_formats(formats) title = self._og_search_title(webpage, default=display_id) diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py index 7bdd8b1dc..239644340 100644 --- a/youtube_dl/extractor/vodplatform.py +++ b/youtube_dl/extractor/vodplatform.py @@ -25,29 +25,8 @@ class VODPlatformIE(InfoExtractor): title = unescapeHTML(self._og_search_title(webpage)) hidden_inputs = self._hidden_inputs(webpage) - base_url = self._search_regex( - '(.*/)(?:playlist.m3u8|manifest.mpd)', - hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], - 'base url') - formats = self._extract_m3u8_formats( - base_url + 'playlist.m3u8', video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) - formats.extend(self._extract_mpd_formats( - base_url + 'manifest.mpd', video_id, - mpd_id='dash', fatal=False)) - rtmp_formats = self._extract_smil_formats( - base_url + 'jwplayer.smil', video_id, fatal=False) - for rtmp_format in rtmp_formats: - rtsp_format = rtmp_format.copy() - rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) - del rtsp_format['play_path'] - del rtsp_format['ext'] - rtsp_format.update({ - 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), - 'protocol': 'rtsp', - }) - formats.extend([rtmp_format, rtsp_format]) + formats = self._extract_wowza_formats( + hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil']) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index bec7ab327..00c72e346 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -5,7 +5,6 
@@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, float_or_none, ) @@ -75,7 +74,6 @@ class VRTIE(InfoExtractor): }, { 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', - 'md5': '', 'info_dict': { 'id': '2377055', 'ext': 'mp4', @@ -119,39 +117,17 @@ class VRTIE(InfoExtractor): video_id, 'mp4', m3u8_id='hls', fatal=False)) if src: - if determine_ext(src) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - src.replace('playlist.m3u8', 'manifest.f4m'), - video_id, f4m_id='hds', fatal=False)) - if 'data-video-geoblocking="true"' not in webpage: - rtmp_formats = self._extract_smil_formats( - src.replace('playlist.m3u8', 'jwplayer.smil'), - video_id, fatal=False) - formats.extend(rtmp_formats) - for rtmp_format in rtmp_formats: - rtmp_format_c = rtmp_format.copy() - rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) - del rtmp_format_c['play_path'] - del rtmp_format_c['ext'] - http_format = rtmp_format_c.copy() + formats = self._extract_wowza_formats(src, video_id) + if 'data-video-geoblocking="true"' not in webpage: + for f in formats: + if f['url'].startswith('rtsp://'): + http_format = f.copy() http_format.update({ - 'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'http'), + 'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), + 'format_id': f['format_id'].replace('rtsp', 'http'), 'protocol': 'http', }) - rtsp_format = rtmp_format_c.copy() - rtsp_format.update({ - 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), - 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), - 'protocol': 'rtsp', - }) - 
formats.extend([http_format, rtsp_format]) - else: - formats.extend(self._extract_f4m_formats( - '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) + formats.append(http_format) if not formats and 'data-video-geoblocking="true"' in webpage: self.raise_geo_restricted('This video is only available in Belgium') From 7d273a387aade7665cd25eee69d94ee615d9a4b9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 19:31:39 +0100 Subject: [PATCH 1692/3599] [mangomolo] add support for Mangomolo embeds --- youtube_dl/extractor/awaan.py | 63 ++++++++++++------------------ youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/generic.py | 29 ++++++++++++++ youtube_dl/extractor/mangomolo.py | 54 +++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 39 deletions(-) create mode 100644 youtube_dl/extractor/mangomolo.py diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py index bdf23c6a9..66d7515bc 100644 --- a/youtube_dl/extractor/awaan.py +++ b/youtube_dl/extractor/awaan.py @@ -50,25 +50,6 @@ class AWAANBaseIE(InfoExtractor): 'is_live': is_live, } - def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): - formats = [] - format_url_base = 'http' + self._html_search_regex( - [ - r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', - r'<a[^>]+href="rtsp(://[^"]+)"' - ], webpage, 'format url') - formats.extend(self._extract_mpd_formats( - format_url_base + '/manifest.mpd', - video_id, mpd_id='dash', fatal=False)) - formats.extend(self._extract_m3u8_formats( - format_url_base + '/playlist.m3u8', video_id, 'mp4', - m3u8_entry_protocol, m3u8_id='hls', fatal=False)) - formats.extend(self._extract_f4m_formats( - format_url_base + '/manifest.f4m', - video_id, f4m_id='hds', fatal=False)) - self._sort_formats(formats) - return formats - class AWAANVideoIE(AWAANBaseIE): IE_NAME = 'awaan:video' @@ -99,16 +80,18 @@ class AWAANVideoIE(AWAANBaseIE): video_id, headers={'Origin': 'http://awaan.ae'}) 
info = self._parse_video_data(video_data, video_id, False) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + - compat_urllib_parse_urlencode({ - 'id': video_data['id'], - 'user_id': video_data['user_id'], - 'signature': video_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), video_id) - info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + 'id': video_data['id'], + 'user_id': video_data['user_id'], + 'signature': video_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloVideo', + }) return info @@ -138,16 +121,18 @@ class AWAANLiveIE(AWAANBaseIE): channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + - compat_urllib_parse_urlencode({ - 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), - 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), - 'signature': channel_data['signature'], - 'countries': 'Q0M=', - 'filter': 'DENY', - }), channel_id) - info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ compat_urllib_parse_urlencode({ + 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), + 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), + 'signature': channel_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloLive', + }) return info diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dd0579425..4baf4cd48 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -472,6 +472,10 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .makerschannel import MakersChannelIE from .makertv import MakerTVIE +from .mangomolo import ( + MangomoloVideoIE, + MangomoloLiveIE, +) from .matchtv import MatchTVIE from .mdr import MDRIE from .meta import METAIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2e46ca179..e01305942 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2254,6 +2254,35 @@ class GenericIE(InfoExtractor): return self.url_result( self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform') + # Look for Mangomolo embeds + mobj = re.search( + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo.com/analytics/index\.php/customers/embed/ + (?: + video\?.*?\bid=(?P<video_id>\d+)| + index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) + ).+?)\1''', webpage) + if mobj is not None: + info = { + '_type': 'url_transparent', + 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } + video_id = mobj.group('video_id') + if video_id: + info.update({ + 'ie_key': 'MangomoloVideo', + 'id': video_id, + }) + else: + info.update({ + 'ie_key': 'MangomoloLive', + 'id': 
mobj.group('channel_id'), + }) + return info + # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None: diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py new file mode 100644 index 000000000..8cac8ace2 --- /dev/null +++ b/youtube_dl/extractor/mangomolo.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote +from ..utils import ( + int_or_none, +) + + +class MangomoloBaseIE(InfoExtractor): + def _get_real_id(self, page_id): + return page_id + + def _real_extract(self, url): + page_id = self._get_real_id(self._match_id(url)) + webpage = self._download_webpage(url, page_id) + hidden_inputs = self._hidden_inputs(webpage) + m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native' + + format_url = self._html_search_regex( + [ + r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', + r'<a[^>]+href="(rtsp://[^"]+)"' + ], webpage, 'format url') + formats = self._extract_wowza_formats( + format_url, page_id, m3u8_entry_protocol, ['smil']) + self._sort_formats(formats) + + return { + 'id': page_id, + 'title': self._live_title(page_id) if self._IS_LIVE else page_id, + 'uploader_id': hidden_inputs.get('userid'), + 'duration': int_or_none(hidden_inputs.get('duration')), + 'is_live': self._IS_LIVE, + 'formats': formats, + } + + +class MangomoloVideoIE(MangomoloBaseIE): + IENAME = 'mangomolo:video' + _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _IS_LIVE = False + + +class MangomoloLiveIE(MangomoloBaseIE): + IENAME = 'mangomolo:live' + _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _IS_LIVE = True + + def _get_real_id(self, page_id): + return 
base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode() From fc86d4eed0bf10f8f90326472811e5b4d4ad4bd9 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 20:10:47 +0100 Subject: [PATCH 1693/3599] [mangomolo] fix typo --- youtube_dl/extractor/mangomolo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 8cac8ace2..2db503f2b 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -40,13 +40,13 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): - IENAME = 'mangomolo:video' + IE_NAME = 'mangomolo:video' _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): - IENAME = 'mangomolo:live' + IE_NAME = 'mangomolo:live' _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True From 30d9e20938fa91ece09c376b67030647215d48df Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Fri, 16 Sep 2016 22:06:55 +0100 Subject: [PATCH 1694/3599] [postprocessor/ffmpeg] apply FFmpegFixupM3u8PP only for videos with aac codec(#5591) --- youtube_dl/postprocessor/ffmpeg.py | 63 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index fa99b0c2a..8d1214ee2 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -139,6 +139,30 @@ class FFmpegPostProcessor(PostProcessor): def probe_executable(self): return self._paths[self.probe_basename] + def get_audio_codec(self, path): + if not self.probe_available: + raise PostProcessingError('ffprobe or avprobe not found. 
Please install one.') + try: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams'), + encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + except (IOError, OSError): + return None + audio_codec = None + for line in output.decode('ascii', 'ignore').split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + return None + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): self.check_version() @@ -188,31 +212,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): self._preferredquality = preferredquality self._nopostoverwrites = nopostoverwrites - def get_audio_codec(self, path): - - if not self.probe_available: - raise PostProcessingError('ffprobe or avprobe not found. 
Please install one.') - try: - cmd = [ - encodeFilename(self.probe_executable, True), - encodeArgument('-show_streams'), - encodeFilename(self._ffmpeg_filename_argument(path), True)] - if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: - return None - except (IOError, OSError): - return None - audio_codec = None - for line in output.decode('ascii', 'ignore').split('\n'): - if line.startswith('codec_name='): - audio_codec = line.split('=')[1].strip() - elif line.strip() == 'codec_type=audio' and audio_codec is not None: - return audio_codec - return None - def run_ffmpeg(self, path, out_path, codec, more_opts): if codec is None: acodec_opts = [] @@ -504,15 +503,15 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor): class FFmpegFixupM3u8PP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') + if self.get_audio_codec(filename) == 'aac': + temp_filename = prepend_extension(filename, 'temp') - options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] - self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) - - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return [], info From d05ef09d9d94fa70335af5fbaab385b37b16d705 Mon Sep 17 00:00:00 2001 From: Remita Amine 
<remitamine@gmail.com> Date: Sat, 17 Sep 2016 08:11:01 +0100 Subject: [PATCH 1695/3599] [mangomolo] fix domain regex --- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/mangomolo.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e01305942..92a6e5146 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2256,7 +2256,7 @@ class GenericIE(InfoExtractor): # Look for Mangomolo embeds mobj = re.search( - r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo.com/analytics/index\.php/customers/embed/ + r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/ (?: video\?.*?\bid=(?P<video_id>\d+)| index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+) diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 2db503f2b..1885ac7df 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -41,13 +41,13 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): IE_NAME = 'mangomolo:video' - _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' + _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)' _IS_LIVE = False class MangomoloLiveIE(MangomoloBaseIE): IE_NAME = 'mangomolo:live' - _VALID_URL = r'https?://admin\.mangomolo.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True def _get_real_id(self, page_id): From c51a7f0b2f2454bfe0b53f9d79567b3210e015b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Sat, 17 Sep 2016 15:44:37 +0700 Subject: [PATCH 1696/3599] [franceinter] Fix upload date extraction --- youtube_dl/extractor/franceinter.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 1a1232ade..707b9e00d 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str from ..utils import month_by_name @@ -10,14 +9,14 @@ class FranceInterIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' _TEST = { - 'url': 'https://www.franceinter.fr/emissions/la-tete-au-carre/la-tete-au-carre-14-septembre-2016', - 'md5': '4e3aeb58fe0e83d7b0581fa213c409d0', + 'url': 'https://www.franceinter.fr/emissions/affaires-sensibles/affaires-sensibles-07-septembre-2016', + 'md5': '9e54d7bdb6fdc02a841007f8a975c094', 'info_dict': { - 'id': 'la-tete-au-carre/la-tete-au-carre-14-septembre-2016', + 'id': 'affaires-sensibles/affaires-sensibles-07-septembre-2016', 'ext': 'mp3', - 'title': 'Et si les rêves pouvaient nous aider à agir dans notre vie quotidienne ?', - 'description': 'md5:a245dd62cf5bf51de915f8d9956d180a', - 'upload_date': '20160914', + 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse', + 'description': 'md5:401969c5d318c061f86bda1fa359292b', + 'upload_date': '20160907', }, } @@ -40,6 +39,7 @@ class FranceInterIE(InfoExtractor): upload_date_list = upload_date_str.split() upload_date_list.reverse() upload_date_list[1] = '%02d' % (month_by_name(upload_date_list[1], lang='fr') or 0) + upload_date_list[2] = '%02d' % int(upload_date_list[2]) upload_date = ''.join(upload_date_list) else: upload_date = None From e14c82bd6b6cfc1e904b067350d818657c911e07 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 17 Sep 2016 18:45:08 
+0800 Subject: [PATCH 1697/3599] [jwplatform] Use js_to_json to detect more JWPlayers --- ChangeLog | 6 ++++++ youtube_dl/extractor/jwplatform.py | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4583537ac..a9f7cee53 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [jwplatform] Improve JWPlayer detection + + version 2016.09.15 Core diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 7aaa65476..38199fcd0 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -9,6 +9,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + js_to_json, mimetype2ext, ) @@ -19,14 +20,15 @@ class JWPlatformBaseIE(InfoExtractor): # TODO: Merge this with JWPlayer-related codes in generic.py mobj = re.search( - 'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)', + r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)', webpage) if mobj: return mobj.group('options') def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): jwplayer_data = self._parse_json( - self._find_jwplayer_data(webpage), video_id) + self._find_jwplayer_data(webpage), video_id, + transform_source=js_to_json) return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) From 584d6f3457205b547c8969f11eade117f871ec8f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 17 Sep 2016 18:46:43 +0800 Subject: [PATCH 1698/3599] [thisav] Recognize jwplayers (closes #10447) --- ChangeLog | 1 + youtube_dl/extractor/thisav.py | 39 ++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index a9f7cee53..b0a65bde2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors ++ [thisav] Recognize HTML5 videos (#10447) * [jwplatform] Improve JWPlayer detection 
diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 7f323c938..027a8e907 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -3,13 +3,12 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor -from ..utils import determine_ext +from .jwplatform import JWPlatformBaseIE -class ThisAVIE(InfoExtractor): +class ThisAVIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' - _TEST = { + _TESTS = [{ 'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', 'md5': '0480f1ef3932d901f0e0e719f188f19b', 'info_dict': { @@ -19,7 +18,17 @@ class ThisAVIE(InfoExtractor): 'uploader': 'dj7970', 'uploader_id': 'dj7970' } - } + }, { + 'url': 'http://www.thisav.com/video/242352/nerdy-18yo-big-ass-tattoos-and-glasses.html', + 'md5': 'ba90c076bd0f80203679e5b60bf523ee', + 'info_dict': { + 'id': '242352', + 'ext': 'mp4', + 'title': 'Nerdy 18yo Big Ass Tattoos and Glasses', + 'uploader': 'cybersluts', + 'uploader_id': 'cybersluts', + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -28,20 +37,28 @@ class ThisAVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') video_url = self._html_search_regex( - r"addVariable\('file','([^']+)'\);", webpage, 'video url') + r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) + if video_url: + info_dict = { + 'formats': [{ + 'url': video_url, + }], + } + else: + info_dict = self._extract_jwplayer_data( + webpage, video_id, require_title=False) uploader = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', webpage, 'uploader name', fatal=False) uploader_id = self._html_search_regex( r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', webpage, 'uploader id', fatal=False) - ext = 
determine_ext(video_url) - return { + info_dict.update({ 'id': video_id, - 'url': video_url, 'uploader': uploader, 'uploader_id': uploader_id, 'title': title, - 'ext': ext, - } + }) + + return info_dict From a0d5077c8dfa9fa31ebf3e63fdb1b2a7a5182a81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Sep 2016 01:18:57 +0700 Subject: [PATCH 1699/3599] [extractor/common] Introduce fragments interface --- youtube_dl/extractor/common.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c00023458..566ed7a4d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -86,7 +86,9 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file + * url Mandatory. The URL of the video file or URL of + the manifest file in case of fragmented media + (DASH, hls, hds). * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -115,6 +117,11 @@ class InfoExtractor(object): download, lower-case. "http", "https", "rtsp", "rtmp", "rtmpe", "m3u8", "m3u8_native" or "http_dash_segments". + * fragments A list of fragments of the fragmented media, + with the following entries: + * "url" (mandatory) - fragment's URL + * "duration" (optional, int or float) + * "filesize" (optional, int) * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. 
From b4c1d6e800a5b28accf4ba588b8fa3f0c420ce13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 6 Sep 2016 01:21:57 +0700 Subject: [PATCH 1700/3599] [extractor/common] Expose fragments interface for dashsegments formats --- youtube_dl/extractor/common.py | 142 +++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 566ed7a4d..e637b33d5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1551,42 +1551,52 @@ class InfoExtractor(object): def extract_multisegment_info(element, ms_parent_info): ms_info = ms_parent_info.copy() + + # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some + # common attributes and elements. We will only extract relevant + # for us. + def extract_common(source): + segment_timeline = source.find(_add_ns('SegmentTimeline')) + if segment_timeline is not None: + s_e = segment_timeline.findall(_add_ns('S')) + if s_e: + ms_info['total_number'] = 0 + ms_info['s'] = [] + for s in s_e: + r = int(s.get('r', 0)) + ms_info['total_number'] += 1 + r + ms_info['s'].append({ + 't': int(s.get('t', 0)), + # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) + 'd': int(s.attrib['d']), + 'r': r, + }) + start_number = source.get('startNumber') + if start_number: + ms_info['start_number'] = int(start_number) + timescale = source.get('timescale') + if timescale: + ms_info['timescale'] = int(timescale) + segment_duration = source.get('duration') + if segment_duration: + ms_info['segment_duration'] = int(segment_duration) + + def extract_Initialization(source): + initialization = source.find(_add_ns('Initialization')) + if initialization is not None: + ms_info['initialization_url'] = initialization.attrib['sourceURL'] + segment_list = element.find(_add_ns('SegmentList')) if segment_list is not None: + extract_common(segment_list) + 
extract_Initialization(segment_list) segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) if segment_urls_e: ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e] - initialization = segment_list.find(_add_ns('Initialization')) - if initialization is not None: - ms_info['initialization_url'] = initialization.attrib['sourceURL'] else: segment_template = element.find(_add_ns('SegmentTemplate')) if segment_template is not None: - start_number = segment_template.get('startNumber') - if start_number: - ms_info['start_number'] = int(start_number) - segment_timeline = segment_template.find(_add_ns('SegmentTimeline')) - if segment_timeline is not None: - s_e = segment_timeline.findall(_add_ns('S')) - if s_e: - ms_info['total_number'] = 0 - ms_info['s'] = [] - for s in s_e: - r = int(s.get('r', 0)) - ms_info['total_number'] += 1 + r - ms_info['s'].append({ - 't': int(s.get('t', 0)), - # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) - 'd': int(s.attrib['d']), - 'r': r, - }) - else: - timescale = segment_template.get('timescale') - if timescale: - ms_info['timescale'] = int(timescale) - segment_duration = segment_template.get('duration') - if segment_duration: - ms_info['segment_duration'] = int(segment_duration) + extract_common(segment_template) media_template = segment_template.get('media') if media_template: ms_info['media_template'] = media_template @@ -1594,11 +1604,14 @@ class InfoExtractor(object): if initialization: ms_info['initialization_url'] = initialization else: - initialization = segment_template.find(_add_ns('Initialization')) - if initialization is not None: - ms_info['initialization_url'] = initialization.attrib['sourceURL'] + extract_Initialization(segment_template) return ms_info + def combine_url(base_url, target_url): + if re.match(r'^https?://', target_url): + return target_url + return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) + mpd_duration = 
parse_duration(mpd_doc.get('mediaPresentationDuration')) formats = [] for period in mpd_doc.findall(_add_ns('Period')): @@ -1655,9 +1668,7 @@ class InfoExtractor(object): } representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: - if 'total_number' not in representation_ms_info and 'segment_duration': - segment_duration = float(representation_ms_info['segment_duration']) / float(representation_ms_info['timescale']) - representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) + media_template = representation_ms_info['media_template'] media_template = media_template.replace('$RepresentationID$', representation_id) media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) @@ -1666,7 +1677,11 @@ class InfoExtractor(object): # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ # can't be used at the same time - if '%(Number' in media_template: + if '%(Number' in media_template and 's' not in representation_ms_info: + segment_duration = None + if 'total_number' not in representation_ms_info and 'segment_duration': + segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) + representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['segment_urls'] = [ media_template % { 'Number': segment_number, @@ -1675,28 +1690,65 @@ class InfoExtractor(object): for segment_number in range( representation_ms_info['start_number'], representation_ms_info['total_number'] + representation_ms_info['start_number'])] + representation_ms_info['fragments'] = [{ + 'url': media_template % { + 'Number': segment_number, + 'Bandwidth': representation_attrib.get('bandwidth'), + }, + 'duration': segment_duration, + } for segment_number in range( + 
representation_ms_info['start_number'], + representation_ms_info['total_number'] + representation_ms_info['start_number'])] else: + # $Number*$ or $Time$ in media template with S list available + # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg + # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 representation_ms_info['segment_urls'] = [] + representation_ms_info['fragments'] = [] segment_time = 0 + segment_d = None + segment_number = representation_ms_info['start_number'] def add_segment_url(): - representation_ms_info['segment_urls'].append( - media_template % { - 'Time': segment_time, - 'Bandwidth': representation_attrib.get('bandwidth'), - } - ) + segment_url = media_template % { + 'Time': segment_time, + 'Bandwidth': representation_attrib.get('bandwidth'), + 'Number': segment_number, + } + representation_ms_info['segment_urls'].append(segment_url) + representation_ms_info['fragments'].append({ + 'url': segment_url, + 'duration': float_or_none(segment_d, representation_ms_info['timescale']), + }) for num, s in enumerate(representation_ms_info['s']): segment_time = s.get('t') or segment_time + segment_d = s['d'] add_segment_url() + segment_number += 1 for r in range(s.get('r', 0)): - segment_time += s['d'] + segment_time += segment_d add_segment_url() - segment_time += s['d'] + segment_number += 1 + segment_time += segment_d + elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: + # No media template + # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI + # or any YouTube dashsegments video + fragments = [] + s_num = 0 + for segment_url in representation_ms_info['segment_urls']: + s = representation_ms_info['s'][s_num] + for r in range(s.get('r', 0) + 1): + fragments.append({ + 'url': segment_url, + 'duration': float_or_none(s['d'], representation_ms_info['timescale']), + }) + representation_ms_info['fragments'] = fragments if 
'segment_urls' in representation_ms_info: f.update({ 'segment_urls': representation_ms_info['segment_urls'], + 'fragments': [], 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: @@ -1706,6 +1758,10 @@ class InfoExtractor(object): }) if not f.get('url'): f['url'] = initialization_url + f['fragments'].append({'url': initialization_url}) + f['fragments'].extend(representation_ms_info['fragments']) + for fragment in f['fragments']: + fragment['url'] = combine_url(base_url, fragment['url']) try: existing_format = next( fo for fo in formats From 21d21b0c72a731d4ff5affa2182fbe1687c031a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 19:25:31 +0700 Subject: [PATCH 1701/3599] [svt] Fix DASH formats extraction --- youtube_dl/extractor/svt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py index 1c04dfb7b..fb0a4b24e 100644 --- a/youtube_dl/extractor/svt.py +++ b/youtube_dl/extractor/svt.py @@ -16,7 +16,7 @@ class SVTBaseIE(InfoExtractor): def _extract_video(self, video_info, video_id): formats = [] for vr in video_info['videoReferences']: - player_type = vr.get('playerType') + player_type = vr.get('playerType') or vr.get('format') vurl = vr['url'] ext = determine_ext(vurl) if ext == 'm3u8': From 86f4d14f817acaee1f1f544cd9b06d47bc2a5180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 20:35:22 +0700 Subject: [PATCH 1702/3599] Refactor fragments interface and dash segments downloader - Eliminate segment_urls and initialization_url + Introduce manifest_url (manifest may contain unfragmented data in this case url will be used for direct media URL and manifest_url for manifest itself correspondingly) * Rewrite dashsegments downloader to use fragments data * Improve generic mpd extraction --- youtube_dl/downloader/dash.py | 35 
+++++++++++---------------------- youtube_dl/extractor/common.py | 31 +++++++++++------------------ youtube_dl/extractor/generic.py | 4 +++- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 41fc9cfc2..8437dde30 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import os -import re from .fragment import FragmentFD from ..compat import compat_urllib_error @@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD): FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): - base_url = info_dict['url'] - segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls'] - initialization_url = info_dict.get('initialization_url') + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] ctx = { 'filename': filename, - 'total_frags': len(segment_urls) + (1 if initialization_url else 0), + 'total_frags': len(segments), } self._prepare_and_start_frag_download(ctx) - def combine_url(base_url, target_url): - if re.match(r'^https?://', target_url): - return target_url - return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) - segments_filenames = [] fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - def process_segment(segment, tmp_filename, fatal): - target_url, segment_name = segment + def process_segment(segment, tmp_filename, num): + segment_url = segment['url'] + segment_name = 'Frag%d' % num target_filename = '%s-%s' % (tmp_filename, segment_name) + # In DASH, the first segment contains necessary headers to + # generate a valid MP4 file, so always abort for the first segment + fatal = num == 0 or not skip_unavailable_fragments count = 0 while count <= fragment_retries: try: - success = 
ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)}) + success = ctx['dl'].download(target_filename, {'url': segment_url}) if not success: return False down, target_sanitized = sanitize_open(target_filename, 'rb') @@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD): return False return True - segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] - segments_to_download.extend([ - (segment_url, 'Seg%d' % i) - for i, segment_url in enumerate(segment_urls)]) - - for i, segment in enumerate(segments_to_download): - # In DASH, the first segment contains necessary headers to - # generate a valid MP4 file, so always abort for the first segment - fatal = i == 0 or not skip_unavailable_fragments - if not process_segment(segment, ctx['tmpfilename'], fatal): + for i, segment in enumerate(segments): + if not process_segment(segment, ctx['tmpfilename'], i): return False self._finish_frag_download(ctx) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e637b33d5..f35311e7a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -86,9 +86,10 @@ class InfoExtractor(object): from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file or URL of - the manifest file in case of fragmented media - (DASH, hls, hds). + * url Mandatory. The URL of the video file + * manifest_url + The URL of the manifest file in case of + fragmented media (DASH, hls, hds) * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). 
@@ -1528,9 +1529,10 @@ class InfoExtractor(object): mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group() return self._parse_mpd_formats( - compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict) + compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, + formats_dict=formats_dict, mpd_url=mpd_url) - def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): + def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None): """ Parse formats from MPD manifest. References: @@ -1654,6 +1656,7 @@ class InfoExtractor(object): f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'url': base_url, + 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), 'height': int_or_none(representation_attrib.get('height')), @@ -1682,14 +1685,6 @@ class InfoExtractor(object): if 'total_number' not in representation_ms_info and 'segment_duration': segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) - representation_ms_info['segment_urls'] = [ - media_template % { - 'Number': segment_number, - 'Bandwidth': representation_attrib.get('bandwidth'), - } - for segment_number in range( - representation_ms_info['start_number'], - representation_ms_info['total_number'] + representation_ms_info['start_number'])] representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, @@ -1703,7 +1698,6 @@ class InfoExtractor(object): # $Number*$ or $Time$ in media template with S list available # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 - 
representation_ms_info['segment_urls'] = [] representation_ms_info['fragments'] = [] segment_time = 0 segment_d = None @@ -1715,7 +1709,6 @@ class InfoExtractor(object): 'Bandwidth': representation_attrib.get('bandwidth'), 'Number': segment_number, } - representation_ms_info['segment_urls'].append(segment_url) representation_ms_info['fragments'].append({ 'url': segment_url, 'duration': float_or_none(segment_d, representation_ms_info['timescale']), @@ -1745,17 +1738,15 @@ class InfoExtractor(object): 'duration': float_or_none(s['d'], representation_ms_info['timescale']), }) representation_ms_info['fragments'] = fragments - if 'segment_urls' in representation_ms_info: + # NB: MPD manifest may contain direct URLs to unfragmented media. + # No fragments key is present in this case. + if 'fragments' in representation_ms_info: f.update({ - 'segment_urls': representation_ms_info['segment_urls'], 'fragments': [], 'protocol': 'http_dash_segments', }) if 'initialization_url' in representation_ms_info: initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id) - f.update({ - 'initialization_url': initialization_url, - }) if not f.get('url'): f['url'] = initialization_url f['fragments'].append({'url': initialization_url}) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 92a6e5146..c1792c534 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1657,7 +1657,9 @@ class GenericIE(InfoExtractor): return self.playlist_result(self._parse_xspf(doc, video_id), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( - doc, video_id, mpd_base_url=url.rpartition('/')[0]) + doc, video_id, + mpd_base_url=full_response.geturl().rpartition('/')[0], + mpd_url=url) self._sort_formats(info_dict['formats']) return info_dict elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): From 
30d0b549be5696f24b87471a0e691f9afca4a9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 21:33:38 +0700 Subject: [PATCH 1703/3599] [extractor/common] Add manifest_url for hls and hds formats --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f35311e7a..9c8991542 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1150,6 +1150,7 @@ class InfoExtractor(object): formats.append({ 'format_id': format_id, 'url': manifest_url, + 'manifest_url': manifest_url, 'ext': 'flv' if bootstrap_info is not None else None, 'tbr': tbr, 'width': width, @@ -1255,9 +1256,11 @@ class InfoExtractor(object): # format_id intact. if not live: format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats))) + manifest_url = format_url(line.strip()) f = { 'format_id': '-'.join(format_id), - 'url': format_url(line.strip()), + 'url': manifest_url, + 'manifest_url': manifest_url, 'tbr': tbr, 'ext': ext, 'fps': float_or_none(last_info.get('FRAME-RATE')), From 26394d021df1137301b1508bd00dd3478c15116c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 17 Sep 2016 23:34:10 +0700 Subject: [PATCH 1704/3599] [globo:article] Add support for multiple videos (Closes #10653) --- youtube_dl/extractor/globo.py | 39 +++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 5638be48f..dc7b2661c 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import random +import re import math from .common import InfoExtractor @@ -14,6 +15,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + orderedSet, str_or_none, ) @@ -63,6 +65,9 @@ class 
GloboIE(InfoExtractor): }, { 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html', 'only_matching': True, + }, { + 'url': 'globo:3607726', + 'only_matching': True, }] class MD5(object): @@ -396,7 +401,7 @@ class GloboIE(InfoExtractor): class GloboArticleIE(InfoExtractor): - _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?' + _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?' _VIDEOID_REGEXES = [ r'\bdata-video-id=["\'](\d{7,})', @@ -408,15 +413,20 @@ class GloboArticleIE(InfoExtractor): _TESTS = [{ 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html', - 'md5': '307fdeae4390ccfe6ba1aa198cf6e72b', 'info_dict': { - 'id': '3652183', - 'ext': 'mp4', - 'title': 'Receita Federal explica como vai fiscalizar bagagens de quem retorna ao Brasil de avião', - 'duration': 110.711, - 'uploader': 'Rede Globo', - 'uploader_id': '196', - } + 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes', + 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões', + 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12', + }, + 'playlist_count': 1, + }, { + 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html', + 'info_dict': { + 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato', + 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF", + 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c', + }, + 'playlist_count': 6, }, { 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html', 'only_matching': True, @@ -435,5 +445,12 @@ class GloboArticleIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(self._VIDEOID_REGEXES, 
webpage, 'video id') - return self.url_result('globo:%s' % video_id, 'Globo') + video_ids = [] + for video_regex in self._VIDEOID_REGEXES: + video_ids.extend(re.findall(video_regex, webpage)) + entries = [ + self.url_result('globo:%s' % video_id, GloboIE.ie_key()) + for video_id in orderedSet(video_ids)] + title = self._og_search_title(webpage, fatal=False) + description = self._html_search_meta('description', webpage) + return self.playlist_result(entries, display_id, title, description) From 190d2027d0b6c785cf789edf6c1bdac2ef650a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 07:22:06 +0700 Subject: [PATCH 1705/3599] [xfileshare] Add title regex for streamin.to and fallback to video id (Closes #10646) --- youtube_dl/extractor/xfileshare.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 995aada0d..de344bad2 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -124,12 +124,14 @@ class XFileShareIE(InfoExtractor): webpage = self._download_webpage(req, video_id, 'Downloading video page') title = (self._search_regex( - [r'style="z-index: [0-9]+;">([^<]+)</span>', + (r'style="z-index: [0-9]+;">([^<]+)</span>', r'<td nowrap>([^<]+)</td>', r'h4-fine[^>]*>([^<]+)<', r'>Watch (.+) ', - r'<h2 class="video-page-head">([^<]+)</h2>'], - webpage, 'title', default=None) or self._og_search_title(webpage)).strip() + r'<h2 class="video-page-head">([^<]+)</h2>', + r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to + webpage, 'title', default=None) or self._og_search_title( + webpage, default=None) or video_id).strip() def extract_video_url(default=NO_DEFAULT): return self._search_regex( From 14ae11efab64baf4994688490474609554c1bf80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 16:56:40 +0700 Subject: [PATCH 
1706/3599] [vyborymos] Add extractor (Closes #10692) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vyborymos.py | 55 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/vyborymos.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4baf4cd48..8166fd4f9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1069,6 +1069,7 @@ from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE from .vuclip import VuClipIE +from .vyborymos import VyboryMosIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dl/extractor/vyborymos.py new file mode 100644 index 000000000..884aecb71 --- /dev/null +++ b/youtube_dl/extractor/vyborymos.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class VyboryMosIE(InfoExtractor): + _VALID_URL = r'https?://vybory\.mos\.ru/(?:#precinct/|account/channels\?.*?\bstation_id=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://vybory.mos.ru/#precinct/13636', + 'info_dict': { + 'id': '13636', + 'ext': 'mp4', + 'title': 're:^Участковая избирательная комиссия №2231 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Россия, Москва, улица Введенского, 32А', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://vybory.mos.ru/account/channels?station_id=13636', + 'only_matching': True, + }] + + def _real_extract(self, url): + station_id = self._match_id(url) + + channels = self._download_json( + 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id, + station_id) + + formats = [] + for cam_num, (sid, hosts, name, _) in enumerate(channels, 1): + for num, host in enumerate(hosts, 1): + formats.append({ + 'url': 'http://%s/master.m3u8?sid=%s' % (host, sid), + 'ext': 'mp4', + 'format_id': 
'camera%d-host%d' % (cam_num, num), + 'format_note': '%s, %s' % (name, host), + }) + + info = self._download_json( + 'http://vybory.mos.ru/json/voting_stations/136/%s.json' % station_id, + station_id, 'Downloading station info') + + title = info['name'] + + return { + 'id': station_id, + 'title': self._live_title(title), + 'description': info.get('address'), + 'is_live': True, + 'formats': formats, + } From 9ca93b99d110f58ec9b280020fb5fede2441794e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:15:22 +0700 Subject: [PATCH 1707/3599] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index b0a65bde2..dd11a17b9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Introduce manifest_url and fragments fields in formats dictionary for + fragmented media ++ Provide manifest_url field for DASH segments, HLS and HDS ++ Provide fragments field for DASH segments +* Rework DASH segments downloader to use fragments field ++ Add helper method for Wowza Streaming Engine formats extraction + Extractors ++ [vyborymos] Add extractor for vybory.mos.ru (#10692) ++ [xfileshare] Add title regular expression for streamin.to (#10646) ++ [globo:article] Add support for multiple videos (#10653) + [thisav] Recognize HTML5 videos (#10447) * [jwplatform] Improve JWPlayer detection ++ [mangomolo] Add support for Mangomolo embeds ++ [toutv] Add support for authentication (#10669) +* [franceinter] Fix upload date extraction +* [tv4] Fix HLS and HDS formats extraction (#10659) version 2016.09.15 From 3acff9423df437dd4bd1530a69011fc9ddc74ad1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:16:55 +0700 Subject: [PATCH 1708/3599] release 2016.09.18 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 3 +++ youtube_dl/version.py | 2 +- 4 files 
changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 61cea757c..b9d8ebad7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.15*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.15** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.18** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.15 +[debug] youtube-dl version 2016.09.18 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index dd11a17b9..a71fadfa7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.18 Core + Introduce manifest_url and fragments fields in formats dictionary for diff --git 
a/docs/supportedsites.md b/docs/supportedsites.md index fcb618561..95a137393 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -388,6 +388,8 @@ - **mailru**: Видео@Mail.Ru - **MakersChannel** - **MakerTV** + - **mangomolo:live** + - **mangomolo:video** - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** @@ -849,6 +851,7 @@ - **VRT** - **vube**: Vube.com - **VuClip** + - **VyboryMos** - **Walla** - **washingtonpost** - **washingtonpost:article** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 081fd6ef0..5ae6a72aa 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.15' +__version__ = '2016.09.18' From a1da888d0cc92fdf3506b30ee85ce241e9090408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Sep 2016 17:28:41 +0700 Subject: [PATCH 1709/3599] [vyborymos] Improve station info extraction --- youtube_dl/extractor/vyborymos.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dl/extractor/vyborymos.py index 884aecb71..9e703c4b6 100644 --- a/youtube_dl/extractor/vyborymos.py +++ b/youtube_dl/extractor/vyborymos.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str class VyboryMosIE(InfoExtractor): @@ -28,7 +29,7 @@ class VyboryMosIE(InfoExtractor): channels = self._download_json( 'http://vybory.mos.ru/account/channels?station_id=%s' % station_id, - station_id) + station_id, 'Downloading channels JSON') formats = [] for cam_num, (sid, hosts, name, _) in enumerate(channels, 1): @@ -41,14 +42,13 @@ class VyboryMosIE(InfoExtractor): }) info = self._download_json( - 'http://vybory.mos.ru/json/voting_stations/136/%s.json' % station_id, - station_id, 'Downloading station info') - - title = info['name'] + 'http://vybory.mos.ru/json/voting_stations/%s/%s.json' + % 
(compat_str(station_id)[:3], station_id), + station_id, 'Downloading station JSON', fatal=False) return { 'id': station_id, - 'title': self._live_title(title), + 'title': self._live_title(info['name'] if info else station_id), 'description': info.get('address'), 'is_live': True, 'formats': formats, From d8dbf8707d4e45a939fc74c76bb919771007f8ba Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 18 Sep 2016 18:33:54 +0800 Subject: [PATCH 1710/3599] [thisav] Improve title extraction (closes #10682) I didn't add a test case as the one in #10682 looks like a copyrighted product. --- ChangeLog | 6 ++++++ youtube_dl/extractor/thisav.py | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index a71fadfa7..18f9fa861 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version <unreleased> + +Extractors +* [thisav] Improve title extraction (#10682) + + version 2016.09.18 Core diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 027a8e907..4473a3c77 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .jwplatform import JWPlatformBaseIE +from ..utils import remove_end class ThisAVIE(JWPlatformBaseIE): @@ -35,7 +36,9 @@ class ThisAVIE(JWPlatformBaseIE): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') + title = remove_end(self._html_search_regex( + r'<title>([^<]+)', webpage, 'title'), + ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站') video_url = self._html_search_regex( r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None) if video_url: From cc764a6da8530248f9810397a22b20c972877a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 18 Sep 2016 19:10:18 +0700 Subject: [PATCH 1711/3599] [twitch:stream] Remove fallback to profile extraction when stream is 
offline Main page does not contain profile videos anymore --- youtube_dl/extractor/twitch.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index af6d890b0..bc352391e 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -400,11 +400,8 @@ class TwitchStreamIE(TwitchBaseIE): 'kraken/streams/%s' % channel_id, channel_id, 'Downloading stream JSON').get('stream') - # Fallback on profile extraction if stream is offline if not stream: - return self.url_result( - 'http://www.twitch.tv/%s/profile' % channel_id, - 'TwitchProfile', channel_id) + raise ExtractorError('%s is offline' % channel_id, expected=True) # Channel name may be typed if different case than the original channel name # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing From 70b4cf9b1b8a2c2935ca7384d7545463cfd4ea16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 19 Sep 2016 02:50:06 +0700 Subject: [PATCH 1712/3599] [crunchyroll] Check if already logged in (Closes #10700) --- youtube_dl/extractor/crunchyroll.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 1b69bd0b6..e4c10ad24 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -46,6 +46,13 @@ class CrunchyrollBaseIE(InfoExtractor): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') + def is_logged(webpage): + return 'Redirecting' in webpage + + # Already logged in + if is_logged(login_page): + return + login_form_str = self._search_regex( r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, login_page, 'login form', group='form') @@ -69,7 +76,7 @@ class CrunchyrollBaseIE(InfoExtractor): headers={'Content-Type': 'application/x-www-form-urlencoded'}) # Successful login - if '<title>Redirecting' in 
response: + if is_logged(response): return error = self._html_search_regex( From 59fd8f931d274cc702a7e260e9ec996f8db7c9f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 19 Sep 2016 02:57:14 +0700 Subject: [PATCH 1713/3599] [ChangeLog] Actualize --- ChangeLog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index 18f9fa861..c67d5f650 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,10 @@ version <unreleased> Extractors ++ [crunchyroll] Check if already authenticated (#10700) +- [twitch:stream] Remove fallback to profile extraction when stream is offline * [thisav] Improve title extraction (#10682) +* [vyborymos] Improve station info extraction version 2016.09.18 From cb57386873a053b3328a78f48cf27f23ca6897d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 19 Sep 2016 02:58:32 +0700 Subject: [PATCH 1714/3599] release 2016.09.19 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index b9d8ebad7..8b28d784a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.18** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.19** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.18 +[debug] youtube-dl version 2016.09.19 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index c67d5f650..24077c430 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.19 Extractors + [crunchyroll] Check if already authenticated (#10700) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5ae6a72aa..9d3138181 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.18' +__version__ = '2016.09.19' From c38f06818df83f5f46cbdee1069bfaf53a537cc8 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 11:55:30 +0100 Subject: [PATCH 1715/3599] add support for Adobe Pass auth in tbs,tnt and trutv extractors(fixes #10642)(closes #10222)(closes #10519) --- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/tbs.py | 13 +++++-------- youtube_dl/extractor/trutv.py | 12 ++++++++++++ youtube_dl/extractor/turner.py | 17 ++++++++++------- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 01932e5e6..c787e0962 100644 --- 
a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -83,7 +83,7 @@ class AdobePassIE(InfoExtractor): 'User-Agent': self._USER_AGENT, } - guid = xml_text(resource, 'guid') + guid = xml_text(resource, 'guid') if '<' in resource else resource count = 0 while count < 2: requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {} diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index 0c351e045..bf93eb868 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -4,10 +4,7 @@ from __future__ import unicode_literals import re from .turner import TurnerBaseIE -from ..utils import ( - extract_attributes, - ExtractorError, -) +from ..utils import extract_attributes class TBSIE(TurnerBaseIE): @@ -37,10 +34,6 @@ class TBSIE(TurnerBaseIE): site = domain[:3] webpage = self._download_webpage(url, display_id) video_params = extract_attributes(self._search_regex(r'(<[^>]+id="page-video"[^>]*>)', webpage, 'video params')) - if video_params.get('isAuthRequired') == 'true': - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported.', expected=True) query = None clip_id = video_params.get('clipid') if clip_id: @@ -56,4 +49,8 @@ class TBSIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/%s/big' % site, 'tokenizer_src': 'http://www.%s.com/video/processors/services/token_ipadAdobe.do' % domain, }, + }, { + 'url': url, + 'site_name': site.upper(), + 'auth_required': video_params.get('isAuthRequired') != 'false', }) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index e60d8a181..3a5782525 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -22,9 +22,17 @@ class TruTVIE(TurnerBaseIE): def _real_extract(self, url): path, video_id = re.match(self._VALID_URL, url).groups() + auth_required = False if path: data_src = 
'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path else: + webpage = self._download_webpage(url, video_id) + video_id = self._search_regex( + r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';", + webpage, 'video id', default=video_id) + auth_required = self._search_regex( + r'TTV\.TVE\.authRequired\s*=\s*(true|false);', + webpage, 'auth required', default='false') == 'true' data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id return self._extract_cvp_info( data_src, path, { @@ -32,4 +40,8 @@ class TruTVIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big', 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do', }, + }, { + 'url': url, + 'site_name': 'truTV', + 'auth_required': auth_required, }) diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 4228c1ccc..57ffedb87 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( xpath_text, @@ -16,11 +16,11 @@ from ..utils import ( ) -class TurnerBaseIE(InfoExtractor): +class TurnerBaseIE(AdobePassIE): def _extract_timestamp(self, video_data): return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts')) - def _extract_cvp_info(self, data_src, video_id, path_data={}): + def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}): video_data = self._download_xml(data_src, video_id) video_id = video_data.attrib['id'] title = xpath_text(video_data, 'headline', fatal=True) @@ -70,11 +70,14 @@ class TurnerBaseIE(InfoExtractor): secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*' token = tokens.get(secure_path) if not token: + query = { + 'path': secure_path, + 'videoId': content_id, + } + if ap_data.get('auth_required'): + 
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], video_id, ap_data['site_name'], ap_data['site_name']) auth = self._download_xml( - secure_path_data['tokenizer_src'], video_id, query={ - 'path': secure_path, - 'videoId': content_id, - }) + secure_path_data['tokenizer_src'], video_id, query=query) error_msg = xpath_text(auth, 'error/msg') if error_msg: raise ExtractorError(error_msg, expected=True) From e33a7253b23e0adca9a3cb9a3856952c922a3357 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 15:52:23 +0100 Subject: [PATCH 1716/3599] [fox] add support for Adobe Pass auth(closes #8584) --- youtube_dl/extractor/fox.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py index 9f406b17e..9f2e5d065 100644 --- a/youtube_dl/extractor/fox.py +++ b/youtube_dl/extractor/fox.py @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor +from .adobepass import AdobePassIE from ..utils import ( smuggle_url, update_url_query, ) -class FOXIE(InfoExtractor): +class FOXIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.fox.com/watch/255180355939/7684182528', @@ -30,14 +30,26 @@ class FOXIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - release_url = self._parse_json(self._search_regex( - r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), - video_id)['release_url'] + settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), video_id) + fox_pdk_player = settings['fox_pdk_player'] + release_url = fox_pdk_player['release_url'] + query = { + 'mbr': 'true', + 'switch': 'http' + } + if fox_pdk_player.get('access') == 'locked': + ap_p = settings['foxAdobePassProvider'] + rating = 
ap_p.get('videoRating') + if rating == 'n/a': + rating = None + resource = self._get_mvpd_resource('fbc-fox', None, ap_p['videoGUID'], rating) + query['auth'] = self._extract_mvpd_auth(url, video_id, 'fbc-fox', resource) return { '_type': 'url_transparent', 'ie_key': 'ThePlatform', - 'url': smuggle_url(update_url_query( - release_url, {'switch': 'http'}), {'force_smil_url': True}), + 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), 'id': video_id, } From 4bfd294e2f83301921494c02e497cccf1a26cfd5 Mon Sep 17 00:00:00 2001 From: coolsa <noob.cloud@gmail.com> Date: Sun, 18 Sep 2016 03:53:05 -0600 Subject: [PATCH 1717/3599] [soundcloud] Extract license metadata --- youtube_dl/extractor/soundcloud.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 9635c2b49..47b84809f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -53,6 +53,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'E.T. 
ExTerrestrial Music', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'duration': 143, + 'license': 'all-rights-reserved', } }, # not streamable song @@ -66,6 +67,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'The Royal Concept', 'upload_date': '20120521', 'duration': 227, + 'license': 'all-rights-reserved', }, 'params': { # rtmp @@ -84,6 +86,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # private link (alt format) @@ -98,6 +101,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', 'duration': 9, + 'license': 'all-rights-reserved', }, }, # downloadable song @@ -112,6 +116,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'oddsamples', 'upload_date': '20140109', 'duration': 17, + 'license': 'cc-by-sa', }, }, ] @@ -138,8 +143,8 @@ class SoundcloudIE(InfoExtractor): name = full_title or track_id if quiet: self.report_extraction(name) - thumbnail = info['artwork_url'] + track_license = info['license'] if thumbnail is not None: thumbnail = thumbnail.replace('-large', '-t500x500') ext = 'mp3' @@ -152,6 +157,7 @@ class SoundcloudIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), + 'license': track_license, } formats = [] if info.get('downloadable', False): @@ -222,6 +228,7 @@ class SoundcloudIE(InfoExtractor): track_id = mobj.group('track_id') token = None + if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id From f62a77b99a73ed3acf8406efaa34d08c73682be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 20 Sep 2016 21:55:57 +0700 Subject: [PATCH 1718/3599] [soundcloud] Modernize --- youtube_dl/extractor/soundcloud.py | 14 ++++++-------- 1 
file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 47b84809f..513c54829 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -143,21 +143,20 @@ class SoundcloudIE(InfoExtractor): name = full_title or track_id if quiet: self.report_extraction(name) - thumbnail = info['artwork_url'] - track_license = info['license'] - if thumbnail is not None: + thumbnail = info.get('artwork_url') + if isinstance(thumbnail, compat_str): thumbnail = thumbnail.replace('-large', '-t500x500') ext = 'mp3' result = { 'id': track_id, - 'uploader': info['user']['username'], - 'upload_date': unified_strdate(info['created_at']), + 'uploader': info.get('user', {}).get('username'), + 'upload_date': unified_strdate(info.get('created_at')), 'title': info['title'], - 'description': info['description'], + 'description': info.get('description'), 'thumbnail': thumbnail, 'duration': int_or_none(info.get('duration'), 1000), 'webpage_url': info.get('permalink_url'), - 'license': track_license, + 'license': info.get('license'), } formats = [] if info.get('downloadable', False): @@ -227,7 +226,6 @@ class SoundcloudIE(InfoExtractor): raise ExtractorError('Invalid URL: %s' % url) track_id = mobj.group('track_id') - token = None if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID From 1ae0ae5db0bc9c388de970c71880e2f3dc400cc3 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 20 Sep 2016 18:51:29 +0100 Subject: [PATCH 1719/3599] [cartoonnetwork] add support Adobe Pass auth --- youtube_dl/extractor/cartoonnetwork.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py index 688a6375e..086ec90c9 100644 --- a/youtube_dl/extractor/cartoonnetwork.py +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -33,4 +33,10 
@@ class CartoonNetworkIE(TurnerBaseIE): 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big', 'tokenizer_src': 'http://www.cartoonnetwork.com/cntv/mvpd/processors/services/token_ipadAdobe.do', }, + }, { + 'url': url, + 'site_name': 'CartoonNetwork', + 'auth_required': self._search_regex( + r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);', + webpage, 'auth required', default='false') == 'true', }) From 3a5a18705f2a7faf64a4b69665511ef5f0c6084d Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Sep 2016 15:56:31 +0100 Subject: [PATCH 1720/3599] [adobepass] add support MSO that depend on watchTVeverywhere(closes #10709) --- youtube_dl/extractor/adobepass.py | 1264 ++++++++++++++++++++++++++++- 1 file changed, 1259 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index c787e0962..8f7ed6ef2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -17,15 +17,1269 @@ from ..utils import ( MSO_INFO = { 'DTV': { - 'name': 'DirecTV', + 'name': 'DIRECTV', 'username_field': 'username', 'password_field': 'password', }, 'Rogers': { - 'name': 'Rogers Cable', + 'name': 'Rogers', 'username_field': 'UserName', 'password_field': 'UserPassword', }, + 'thr030': { + 'name': '3 Rivers Communications' + }, + 'com140': { + 'name': 'Access Montana' + }, + 'acecommunications': { + 'name': 'AcenTek' + }, + 'acm010': { + 'name': 'Acme Communications' + }, + 'ada020': { + 'name': 'Adams Cable Service' + }, + 'alb020': { + 'name': 'Albany Mutual Telephone' + }, + 'algona': { + 'name': 'Algona Municipal Utilities' + }, + 'allwest': { + 'name': 'All West Communications' + }, + 'all025': { + 'name': 'Allen\'s Communications' + }, + 'spl010': { + 'name': 'Alliance Communications' + }, + 'all070': { + 'name': 'ALLO Communications' + }, + 'alpine': { + 'name': 'Alpine Communications' + }, + 'hun015': { + 'name': 'American Broadband' + }, + 'nwc010': { + 
'name': 'American Broadband Missouri' + }, + 'com130-02': { + 'name': 'American Community Networks' + }, + 'com130-01': { + 'name': 'American Warrior Networks' + }, + 'tom020': { + 'name': 'Amherst Telephone/Tomorrow Valley' + }, + 'tvc020': { + 'name': 'Andycable' + }, + 'arkwest': { + 'name': 'Arkwest Communications' + }, + 'art030': { + 'name': 'Arthur Mutual Telephone Company' + }, + 'arvig': { + 'name': 'Arvig' + }, + 'nttcash010': { + 'name': 'Ashland Home Net' + }, + 'astound': { + 'name': 'Astound (now Wave)' + }, + 'dix030': { + 'name': 'ATC Broadband' + }, + 'ara010': { + 'name': 'ATC Communications' + }, + 'she030-02': { + 'name': 'Ayersville Communications' + }, + 'baldwin': { + 'name': 'Baldwin Lightstream' + }, + 'bal040': { + 'name': 'Ballard TV' + }, + 'cit025': { + 'name': 'Bardstown Cable TV' + }, + 'bay030': { + 'name': 'Bay Country Communications' + }, + 'tel095': { + 'name': 'Beaver Creek Cooperative Telephone' + }, + 'bea020': { + 'name': 'Beaver Valley Cable' + }, + 'bee010': { + 'name': 'Bee Line Cable' + }, + 'wir030': { + 'name': 'Beehive Broadband' + }, + 'bra020': { + 'name': 'BELD' + }, + 'bel020': { + 'name': 'Bellevue Municipal Cable' + }, + 'vol040-01': { + 'name': 'Ben Lomand Connect / BLTV' + }, + 'bev010': { + 'name': 'BEVCOMM' + }, + 'big020': { + 'name': 'Big Sandy Broadband' + }, + 'ble020': { + 'name': 'Bledsoe Telephone Cooperative' + }, + 'bvt010': { + 'name': 'Blue Valley Tele-Communications' + }, + 'bra050': { + 'name': 'Brandenburg Telephone Co.' + }, + 'bte010': { + 'name': 'Bristol Tennessee Essential Services' + }, + 'annearundel': { + 'name': 'Broadstripe' + }, + 'btc010': { + 'name': 'BTC Communications' + }, + 'btc040': { + 'name': 'BTC Vision - Nahunta' + }, + 'bul010': { + 'name': 'Bulloch Telephone Cooperative' + }, + 'but010': { + 'name': 'Butler-Bremer Communications' + }, + 'tel160-csp': { + 'name': 'C Spire SNAP' + }, + 'csicable': { + 'name': 'Cable Services Inc.' 
+ }, + 'cableamerica': { + 'name': 'CableAmerica' + }, + 'cab038': { + 'name': 'CableSouth Media 3' + }, + 'weh010-camtel': { + 'name': 'Cam-Tel Company' + }, + 'car030': { + 'name': 'Cameron Communications' + }, + 'canbytel': { + 'name': 'Canby Telcom' + }, + 'crt020': { + 'name': 'CapRock Tv' + }, + 'car050': { + 'name': 'Carnegie Cable' + }, + 'cas': { + 'name': 'CAS Cable' + }, + 'casscomm': { + 'name': 'CASSCOMM' + }, + 'mid180-02': { + 'name': 'Catalina Broadband Solutions' + }, + 'cccomm': { + 'name': 'CC Communications' + }, + 'nttccde010': { + 'name': 'CDE Lightband' + }, + 'cfunet': { + 'name': 'Cedar Falls Utilities' + }, + 'dem010-01': { + 'name': 'Celect-Bloomer Telephone Area' + }, + 'dem010-02': { + 'name': 'Celect-Bruce Telephone Area' + }, + 'dem010-03': { + 'name': 'Celect-Citizens Connected Area' + }, + 'dem010-04': { + 'name': 'Celect-Elmwood/Spring Valley Area' + }, + 'dem010-06': { + 'name': 'Celect-Mosaic Telecom' + }, + 'dem010-05': { + 'name': 'Celect-West WI Telephone Area' + }, + 'net010-02': { + 'name': 'Cellcom/Nsight Telservices' + }, + 'cen100': { + 'name': 'CentraCom' + }, + 'nttccst010': { + 'name': 'Central Scott / CSTV' + }, + 'cha035': { + 'name': 'Chaparral CableVision' + }, + 'cha050': { + 'name': 'Chariton Valley Communication Corporation, Inc.' + }, + 'cha060': { + 'name': 'Chatmoss Cablevision' + }, + 'nttcche010': { + 'name': 'Cherokee Communications' + }, + 'che050': { + 'name': 'Chesapeake Bay Communications' + }, + 'cimtel': { + 'name': 'Cim-Tel Cable, LLC.' + }, + 'cit180': { + 'name': 'Citizens Cablevision - Floyd, VA' + }, + 'cit210': { + 'name': 'Citizens Cablevision, Inc.' 
+ }, + 'cit040': { + 'name': 'Citizens Fiber' + }, + 'cit250': { + 'name': 'Citizens Mutual' + }, + 'war040': { + 'name': 'Citizens Telephone Corporation' + }, + 'wat025': { + 'name': 'City Of Monroe' + }, + 'wadsworth': { + 'name': 'CityLink' + }, + 'nor100': { + 'name': 'CL Tel' + }, + 'cla010': { + 'name': 'Clarence Telephone and Cedar Communications' + }, + 'ser060': { + 'name': 'Clear Choice Communications' + }, + 'tac020': { + 'name': 'Click! Cable TV' + }, + 'war020': { + 'name': 'CLICK1.NET' + }, + 'cml010': { + 'name': 'CML Telephone Cooperative Association' + }, + 'cns': { + 'name': 'CNS' + }, + 'com160': { + 'name': 'Co-Mo Connect' + }, + 'coa020': { + 'name': 'Coast Communications' + }, + 'coa030': { + 'name': 'Coaxial Cable TV' + }, + 'mid055': { + 'name': 'Cobalt TV (Mid-State Community TV)' + }, + 'col070': { + 'name': 'Columbia Power & Water Systems' + }, + 'col080': { + 'name': 'Columbus Telephone' + }, + 'nor105': { + 'name': 'Communications 1 Cablevision, Inc.' + }, + 'com150': { + 'name': 'Community Cable & Broadband' + }, + 'com020': { + 'name': 'Community Communications Company' + }, + 'coy010': { + 'name': 'commZoom' + }, + 'com025': { + 'name': 'Complete Communication Services' + }, + 'cat020': { + 'name': 'Comporium' + }, + 'com071': { + 'name': 'ComSouth Telesys' + }, + 'consolidatedcable': { + 'name': 'Consolidated' + }, + 'conwaycorp': { + 'name': 'Conway Corporation' + }, + 'coo050': { + 'name': 'Coon Valley Telecommunications Inc' + }, + 'coo080': { + 'name': 'Cooperative Telephone Company' + }, + 'cpt010': { + 'name': 'CP-TEL' + }, + 'cra010': { + 'name': 'Craw-Kan Telephone' + }, + 'crestview': { + 'name': 'Crestview Cable Communications' + }, + 'cross': { + 'name': 'Cross TV' + }, + 'cro030': { + 'name': 'Crosslake Communications' + }, + 'ctc040': { + 'name': 'CTC - Brainerd MN' + }, + 'phe030': { + 'name': 'CTV-Beam - East Alabama' + }, + 'cun010': { + 'name': 'Cunningham Telephone & Cable' + }, + 'dpc010': { + 'name': 'D & P 
Communications' + }, + 'dak030': { + 'name': 'Dakota Central Telecommunications' + }, + 'nttcdel010': { + 'name': 'Delcambre Telephone LLC' + }, + 'tel160-del': { + 'name': 'Delta Telephone Company' + }, + 'sal040': { + 'name': 'DiamondNet' + }, + 'ind060-dc': { + 'name': 'Direct Communications' + }, + 'doy010': { + 'name': 'Doylestown Cable TV' + }, + 'dic010': { + 'name': 'DRN' + }, + 'dtc020': { + 'name': 'DTC' + }, + 'dtc010': { + 'name': 'DTC Cable (Delhi)' + }, + 'dum010': { + 'name': 'Dumont Telephone Company' + }, + 'dun010': { + 'name': 'Dunkerton Telephone Cooperative' + }, + 'cci010': { + 'name': 'Duo County Telecom' + }, + 'eagle': { + 'name': 'Eagle Communications' + }, + 'weh010-east': { + 'name': 'East Arkansas Cable TV' + }, + 'eatel': { + 'name': 'EATEL Video, LLC' + }, + 'ell010': { + 'name': 'ECTA' + }, + 'emerytelcom': { + 'name': 'Emery Telcom Video LLC' + }, + 'nor200': { + 'name': 'Empire Access' + }, + 'endeavor': { + 'name': 'Endeavor Communications' + }, + 'sun045': { + 'name': 'Enhanced Telecommunications Corporation' + }, + 'mid030': { + 'name': 'enTouch' + }, + 'epb020': { + 'name': 'EPB Smartnet' + }, + 'jea010': { + 'name': 'EPlus Broadband' + }, + 'com065': { + 'name': 'ETC' + }, + 'ete010': { + 'name': 'Etex Communications' + }, + 'fbc-tele': { + 'name': 'F&B Communications' + }, + 'fal010': { + 'name': 'Falcon Broadband' + }, + 'fam010': { + 'name': 'FamilyView CableVision' + }, + 'far020': { + 'name': 'Farmers Mutual Telephone Company' + }, + 'fay010': { + 'name': 'Fayetteville Public Utilities' + }, + 'sal060': { + 'name': 'fibrant' + }, + 'fid010': { + 'name': 'Fidelity Communications' + }, + 'for030': { + 'name': 'FJ Communications' + }, + 'fli020': { + 'name': 'Flint River Communications' + }, + 'far030': { + 'name': 'FMT - Jesup' + }, + 'foo010': { + 'name': 'Foothills Communications' + }, + 'for080': { + 'name': 'Forsyth CableNet' + }, + 'fbcomm': { + 'name': 'Frankfort Plant Board' + }, + 'tel160-fra': { + 'name': 'Franklin 
Telephone Company' + }, + 'nttcftc010': { + 'name': 'FTC' + }, + 'fullchannel': { + 'name': 'Full Channel, Inc.' + }, + 'gar040': { + 'name': 'Gardonville Cooperative Telephone Association' + }, + 'gbt010': { + 'name': 'GBT Communications, Inc.' + }, + 'tec010': { + 'name': 'Genuine Telecom' + }, + 'clr010': { + 'name': 'Giant Communications' + }, + 'gla010': { + 'name': 'Glasgow EPB' + }, + 'gle010': { + 'name': 'Glenwood Telecommunications' + }, + 'gra060': { + 'name': 'GLW Broadband Inc.' + }, + 'goldenwest': { + 'name': 'Golden West Cablevision' + }, + 'vis030': { + 'name': 'Grantsburg Telcom' + }, + 'gpcom': { + 'name': 'Great Plains Communications' + }, + 'gri010': { + 'name': 'Gridley Cable Inc' + }, + 'hbc010': { + 'name': 'H&B Cable Services' + }, + 'hae010': { + 'name': 'Haefele TV Inc.' + }, + 'htc010': { + 'name': 'Halstad Telephone Company' + }, + 'har005': { + 'name': 'Harlan Municipal Utilities' + }, + 'har020': { + 'name': 'Hart Communications' + }, + 'ced010': { + 'name': 'Hartelco TV' + }, + 'hea040': { + 'name': 'Heart of Iowa Communications Cooperative' + }, + 'htc020': { + 'name': 'Hickory Telephone Company' + }, + 'nttchig010': { + 'name': 'Highland Communication Services' + }, + 'hig030': { + 'name': 'Highland Media' + }, + 'spc010': { + 'name': 'Hilliary Communications' + }, + 'hin020': { + 'name': 'Hinton CATV Co.' + }, + 'hometel': { + 'name': 'HomeTel Entertainment, Inc.' + }, + 'hoodcanal': { + 'name': 'Hood Canal Communications' + }, + 'weh010-hope': { + 'name': 'Hope - Prescott Cable TV' + }, + 'horizoncable': { + 'name': 'Horizon Cable TV, Inc.' + }, + 'hor040': { + 'name': 'Horizon Chillicothe Telephone' + }, + 'htc030': { + 'name': 'HTC Communications Co. - IL' + }, + 'htccomm': { + 'name': 'HTC Communications, Inc. 
- IA' + }, + 'wal005': { + 'name': 'Huxley Communications' + }, + 'imon': { + 'name': 'ImOn Communications' + }, + 'ind040': { + 'name': 'Independence Telecommunications' + }, + 'rrc010': { + 'name': 'Inland Networks' + }, + 'stc020': { + 'name': 'Innovative Cable TV St Croix' + }, + 'car100': { + 'name': 'Innovative Cable TV St Thomas-St John' + }, + 'icc010': { + 'name': 'Inside Connect Cable' + }, + 'int100': { + 'name': 'Integra Telecom' + }, + 'int050': { + 'name': 'Interstate Telecommunications Coop' + }, + 'irv010': { + 'name': 'Irvine Cable' + }, + 'k2c010': { + 'name': 'K2 Communications' + }, + 'kal010': { + 'name': 'Kalida Telephone Company, Inc.' + }, + 'kal030': { + 'name': 'Kalona Cooperative Telephone Company' + }, + 'kmt010': { + 'name': 'KMTelecom' + }, + 'kpu010': { + 'name': 'KPU Telecommunications' + }, + 'kuh010': { + 'name': 'Kuhn Communications, Inc.' + }, + 'lak130': { + 'name': 'Lakeland Communications' + }, + 'lan010': { + 'name': 'Langco' + }, + 'lau020': { + 'name': 'Laurel Highland Total Communications, Inc.' + }, + 'leh010': { + 'name': 'Lehigh Valley Cooperative Telephone' + }, + 'bra010': { + 'name': 'Limestone Cable/Bracken Cable' + }, + 'loc020': { + 'name': 'LISCO' + }, + 'lit020': { + 'name': 'Litestream' + }, + 'tel140': { + 'name': 'LivCom' + }, + 'loc010': { + 'name': 'LocalTel Communications' + }, + 'weh010-longview': { + 'name': 'Longview - Kilgore Cable TV' + }, + 'lon030': { + 'name': 'Lonsdale Video Ventures, LLC' + }, + 'lns010': { + 'name': 'Lost Nation-Elwood Telephone Co.' + }, + 'nttclpc010': { + 'name': 'LPC Connect' + }, + 'lumos': { + 'name': 'Lumos Networks' + }, + 'madison': { + 'name': 'Madison Communications' + }, + 'mad030': { + 'name': 'Madison County Cable Inc.' + }, + 'nttcmah010': { + 'name': 'Mahaska Communication Group' + }, + 'mar010': { + 'name': 'Marne & Elk Horn Telephone Company' + }, + 'mcc040': { + 'name': 'McClure Telephone Co.' 
+ }, + 'mctv': { + 'name': 'MCTV' + }, + 'merrimac': { + 'name': 'Merrimac Communications Ltd.' + }, + 'metronet': { + 'name': 'Metronet' + }, + 'mhtc': { + 'name': 'MHTC' + }, + 'midhudson': { + 'name': 'Mid-Hudson Cable' + }, + 'midrivers': { + 'name': 'Mid-Rivers Communications' + }, + 'mid045': { + 'name': 'Midstate Communications' + }, + 'mil080': { + 'name': 'Milford Communications' + }, + 'min030': { + 'name': 'MINET' + }, + 'nttcmin010': { + 'name': 'Minford TV' + }, + 'san040-02': { + 'name': 'Mitchell Telecom' + }, + 'mlg010': { + 'name': 'MLGC' + }, + 'mon060': { + 'name': 'Mon-Cre TVE' + }, + 'mou110': { + 'name': 'Mountain Telephone' + }, + 'mou050': { + 'name': 'Mountain Village Cable' + }, + 'mtacomm': { + 'name': 'MTA Communications, LLC' + }, + 'mtc010': { + 'name': 'MTC Cable' + }, + 'med040': { + 'name': 'MTC Technologies' + }, + 'man060': { + 'name': 'MTCC' + }, + 'mtc030': { + 'name': 'MTCO Communications' + }, + 'mul050': { + 'name': 'Mulberry Telecommunications' + }, + 'mur010': { + 'name': 'Murray Electric System' + }, + 'musfiber': { + 'name': 'MUS FiberNET' + }, + 'mpw': { + 'name': 'Muscatine Power & Water' + }, + 'nttcsli010': { + 'name': 'myEVTV.com' + }, + 'nor115': { + 'name': 'NCC' + }, + 'nor260': { + 'name': 'NDTC' + }, + 'nctc': { + 'name': 'Nebraska Central Telecom, Inc.' 
+ }, + 'nel020': { + 'name': 'Nelsonville TV Cable' + }, + 'nem010': { + 'name': 'Nemont' + }, + 'new075': { + 'name': 'New Hope Telephone Cooperative' + }, + 'nor240': { + 'name': 'NICP' + }, + 'cic010': { + 'name': 'NineStar Connect' + }, + 'nktelco': { + 'name': 'NKTelco' + }, + 'nortex': { + 'name': 'Nortex Communications' + }, + 'nor140': { + 'name': 'North Central Telephone Cooperative' + }, + 'nor030': { + 'name': 'Northland Communications' + }, + 'nor075': { + 'name': 'Northwest Communications' + }, + 'nor125': { + 'name': 'Norwood Light Broadband' + }, + 'net010': { + 'name': 'Nsight Telservices' + }, + 'dur010': { + 'name': 'Ntec' + }, + 'nts010': { + 'name': 'NTS Communications' + }, + 'new045': { + 'name': 'NU-Telecom' + }, + 'nulink': { + 'name': 'NuLink' + }, + 'jam030': { + 'name': 'NVC' + }, + 'far035': { + 'name': 'OmniTel Communications' + }, + 'onesource': { + 'name': 'OneSource Communications' + }, + 'cit230': { + 'name': 'Opelika Power Services' + }, + 'daltonutilities': { + 'name': 'OptiLink' + }, + 'mid140': { + 'name': 'OPTURA' + }, + 'ote010': { + 'name': 'OTEC Communication Company' + }, + 'cci020': { + 'name': 'Packerland Broadband' + }, + 'pan010': { + 'name': 'Panora Telco/Guthrie Center Communications' + }, + 'otter': { + 'name': 'Park Region Telephone & Otter Tail Telcom' + }, + 'mid050': { + 'name': 'Partner Communications Cooperative' + }, + 'fib010': { + 'name': 'Pathway' + }, + 'paulbunyan': { + 'name': 'Paul Bunyan Communications' + }, + 'pem020': { + 'name': 'Pembroke Telephone Company' + }, + 'mck010': { + 'name': 'Peoples Rural Telephone Cooperative' + }, + 'pul010': { + 'name': 'PES Energize' + }, + 'phi010': { + 'name': 'Philippi Communications System' + }, + 'phonoscope': { + 'name': 'Phonoscope Cable' + }, + 'pin070': { + 'name': 'Pine Belt Communications, Inc.' 
+ }, + 'weh010-pine': { + 'name': 'Pine Bluff Cable TV' + }, + 'pin060': { + 'name': 'Pineland Telephone Cooperative' + }, + 'cam010': { + 'name': 'Pinpoint Communications' + }, + 'pio060': { + 'name': 'Pioneer Broadband' + }, + 'pioncomm': { + 'name': 'Pioneer Communications' + }, + 'pioneer': { + 'name': 'Pioneer DTV' + }, + 'pla020': { + 'name': 'Plant TiftNet, Inc.' + }, + 'par010': { + 'name': 'PLWC' + }, + 'pro035': { + 'name': 'PMT' + }, + 'vik011': { + 'name': 'Polar Cablevision' + }, + 'pottawatomie': { + 'name': 'Pottawatomie Telephone Co.' + }, + 'premiercomm': { + 'name': 'Premier Communications' + }, + 'psc010': { + 'name': 'PSC' + }, + 'pan020': { + 'name': 'PTCI' + }, + 'qco010': { + 'name': 'QCOL' + }, + 'qua010': { + 'name': 'Quality Cablevision' + }, + 'rad010': { + 'name': 'Radcliffe Telephone Company' + }, + 'car040': { + 'name': 'Rainbow Communications' + }, + 'rai030': { + 'name': 'Rainier Connect' + }, + 'ral010': { + 'name': 'Ralls Technologies' + }, + 'rct010': { + 'name': 'RC Technologies' + }, + 'red040': { + 'name': 'Red River Communications' + }, + 'ree010': { + 'name': 'Reedsburg Utility Commission' + }, + 'mol010': { + 'name': 'Reliance Connects- Oregon' + }, + 'res020': { + 'name': 'Reserve Telecommunications' + }, + 'weh010-resort': { + 'name': 'Resort TV Cable' + }, + 'rld010': { + 'name': 'Richland Grant Telephone Cooperative, Inc.' + }, + 'riv030': { + 'name': 'River Valley Telecommunications Coop' + }, + 'rockportcable': { + 'name': 'Rock Port Cablevision' + }, + 'rsf010': { + 'name': 'RS Fiber' + }, + 'rtc': { + 'name': 'RTC Communication Corp' + }, + 'res040': { + 'name': 'RTC-Reservation Telephone Coop.' + }, + 'rte010': { + 'name': 'RTEC Communications' + }, + 'stc010': { + 'name': 'S&T' + }, + 'san020': { + 'name': 'San Bruno Cable TV' + }, + 'san040-01': { + 'name': 'Santel' + }, + 'sav010': { + 'name': 'SCI Broadband-Savage Communications Inc.' 
+ }, + 'sco050': { + 'name': 'Scottsboro Electric Power Board' + }, + 'scr010': { + 'name': 'Scranton Telephone Company' + }, + 'selco': { + 'name': 'SELCO' + }, + 'she010': { + 'name': 'Shentel' + }, + 'she030': { + 'name': 'Sherwood Mutual Telephone Association, Inc.' + }, + 'ind060-ssc': { + 'name': 'Silver Star Communications' + }, + 'sjoberg': { + 'name': 'Sjoberg\'s Inc.' + }, + 'sou025': { + 'name': 'SKT' + }, + 'sky050': { + 'name': 'SkyBest TV' + }, + 'nttcsmi010': { + 'name': 'Smithville Communications' + }, + 'woo010': { + 'name': 'Solarus' + }, + 'sou075': { + 'name': 'South Central Rural Telephone Cooperative' + }, + 'sou065': { + 'name': 'South Holt Cablevision, Inc.' + }, + 'sou035': { + 'name': 'South Slope Cooperative Communications' + }, + 'spa020': { + 'name': 'Spanish Fork Community Network' + }, + 'spe010': { + 'name': 'Spencer Municipal Utilities' + }, + 'spi005': { + 'name': 'Spillway Communications, Inc.' + }, + 'srt010': { + 'name': 'SRT' + }, + 'cccsmc010': { + 'name': 'St. Maarten Cable TV' + }, + 'sta025': { + 'name': 'Star Communications' + }, + 'sco020': { + 'name': 'STE' + }, + 'uin010': { + 'name': 'STRATA Networks' + }, + 'sum010': { + 'name': 'Sumner Cable TV' + }, + 'pie010': { + 'name': 'Surry TV/PCSI TV' + }, + 'swa010': { + 'name': 'Swayzee Communications' + }, + 'sweetwater': { + 'name': 'Sweetwater Cable Television Co' + }, + 'weh010-talequah': { + 'name': 'Tahlequah Cable TV' + }, + 'tct': { + 'name': 'TCT' + }, + 'tel050': { + 'name': 'Tele-Media Company' + }, + 'com050': { + 'name': 'The Community Agency' + }, + 'thr020': { + 'name': 'Three River' + }, + 'cab140': { + 'name': 'Town & Country Technologies' + }, + 'tra010': { + 'name': 'Trans-Video' + }, + 'tre010': { + 'name': 'Trenton TV Cable Company' + }, + 'tcc': { + 'name': 'Tri County Communications Cooperative' + }, + 'tri025': { + 'name': 'TriCounty Telecom' + }, + 'tri110': { + 'name': 'TrioTel Communications, Inc.' 
+ }, + 'tro010': { + 'name': 'Troy Cablevision, Inc.' + }, + 'tsc': { + 'name': 'TSC' + }, + 'cit220': { + 'name': 'Tullahoma Utilities Board' + }, + 'tvc030': { + 'name': 'TV Cable of Rensselaer' + }, + 'tvc015': { + 'name': 'TVC Cable' + }, + 'cab180': { + 'name': 'TVision' + }, + 'twi040': { + 'name': 'Twin Lakes' + }, + 'tvtinc': { + 'name': 'Twin Valley' + }, + 'uis010': { + 'name': 'Union Telephone Company' + }, + 'uni110': { + 'name': 'United Communications - TN' + }, + 'uni120': { + 'name': 'United Services' + }, + 'uss020': { + 'name': 'US Sonet' + }, + 'cab060': { + 'name': 'USA Communications' + }, + 'she005': { + 'name': 'USA Communications/Shellsburg, IA' + }, + 'val040': { + 'name': 'Valley TeleCom Group' + }, + 'val025': { + 'name': 'Valley Telecommunications' + }, + 'val030': { + 'name': 'Valparaiso Broadband' + }, + 'cla050': { + 'name': 'Vast Broadband' + }, + 'sul015': { + 'name': 'Venture Communications Cooperative, Inc.' + }, + 'ver025': { + 'name': 'Vernon Communications Co-op' + }, + 'weh010-vicksburg': { + 'name': 'Vicksburg Video' + }, + 'vis070': { + 'name': 'Vision Communications' + }, + 'volcanotel': { + 'name': 'Volcano Vision, Inc.' 
+ }, + 'vol040-02': { + 'name': 'VolFirst / BLTV' + }, + 'ver070': { + 'name': 'VTel' + }, + 'nttcvtx010': { + 'name': 'VTX1' + }, + 'bci010-02': { + 'name': 'Vyve Broadband' + }, + 'wab020': { + 'name': 'Wabash Mutual Telephone' + }, + 'waitsfield': { + 'name': 'Waitsfield Cable' + }, + 'wal010': { + 'name': 'Walnut Communications' + }, + 'wavebroadband': { + 'name': 'Wave' + }, + 'wav030': { + 'name': 'Waverly Communications Utility' + }, + 'wbi010': { + 'name': 'WBI' + }, + 'web020': { + 'name': 'Webster-Calhoun Cooperative Telephone Association' + }, + 'wes005': { + 'name': 'West Alabama TV Cable' + }, + 'carolinata': { + 'name': 'West Carolina Communications' + }, + 'wct010': { + 'name': 'West Central Telephone Association' + }, + 'wes110': { + 'name': 'West River Cooperative Telephone Company' + }, + 'ani030': { + 'name': 'WesTel Systems' + }, + 'westianet': { + 'name': 'Western Iowa Networks' + }, + 'nttcwhi010': { + 'name': 'Whidbey Telecom' + }, + 'weh010-white': { + 'name': 'White County Cable TV' + }, + 'wes130': { + 'name': 'Wiatel' + }, + 'wik010': { + 'name': 'Wiktel' + }, + 'wil070': { + 'name': 'Wilkes Communications, Inc./RiverStreet Networks' + }, + 'wil015': { + 'name': 'Wilson Communications' + }, + 'win010': { + 'name': 'Windomnet/SMBS' + }, + 'win090': { + 'name': 'Windstream Cable TV' + }, + 'wcta': { + 'name': 'Winnebago Cooperative Telecom Association' + }, + 'wtc010': { + 'name': 'WTC' + }, + 'wil040': { + 'name': 'WTC Communications, Inc.' 
+ }, + 'wya010': { + 'name': 'Wyandotte Cable' + }, + 'hin020-02': { + 'name': 'X-Stream Services' + }, + 'xit010': { + 'name': 'XIT Communications' + }, + 'yel010': { + 'name': 'Yelcot Communications' + }, + 'mid180-01': { + 'name': 'yondoo' + }, + 'cou060': { + 'name': 'Zito Media' + }, } @@ -113,10 +1367,10 @@ class AdobePassIE(InfoExtractor): provider_login_page_res = post_form( provider_redirect_page_res, 'Downloading Provider Login Page') mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { - mso_info['username_field']: username, - mso_info['password_field']: password, + mso_info.get('username_field', 'username'): username, + mso_info.get('password_field', 'password'): password, }) - if mso_id == 'DTV': + if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') session = self._download_webpage( From 12f211d0cbd25554ff3116ee173ffc3f25d0e453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 21 Sep 2016 22:51:36 +0700 Subject: [PATCH 1721/3599] [videomore] Fix embed regex --- youtube_dl/extractor/videomore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 328b5b7fb..8a11ff848 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -84,7 +84,7 @@ class VideomoreIE(InfoExtractor): @staticmethod def _extract_url(webpage): mobj = re.search( - r'<object[^>]+data=(["\'])https?://videomore.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1', + r'<object[^>]+data=(["\'])https?://videomore\.ru/player\.swf\?.*config=(?P<url>https?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1', webpage) if mobj: return mobj.group('url') From 1978540a5122c53012e17a78841f3da0df77fd34 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 21 Sep 2016 21:49:52 +0100 Subject: [PATCH 1722/3599] [ooyala] extract all hls formats --- 
youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 2038a6ba5..72ec20938 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -47,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor): delivery_type = stream['delivery_type'] if delivery_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - s_url, embed_code, 'mp4', 'm3u8_native', + re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif delivery_type == 'hds' or ext == 'f4m': formats.extend(self._extract_f4m_formats( From 0a439c5c4c1a6a2ee54465c5ad893ffb768539d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 22 Sep 2016 21:48:53 +0700 Subject: [PATCH 1723/3599] [udemy] Stringify video id --- youtube_dl/extractor/udemy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index c2f507233..cce29c6e0 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, + compat_str, compat_urllib_request, compat_urlparse, ) @@ -207,7 +208,7 @@ class UdemyIE(InfoExtractor): if youtube_url: return self.url_result(youtube_url, 'Youtube') - video_id = asset['id'] + video_id = compat_str(asset['id']) thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl') duration = float_or_none(asset.get('data', {}).get('duration')) From e3d6bdc8fc48ddf0bea324c9196297e539669aaf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:11:13 +0800 Subject: [PATCH 1724/3599] [ustream] Support HLS streams (closes #10698) --- ChangeLog | 5 ++ youtube_dl/extractor/ustream.py | 122 +++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 
deletion(-) diff --git a/ChangeLog b/ChangeLog index 24077c430..5122af4c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +version <unreleased> + +Extractors ++ [ustream] Support the new HLS streams (#10698) + version 2016.09.19 Extractors diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index a3dc9d33e..0c06bf36b 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,15 +1,20 @@ from __future__ import unicode_literals +import random import re from .common import InfoExtractor from ..compat import ( + compat_str, compat_urlparse, ) from ..utils import ( + encode_data_uri, ExtractorError, int_or_none, float_or_none, + mimetype2ext, + str_or_none, ) @@ -47,8 +52,108 @@ class UstreamIE(InfoExtractor): 'id': '10299409', }, 'playlist_count': 3, + }, { + 'url': 'http://www.ustream.tv/recorded/91343263', + 'info_dict': { + 'id': '91343263', + 'ext': 'mp4', + 'title': 'GitHub Universe - General Session - Day 1', + 'upload_date': '20160914', + 'description': 'GitHub Universe - General Session - Day 1', + 'timestamp': 1473872730, + 'uploader': 'wa0dnskeqkr', + 'uploader_id': '38977840', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, }] + def _get_stream_info(self, url, video_id, app_id_ver, extra_note=None): + def num_to_hex(n): + return hex(n)[2:] + + rnd = random.randrange + + if not extra_note: + extra_note = '' + + conn_info = self._download_json( + 'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(1e8), video_id), + video_id, note='Downloading connection info' + extra_note, + query={ + 'type': 'viewer', + 'appId': app_id_ver[0], + 'appVersion': app_id_ver[1], + 'rsid': '%s:%s' % (num_to_hex(rnd(1e8)), num_to_hex(rnd(1e8))), + 'rpin': '_rpin.%d' % rnd(1e15), + 'referrer': url, + 'media': video_id, + 'application': 'recorded', + }) + host = conn_info[0]['args'][0]['host'] + connection_id = conn_info[0]['args'][0]['connectionId'] + + return self._download_json( + 
'http://%s/1/ustream?connectionId=%s' % (host, connection_id), + video_id, note='Downloading stream info' + extra_note) + + def _get_streams(self, url, video_id, app_id_ver): + # Sometimes the return dict does not have 'stream' + for trial_count in range(3): + stream_info = self._get_stream_info( + url, video_id, app_id_ver, + extra_note=' (try %d)' % (trial_count + 1) if trial_count > 0 else '') + if 'stream' in stream_info[0]['args'][0]: + return stream_info[0]['args'][0]['stream'] + return [] + + def _parse_segmented_mp4(self, dash_stream_info): + def resolve_dash_template(template, idx, chunk_hash): + return template.replace('%', compat_str(idx), 1).replace('%', chunk_hash) + + formats = [] + for stream in dash_stream_info['streams']: + # Use only one provider to avoid too many formats + provider = dash_stream_info['providers'][0] + fragments = [{ + 'url': resolve_dash_template( + provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0']) + }] + for idx in range(dash_stream_info['videoLength'] // dash_stream_info['chunkTime']): + fragments.append({ + 'url': resolve_dash_template( + provider['url'] + stream['segmentUrl'], idx, + dash_stream_info['hashes'][compat_str(idx // 10 * 10)]) + }) + content_type = stream['contentType'] + kind = content_type.split('/')[0] + f = { + 'format_id': '-'.join(filter(None, [ + 'dash', kind, str_or_none(stream.get('bitrate'))])), + 'protocol': 'http_dash_segments', + # TODO: generate a MPD doc for external players? 
+ 'url': encode_data_uri(b'<MPD/>', 'text/xml'), + 'ext': mimetype2ext(content_type), + 'height': stream.get('height'), + 'width': stream.get('width'), + 'fragments': fragments, + } + if kind == 'video': + f.update({ + 'vcodec': stream.get('codec'), + 'acodec': 'none', + 'vbr': stream.get('bitrate'), + }) + else: + f.update({ + 'vcodec': 'none', + 'acodec': stream.get('codec'), + 'abr': stream.get('bitrate'), + }) + formats.append(f) + return formats + def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('id') @@ -86,7 +191,22 @@ class UstreamIE(InfoExtractor): 'url': video_url, 'ext': format_id, 'filesize': filesize, - } for format_id, video_url in video['media_urls'].items()] + } for format_id, video_url in video['media_urls'].items() if video_url] + + if not formats: + hls_streams = self._get_streams(url, video_id, app_id_ver=(11, 2)) + if hls_streams: + # m3u8_native leads to intermittent ContentTooShortError + formats.extend(self._extract_m3u8_formats( + hls_streams[0]['url'], video_id, ext='mp4', m3u8_id='hls')) + + ''' + # DASH streams handling is incomplete as 'url' is missing + dash_streams = self._get_streams(url, video_id, app_id_ver=(3, 1)) + if dash_streams: + formats.extend(self._parse_segmented_mp4(dash_streams)) + ''' + self._sort_formats(formats) description = video.get('description') From 628406db960c032eb68ef318ce9fecf6b8329834 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:13:56 +0800 Subject: [PATCH 1725/3599] [Makefile] Cleanup files from fragment-based downloaders --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 354052c50..ac234fcb0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt 
MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From 4ddcb5999d0323fb83c5b879127d31763f5d63e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 23 Sep 2016 01:47:01 +0800 Subject: [PATCH 1726/3599] [openload] Fix extraction (closes #10408, closes #10727) Thanks to @daniel100097 for providing a working version --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5122af4c0..6c72bae90 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [openload] Fix extraction (#10408) + [ustream] Support the new HLS streams (#10698) version 2016.09.19 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index c261a7455..b6e3ac250 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -51,7 +51,8 @@ class OpenloadIE(InfoExtractor): # declared to be freely used in youtube-dl # See https://github.com/rg3/youtube-dl/issues/10408 enc_data = self._html_search_regex( - r'<span[^>]+id="hiddenurl"[^>]*>([^<]+)</span>', webpage, 'encrypted data') + r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"', + webpage, 'encrypted data') video_url_chars = [] @@ -60,7 +61,7 @@ class OpenloadIE(InfoExtractor): if j 
>= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 3 + j += 2 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From 45cae3b021828cc6f7a67c7a14645ae6f0806f59 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Thu, 22 Sep 2016 19:27:57 +0100 Subject: [PATCH 1727/3599] [cbs] extract info from thunder videoPlayerService(closes #10728) --- youtube_dl/extractor/cbs.py | 58 ++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 3f4dea40c..58f258c54 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,7 +4,9 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( int_or_none, find_xpath_attr, - ExtractorError, + xpath_element, + xpath_text, + update_url_query, ) @@ -47,27 +49,49 @@ class CBSIE(CBSBaseIE): 'only_matching': True, }] - def _extract_video_info(self, guid): - path = 'dJ5BDC/media/guid/2198311517/' + guid - smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path - formats, subtitles = self._extract_theplatform_smil(smil_url + '&manifest=m3u', guid) - for r in ('OnceURL&formats=M3U', 'HLS&formats=M3U', 'RTMP', 'WIFI', '3G'): - try: - tp_formats, _ = self._extract_theplatform_smil(smil_url + '&assetTypes=' + r, guid, 'Downloading %s SMIL data' % r.split('&')[0]) - formats.extend(tp_formats) - except ExtractorError: + def _extract_video_info(self, content_id): + items_data = self._download_xml( + 'http://can.cbs.com/thunder/player/videoPlayerService.php', + content_id, query={'partner': 'cbs', 'contentId': content_id}) + video_data = xpath_element(items_data, './/item') + title = xpath_text(video_data, 'videoTitle', 'title', True) + tp_path = 'dJ5BDC/media/guid/2198311517/%s' % content_id + tp_release_url = 'http://link.theplatform.com/s/' + tp_path + + asset_types = [] + subtitles = {} + formats 
= [] + for item in items_data.findall('.//item'): + asset_type = xpath_text(item, 'assetType') + if not asset_type or asset_type in asset_types: continue + asset_types.append(asset_type) + query = { + 'mbr': 'true', + 'assetTypes': asset_type, + } + if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): + query['formats'] = 'MPEG4,M3U' + elif asset_type in ('RTMP', 'WIFI', '3G'): + query['formats'] = 'MPEG4,FLV' + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data' % asset_type) + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) self._sort_formats(formats) - metadata = self._download_theplatform_metadata(path, guid) - info = self._parse_theplatform_metadata(metadata) + + info = self._extract_theplatform_metadata(tp_path, content_id) info.update({ - 'id': guid, + 'id': content_id, + 'title': title, + 'series': xpath_text(video_data, 'seriesTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), + 'thumbnail': xpath_text(video_data, 'previewImageURL'), 'formats': formats, 'subtitles': subtitles, - 'series': metadata.get('cbs$SeriesTitle'), - 'season_number': int_or_none(metadata.get('cbs$SeasonNumber')), - 'episode': metadata.get('cbs$EpisodeTitle'), - 'episode_number': int_or_none(metadata.get('cbs$EpisodeNumber')), }) return info From 71ad00c09fecd3ecc84784cf215537cad0a79595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 23 Sep 2016 21:08:16 +0700 Subject: [PATCH 1728/3599] [prosiebensat1] Add support for kabeleinsdoku (Closes #10732) --- youtube_dl/extractor/prosiebensat1.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py 
b/youtube_dl/extractor/prosiebensat1.py index 7335dc2af..5a29b844d 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -122,7 +122,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' _TESTS = [ { @@ -290,6 +290,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'skip_download': True, }, }, + { + # geo restricted to Germany + 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From 24628cf7db46ecce3fe56d387266c556cd9210ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:01:01 +0700 Subject: [PATCH 1729/3599] [soundcloud:playlist] Provide video id for playlist entries (Closes #10733) --- youtube_dl/extractor/soundcloud.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 513c54829..496cc5d8e 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -477,7 +477,11 @@ class SoundcloudPlaylistIE(SoundcloudIE): data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') - entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in data['tracks']] + entries = [ + self.url_result( + track['permalink_url'], SoundcloudIE.ie_key(), + video_id=compat_str(track['id']) if track.get('id') else None) + for track in data['tracks'] if track.get('permalink_url')] return { '_type': 
'playlist', From 8eec691e8a89d0094b806b86111fbcfd0ade64c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:12:49 +0700 Subject: [PATCH 1730/3599] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index 6c72bae90..e0908aa30 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,23 @@ version <unreleased> +Core ++ Add support for watchTVeverywhere.com authentication provider based MSOs for + Adobe Pass authentication (#10709) + Extractors ++ [soundcloud:playlist] Provide video id for early playlist entries (#10733) ++ [prosiebensat1] Add support for kabeleinsdoku (#10732) +* [cbs] Extract info from thunder videoPlayerService (#10728) * [openload] Fix extraction (#10408) + [ustream] Support the new HLS streams (#10698) ++ [ooyala] Extract all HLS formats ++ [cartoonnetwork] Add support for Adobe Pass authentication ++ [soundcloud] Extract license metadata ++ [fox] Add support for Adobe Pass authentication (#8584) ++ [tbs] Add support for Adobe Pass authentication (#10642, #10222) ++ [trutv] Add support for Adobe Pass authentication (#10519) ++ [turner] Add support for Adobe Pass authentication + version 2016.09.19 From e6332059ac66bfc91ed18e5b15d9238e4283ee7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 02:16:47 +0700 Subject: [PATCH 1731/3599] release 2016.09.24 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 8b28d784a..7669ab9b7 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.19*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.19** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.24** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.19 +[debug] youtube-dl version 2016.09.24 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index e0908aa30..a1c4df479 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version <unreleased> +version 2016.09.24 Core + Add support for watchTVeverywhere.com authentication provider based MSOs for diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9d3138181..2af6380b8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.19' +__version__ = '2016.09.24' From 5968d7d2fe619e85eb424d6e47d000f0b295d4a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Sat, 24 Sep 2016 14:20:42 +0800 Subject: [PATCH 1732/3599] [extractor/common] Improved support for HTML5 subtitles Ref: #10625 In a strict sense, <track>s with kind=captions are not subtitles. [1] openload misuses this attribute, and I guess there will be more examples, so I add it to common.py. Also allow extracting information for subtitles-only <video> or <audio> tags, which is the case of openload. [1] https://www.w3.org/TR/html5/embedded-content-0.html#attr-track-kind --- ChangeLog | 6 ++++++ youtube_dl/extractor/common.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a1c4df479..ebe4ff0e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +vesion <unreleased> + +Core ++ Improved support for HTML5 subtitles + + version 2016.09.24 Core diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c8991542..5cb4479ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1828,7 +1828,7 @@ class InfoExtractor(object): for track_tag in re.findall(r'<track[^>]+>', media_content): track_attributes = extract_attributes(track_tag) kind = track_attributes.get('kind') - if not kind or kind == 'subtitles': + if not kind or kind in ('subtitles', 'captions'): src = track_attributes.get('src') if not src: continue @@ -1836,7 +1836,7 @@ class InfoExtractor(object): media_info['subtitles'].setdefault(lang, []).append({ 'url': absolute_url(src), }) - if media_info['formats']: + if media_info['formats'] or media_info['subtitles']: entries.append(media_info) return entries From 0711995bcac2f44e09a943521dceb1c54bf8ffb7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 14:27:08 +0800 Subject: [PATCH 1733/3599] [openload] Support subtitles (closes #10625) --- ChangeLog | 3 +++ youtube_dl/extractor/openload.py | 24 +++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ChangeLog 
b/ChangeLog index ebe4ff0e8..766cc477b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ vesion <unreleased> Core + Improved support for HTML5 subtitles +Extractors ++ [openload] Support subtitles (#10625) + version 2016.09.24 diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b6e3ac250..4f5175136 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -24,6 +24,22 @@ class OpenloadIE(InfoExtractor): 'title': 'skyrim_no-audio_1080.mp4', 'thumbnail': 're:^https?://.*\.jpg$', }, + }, { + 'url': 'https://openload.co/embed/rjC09fkPLYs', + 'info_dict': { + 'id': 'rjC09fkPLYs', + 'ext': 'mp4', + 'title': 'movie.mp4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'subtitles': { + 'en': [{ + 'ext': 'vtt', + }], + }, + }, + 'params': { + 'skip_download': True, # test subtitles only + }, }, { 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', 'only_matching': True, @@ -71,11 +87,17 @@ class OpenloadIE(InfoExtractor): 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', fatal=True) - return { + entries = self._parse_html5_media_entries(url, webpage, video_id) + subtitles = entries[0]['subtitles'] if entries else None + + info_dict = { 'id': video_id, 'title': title, 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles 'ext': determine_ext(title), + 'subtitles': subtitles, } + + return info_dict From 8add4bfecb73f44cffe3cbf33941fc409564149b Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 10:41:38 +0100 Subject: [PATCH 1734/3599] [mtv] add support for new website urls(closes #8169)(closes #9808) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/mtv.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8166fd4f9..bf1f70885 
100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -516,6 +516,7 @@ from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVIE, + MTVVideoIE, MTVServicesEmbeddedIE, MTVDEIE, ) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index bdda68819..84a2dcb62 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -270,6 +270,27 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): + _VALID_URL = r'(?x)https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' + _FEED_URL = 'http://www.mtv.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer', + 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b', + 'info_dict': { + 'id': '5e14040d-18a4-47c4-a582-43ff602de88e', + 'ext': 'mp4', + 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer', + 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.', + 'timestamp': 1468846800, + 'upload_date': '20160718', + }, + }, { + 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101', + 'only_matching': True, + }] + + +class MTVVideoIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))''' From a54ffb8aa778062901dd15b020576bc7d472ae40 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 10:50:14 +0100 Subject: [PATCH 1735/3599] [mtv] add common IE_NAME prefix for MTVIE and MTVVideoIE --- youtube_dl/extractor/mtv.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 84a2dcb62..2e9580b10 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -270,7 +270,8 @@ class 
MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): - _VALID_URL = r'(?x)https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' + IE_NAME = 'mtv' + _VALID_URL = r'https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ @@ -291,6 +292,7 @@ class MTVIE(MTVServicesInfoExtractor): class MTVVideoIE(MTVServicesInfoExtractor): + IE_NAME = 'mtv:video' _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))''' From f0bc5a8609786633d8b51ab4255c1f0fdb941f73 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 24 Sep 2016 19:57:55 +0800 Subject: [PATCH 1736/3599] [twitter] Support Periscope embeds (closes #10737) Also update _TESTS --- ChangeLog | 1 + youtube_dl/extractor/periscope.py | 9 +++++++ youtube_dl/extractor/twitter.py | 45 +++++++++++++++++++++++++------ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 766cc477b..5c96dc179 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core + Improved support for HTML5 subtitles Extractors ++ [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index eb1aeba46..e8b2f11c6 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, @@ -41,6 +43,13 @@ class PeriscopeIE(PeriscopeBaseIE): 'only_matching': True, }] + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?periscope\.tv/(?:(?!\1).)+)\1', webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): token = 
self._match_id(url) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index c5a5843b6..3411fcf7e 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, float_or_none, @@ -13,6 +14,8 @@ from ..utils import ( ExtractorError, ) +from .periscope import PeriscopeIE + class TwitterBaseIE(InfoExtractor): def _get_vmap_video_url(self, vmap_url, video_id): @@ -48,12 +51,12 @@ class TwitterCardIE(TwitterBaseIE): }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', - 'md5': 'd4724ffe6d2437886d004fa5de1043b3', + 'md5': 'b6d9683dd3f48e340ded81c0e917ad46', 'info_dict': { 'id': 'dq4Oj5quskI', 'ext': 'mp4', 'title': 'Ubuntu 11.10 Overview', - 'description': 'Take a quick peek at what\'s new and improved in Ubuntu 11.10.\n\nOnce installed take a look at 10 Things to Do After Installing: http://www.omgubuntu.co.uk/2011/10/10...', + 'description': 'md5:a831e97fa384863d6e26ce48d1c43376', 'upload_date': '20111013', 'uploader': 'OMG! 
Ubuntu!', 'uploader_id': 'omgubuntu', @@ -100,12 +103,17 @@ class TwitterCardIE(TwitterBaseIE): return self.url_result(iframe_url) config = self._parse_json(self._html_search_regex( - r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'), + r'data-(?:player-)?config="([^"]+)"', webpage, + 'data player config', default='{}'), video_id) if config.get('source_type') == 'vine': return self.url_result(config['player_url'], 'Vine') + periscope_url = PeriscopeIE._extract_url(webpage) + if periscope_url: + return self.url_result(periscope_url, PeriscopeIE.ie_key()) + def _search_dimensions_in_video_url(a_format, video_url): m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url) if m: @@ -244,10 +252,10 @@ class TwitterIE(InfoExtractor): 'info_dict': { 'id': '700207533655363584', 'ext': 'mp4', - 'title': 'Donte The Dumbass - BEAT PROD: @suhmeduh #Damndaniel', - 'description': 'Donte The Dumbass on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', + 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel', + 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"', 'thumbnail': 're:^https?://.*\.jpg', - 'uploader': 'Donte The Dumbass', + 'uploader': 'JG', 'uploader_id': 'jaydingeer', }, 'params': { @@ -278,6 +286,18 @@ class TwitterIE(InfoExtractor): 'params': { 'skip_download': True, # requires ffmpeg }, + }, { + 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384', + 'info_dict': { + 'id': '1zqKVVlkqLaKB', + 'ext': 'mp4', + 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence', + 'upload_date': '20160923', + 'uploader_id': 'OPP_HSD', + 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police', + 'timestamp': 1474613214, + }, + 'add_ie': ['Periscope'], }] def _real_extract(self, url): @@ -328,13 +348,22 @@ class TwitterIE(InfoExtractor): }) return info + twitter_card_url = None 
if 'class="PlayableMedia' in webpage: + twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid) + else: + twitter_card_iframe_url = self._search_regex( + r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1', + webpage, 'Twitter card iframe URL', default=None, group='url') + if twitter_card_iframe_url: + twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url) + + if twitter_card_url: info.update({ '_type': 'url_transparent', 'ie_key': 'TwitterCard', - 'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid), + 'url': twitter_card_url, }) - return info raise ExtractorError('There\'s no video in this tweet.') From 8e45e1cc4d706e6b43dac8105acf3592fa3d4725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 19:18:01 +0700 Subject: [PATCH 1737/3599] [soundcloud] Generalize playlist entries extraction (#10733) --- youtube_dl/extractor/soundcloud.py | 42 ++++++++++++++++++------------ 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 496cc5d8e..f3cb35f77 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -260,7 +260,20 @@ class SoundcloudIE(InfoExtractor): return self._extract_info_dict(info, full_title, secret_token=token) -class SoundcloudSetIE(SoundcloudIE): +class SoundcloudBaseIE(SoundcloudIE): + @staticmethod + def _extract_id(e): + return compat_str(e['id']) if e.get('id') else None + + def _extract_track_entries(self, tracks): + return [ + self.url_result( + track['permalink_url'], SoundcloudIE.ie_key(), + video_id=self._extract_id(track)) + for track in tracks if track.get('permalink_url')] + + +class SoundcloudSetIE(SoundcloudBaseIE): _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' 
IE_NAME = 'soundcloud:set' _TESTS = [{ @@ -299,7 +312,7 @@ class SoundcloudSetIE(SoundcloudIE): msgs = (compat_str(err['error_message']) for err in info['errors']) raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) - entries = [self.url_result(track['permalink_url'], 'Soundcloud') for track in info['tracks']] + entries = self._extract_track_entries(info['tracks']) return { '_type': 'playlist', @@ -309,7 +322,7 @@ class SoundcloudSetIE(SoundcloudIE): } -class SoundcloudUserIE(SoundcloudIE): +class SoundcloudUserIE(SoundcloudBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -326,21 +339,21 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (All)', }, - 'playlist_mincount': 111, + 'playlist_mincount': 74, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Tracks)', }, - 'playlist_mincount': 50, + 'playlist_mincount': 37, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/sets', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Playlists)', }, - 'playlist_mincount': 3, + 'playlist_mincount': 2, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts', 'info_dict': { @@ -359,7 +372,7 @@ class SoundcloudUserIE(SoundcloudIE): 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { 'id': '7098329', - 'title': 'Grynpyret (Spotlight)', + 'title': 'GRYNPYRET (Spotlight)', }, 'playlist_mincount': 1, }] @@ -421,13 +434,14 @@ class SoundcloudUserIE(SoundcloudIE): for cand in candidates: if isinstance(cand, dict): permalink_url = cand.get('permalink_url') + entry_id = self._extract_id(cand) if permalink_url and permalink_url.startswith('http'): - return permalink_url + return permalink_url, entry_id for e in collection: - permalink_url = resolve_permalink_url((e, e.get('track'), e.get('playlist'))) + permalink_url, entry_id = resolve_permalink_url((e, 
e.get('track'), e.get('playlist'))) if permalink_url: - entries.append(self.url_result(permalink_url)) + entries.append(self.url_result(permalink_url, video_id=entry_id)) next_href = response.get('next_href') if not next_href: @@ -447,7 +461,7 @@ class SoundcloudUserIE(SoundcloudIE): } -class SoundcloudPlaylistIE(SoundcloudIE): +class SoundcloudPlaylistIE(SoundcloudBaseIE): _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ @@ -477,11 +491,7 @@ class SoundcloudPlaylistIE(SoundcloudIE): data = self._download_json( base_url + data, playlist_id, 'Downloading playlist') - entries = [ - self.url_result( - track['permalink_url'], SoundcloudIE.ie_key(), - video_id=compat_str(track['id']) if track.get('id') else None) - for track in data['tracks'] if track.get('permalink_url')] + entries = self._extract_track_entries(data['tracks']) return { '_type': 'playlist', From 7518a61d416133bff8b99c693dfca0b15c0d5b7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Sep 2016 19:29:49 +0700 Subject: [PATCH 1738/3599] [soundcloud] Fix typo in playlist base class name --- youtube_dl/extractor/soundcloud.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index f3cb35f77..1a8114aa7 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -260,7 +260,7 @@ class SoundcloudIE(InfoExtractor): return self._extract_info_dict(info, full_title, secret_token=token) -class SoundcloudBaseIE(SoundcloudIE): +class SoundcloudPlaylistBaseIE(SoundcloudIE): @staticmethod def _extract_id(e): return compat_str(e['id']) if e.get('id') else None @@ -273,7 +273,7 @@ class SoundcloudBaseIE(SoundcloudIE): for track in tracks if track.get('permalink_url')] -class SoundcloudSetIE(SoundcloudBaseIE): +class 
SoundcloudSetIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' IE_NAME = 'soundcloud:set' _TESTS = [{ @@ -322,7 +322,7 @@ class SoundcloudSetIE(SoundcloudBaseIE): } -class SoundcloudUserIE(SoundcloudBaseIE): +class SoundcloudUserIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'''(?x) https?:// (?:(?:www|m)\.)?soundcloud\.com/ @@ -461,7 +461,7 @@ class SoundcloudUserIE(SoundcloudBaseIE): } -class SoundcloudPlaylistIE(SoundcloudBaseIE): +class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' _TESTS = [{ From 6f126d903f46d976a380a5b4265084e5a21a3c09 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 15:38:19 +0100 Subject: [PATCH 1739/3599] [download/hls] Delegate downloading to ffmpeg for live streams --- youtube_dl/downloader/hls.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 5d70abf62..541b92ee1 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -31,7 +31,7 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest): + def can_download(manifest, info_dict): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] @@ -53,6 +53,7 @@ class HlsFD(FragmentFD): ) check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) + check_results.append(not info_dict.get('is_live')) return all(check_results) def real_download(self, filename, info_dict): @@ -62,7 +63,7 @@ class HlsFD(FragmentFD): s = manifest.decode('utf-8', 
'ignore') - if not self.can_download(s): + if not self.can_download(s, info_dict): self.report_warning( 'hlsnative has detected features it does not support, ' 'extraction will be delegated to ffmpeg') From 27e99078d337cdc77a5a7228998d3b2fe722e7cb Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 15:39:06 +0100 Subject: [PATCH 1740/3599] [brightcove:new] add support for live streams --- youtube_dl/extractor/brightcove.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aeb22be16..2ec55b185 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -621,15 +621,21 @@ class BrightcoveNewIE(InfoExtractor): 'url': text_track['src'], }) + is_live = False + duration = float_or_none(json_data.get('duration'), 1000) + if duration and duration < 0: + is_live = True + return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': clean_html(json_data.get('description')), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), - 'duration': float_or_none(json_data.get('duration'), 1000), + 'duration': duration, 'timestamp': parse_iso8601(json_data.get('published_at')), 'uploader_id': account_id, 'formats': formats, 'subtitles': subtitles, 'tags': json_data.get('tags', []), + 'is_live': is_live, } From e71a450956c808d469b983e5ffde1a63aff24390 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 21:55:53 +0100 Subject: [PATCH 1741/3599] [common] add hdcore sign to akamai f4m formats --- youtube_dl/extractor/common.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5cb4479ec..1076b46da 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1842,10 +1842,15 @@ class 
InfoExtractor(object): def _extract_akamai_formats(self, manifest_url, video_id): formats = [] + hdcore_sign = 'hdcore=3.7.0' f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') - formats.extend(self._extract_f4m_formats( - update_url_query(f4m_url, {'hdcore': '3.7.0'}), - video_id, f4m_id='hds', fatal=False)) + if 'hdcore=' not in f4m_url: + f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign + f4m_formats = self._extract_f4m_formats( + f4m_url, video_id, f4m_id='hds', fatal=False) + for entry in f4m_formats: + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.extend(f4m_formats) m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') formats.extend(self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', From 7fd57de6fb146ffca594e4ae632d7ff217926b52 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Sat, 24 Sep 2016 21:59:48 +0100 Subject: [PATCH 1742/3599] [cbsnews:livevideo] fix extraction and extract m3u8 formats --- youtube_dl/extractor/cbsnews.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 4aa6917a0..216989230 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -9,6 +9,7 @@ from ..utils import ( class CBSNewsIE(CBSIE): + IE_NAME = 'cbsnews' IE_DESC = 'CBS News' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' @@ -68,15 +69,16 @@ class CBSNewsIE(CBSIE): class CBSNewsLiveVideoIE(InfoExtractor): + IE_NAME = 'cbsnews:livevideo' IE_DESC = 'CBS News Live Videos' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[^/?#]+)' # Live videos get deleted soon. 
See http://www.cbsnews.com/live/ for the latest examples _TEST = { 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'info_dict': { 'id': 'clinton-sanders-prepare-to-face-off-in-nh', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Clinton, Sanders Prepare To Face Off In NH', 'duration': 334, }, @@ -84,25 +86,22 @@ class CBSNewsLiveVideoIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_info = self._download_json( + 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={ + 'device': 'desktop', + 'dvr_slug': display_id, + }) - video_info = self._parse_json(self._html_search_regex( - r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story'] - - hdcore_sign = 'hdcore=3.3.1' - f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id) - if f4m_formats: - for entry in f4m_formats: - # URLs without the extra param induce an 404 error - entry.update({'extra_param_to_segment_url': hdcore_sign}) - self._sort_formats(f4m_formats) + formats = self._extract_akamai_formats(video_info['url'], display_id) + self._sort_formats(formats) return { - 'id': video_id, + 'id': display_id, + 'display_id': display_id, 'title': video_info['headline'], 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'), 'duration': parse_duration(video_info.get('segmentDur')), - 'formats': f4m_formats, + 'formats': formats, } From 63c583eb2c9a906ba1075da289afdde29b385fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 04:43:10 +0700 Subject: [PATCH 1743/3599] [prosiebensat1] Add support for sat1gold (#10745) --- youtube_dl/extractor/prosiebensat1.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py 
b/youtube_dl/extractor/prosiebensat1.py index 5a29b844d..2f5aa530a 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -122,7 +122,17 @@ class ProSiebenSat1BaseIE(InfoExtractor): class ProSiebenSat1IE(ProSiebenSat1BaseIE): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P<id>.+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + (?: + prosieben|prosiebenmaxx|sixx|sat1(?:gold)?|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku + )\.(?:de|at|ch)| + ran\.de|fem\.com + ) + /(?P<id>.+) + ''' _TESTS = [ { @@ -295,6 +305,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', 'only_matching': True, }, + { + # geo restricted to Germany + 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From ddde91952f4eec796b14eb258c0cb33dda3935bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 05:36:18 +0700 Subject: [PATCH 1744/3599] [prosiebensat1] Fix playlist support (Closes #10745) --- youtube_dl/extractor/prosiebensat1.py | 39 ++++++++++++++++++--------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 2f5aa530a..a064de05e 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -310,6 +310,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', 'only_matching': True, }, + { + 'url': 
'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' @@ -381,19 +385,28 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): def _extract_playlist(self, url, webpage): playlist_id = self._html_search_regex( self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') - for regex in self._PLAYLIST_CLIP_REGEXES: - playlist_clips = re.findall(regex, webpage) - if playlist_clips: - title = self._html_search_regex( - self._TITLE_REGEXES, webpage, 'title') - description = self._html_search_regex( - self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) - entries = [ - self.url_result( - re.match('(.+?//.+?)/', url).group(1) + clip_path, - 'ProSiebenSat1') - for clip_path in playlist_clips] - return self.playlist_result(entries, playlist_id, title, description) + playlist = self._parse_json( + self._search_regex( + 'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', + webpage, 'playlist'), + playlist_id) + entries = [] + for item in playlist: + clip_id = item.get('id') or item.get('upc') + if not clip_id: + continue + info = self._extract_video_info(url, clip_id) + info.update({ + 'id': clip_id, + 'title': item.get('title') or item.get('teaser', {}).get('headline'), + 'description': item.get('teaser', {}).get('description'), + 'thumbnail': item.get('poster'), + 'duration': float_or_none(item.get('duration')), + 'series': item.get('tvShowTitle'), + 'uploader': item.get('broadcastPublisher'), + }) + entries.append(info) + return self.playlist_result(entries, playlist_id) def _real_extract(self, url): video_id = self._match_id(url) From f92bb612c69957c3803aaf14aea1d03a7d7d917f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:14:32 +0700 Subject: [PATCH 1745/3599] [mwave] Relax _VALID_URLs (Closes #10735, closes #10748) --- youtube_dl/extractor/mwave.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git 
a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py index a103e0323..fea1caf47 100644 --- a/youtube_dl/extractor/mwave.py +++ b/youtube_dl/extractor/mwave.py @@ -9,9 +9,9 @@ from ..utils import ( class MwaveIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' _URL_TEMPLATE = 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=%s' - _TEST = { + _TESTS = [{ 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', # md5 is unstable 'info_dict': { @@ -23,7 +23,10 @@ class MwaveIE(InfoExtractor): 'duration': 206, 'view_count': int, } - } + }, { + 'url': 'http://mwave.interest.me/en/mnettv/videodetail.m?searchVideoDetailVO.clip_id=176199', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -60,8 +63,8 @@ class MwaveIE(InfoExtractor): class MwaveMeetGreetIE(InfoExtractor): - _VALID_URL = r'https?://mwave\.interest\.me/meetgreet/view/(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://mwave\.interest\.me/(?:[^/]+/)?meetgreet/view/(?P<id>\d+)' + _TESTS = [{ 'url': 'http://mwave.interest.me/meetgreet/view/256', 'info_dict': { 'id': '173294', @@ -72,7 +75,10 @@ class MwaveMeetGreetIE(InfoExtractor): 'duration': 3634, 'view_count': int, } - } + }, { + 'url': 'http://mwave.interest.me/en/meetgreet/view/256', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 0a078550b9ac570cb357c2af74a39068d08ce1ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:19:17 +0700 Subject: [PATCH 1746/3599] [prosiebensat1] Improve _VALID_URL --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py 
b/youtube_dl/extractor/prosiebensat1.py index a064de05e..84d04aa69 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -127,7 +127,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?:www\.)? (?: (?: - prosieben|prosiebenmaxx|sixx|sat1(?:gold)?|kabeleins|the-voice-of-germany|7tv|kabeleinsdoku + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv )\.(?:de|at|ch)| ran\.de|fem\.com ) From 493353c7fd5d15fa35152915c10c7249277b5ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 06:25:57 +0700 Subject: [PATCH 1747/3599] [prosiebensat1] Add support for advopedia --- youtube_dl/extractor/prosiebensat1.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 84d04aa69..873d4f981 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -127,9 +127,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): (?:www\.)? 
(?: (?: - prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia )\.(?:de|at|ch)| - ran\.de|fem\.com + ran\.de|fem\.com|advopedia\.de ) /(?P<id>.+) ''' @@ -314,6 +314,10 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', 'only_matching': True, }, + { + 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage', + 'only_matching': True, + }, ] _TOKEN = 'prosieben' From 2d3d29976b2c83e8daab62a0f2a61c232692a310 Mon Sep 17 00:00:00 2001 From: stepshal <nessento@openmailbox.org> Date: Sat, 17 Sep 2016 21:48:20 +0700 Subject: [PATCH 1748/3599] [youtube] Change test URLs from http to https --- youtube_dl/extractor/youtube.py | 34 ++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5ca903825..f86823112 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -369,7 +369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube' _TESTS = [ { - 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', + 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', 'info_dict': { 'id': 'BaW_jenozKc', 'ext': 'mp4', @@ -389,7 +389,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } }, { - 'url': 'http://www.youtube.com/watch?v=UxxajLWwzqY', + 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY', 'note': 'Test generic use_cipher_signature video (#897)', 'info_dict': { 'id': 'UxxajLWwzqY', @@ -443,7 +443,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } }, { - 'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY', + 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY', 'note': 'Use the first video ID in the URL', 'info_dict': { 'id': 
'BaW_jenozKc', @@ -465,7 +465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { - 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', + 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', 'note': '256k DASH audio (format 141) via DASH manifest', 'info_dict': { 'id': 'a9LDPn-MO4I', @@ -539,7 +539,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, # Normal age-gate video (No vevo, embed allowed) { - 'url': 'http://youtube.com/watch?v=HtVdAasjOgU', + 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 'info_dict': { 'id': 'HtVdAasjOgU', 'ext': 'mp4', @@ -555,7 +555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, # Age-gate video with encrypted signature { - 'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU', + 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU', 'info_dict': { 'id': '6kLq3WMV1nU', 'ext': 'mp4', @@ -748,11 +748,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip': 'Not multifeed anymore', }, { - 'url': 'http://vid.plus/FlRa-iH7PGw', + 'url': 'https://vid.plus/FlRa-iH7PGw', 'only_matching': True, }, { - 'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', + 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', 'only_matching': True, }, { @@ -1846,7 +1846,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'playlist_count': 2, }, { 'note': 'embedded', - 'url': 'http://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', + 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu', 'playlist_count': 4, 'info_dict': { 'title': 'JODA15', @@ -1854,7 +1854,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): } }, { 'note': 'Embedded SWF player', - 'url': 'http://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', + 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0', 'playlist_count': 4, 'info_dict': { 
'title': 'JODA7', @@ -2156,7 +2156,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:live' _TESTS = [{ - 'url': 'http://www.youtube.com/user/TheYoungTurks/live', + 'url': 'https://www.youtube.com/user/TheYoungTurks/live', 'info_dict': { 'id': 'a48o2S1cPoo', 'ext': 'mp4', @@ -2176,7 +2176,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, { - 'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 'only_matching': True, }] @@ -2201,7 +2201,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): IE_NAME = 'youtube:playlists' _TESTS = [{ - 'url': 'http://www.youtube.com/user/ThirstForScience/playlists', + 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', 'playlist_mincount': 4, 'info_dict': { 'id': 'ThirstForScience', @@ -2209,7 +2209,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): }, }, { # with "Load more" button - 'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', + 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', 'playlist_mincount': 70, 'info_dict': { 'id': 'igorkle1', @@ -2442,10 +2442,10 @@ class YoutubeTruncatedURLIE(InfoExtractor): ''' _TESTS = [{ - 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', + 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True, }, { - 'url': 'http://www.youtube.com/watch?', + 'url': 'https://www.youtube.com/watch?', 'only_matching': True, }, { 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', @@ -2466,7 +2466,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): 'Did you forget to quote the URL? 
Remember that & is a meta ' 'character in most shells, so you want to put the URL in quotes, ' 'like youtube-dl ' - '"http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' + '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' ' or simply youtube-dl BaW_jenozKc .', expected=True) From f3625cc4ca8d8683b900e070ad7acd58b1fac5c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 18:08:35 +0700 Subject: [PATCH 1749/3599] [PULL_REQUEST_TEMPLATE.md] Add Unlicense notice --- .github/PULL_REQUEST_TEMPLATE.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f24bb4b09..3a168b7b1 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,6 +10,10 @@ - [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: +- [ ] I am the original original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) + ### What is the purpose of your *pull request*? 
- [ ] Bug fix - [ ] New extractor From e590b7ff9e8e408bb9ec4da58ab6847686d29dbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 18:09:46 +0700 Subject: [PATCH 1750/3599] [PULL_REQUEST_TEMPLATE.md] Add checkable Improvement options PR's purpose --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3a168b7b1..89e8a3188 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,6 +16,7 @@ ### What is the purpose of your *pull request*? - [ ] Bug fix +- [ ] Improvement - [ ] New extractor - [ ] New feature From a3d8b3816802c76beffa48789eac5181e02db3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 25 Sep 2016 21:58:17 +0700 Subject: [PATCH 1751/3599] [npo] Generalize playlist extractors --- youtube_dl/extractor/npo.py | 63 +++++++++++++++---------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 3293bdb17..f95867d58 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -438,9 +438,29 @@ class SchoolTVIE(InfoExtractor): } -class VPROIE(NPOIE): +class NPOPlaylistBaseIE(NPOIE): + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) + for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage) + ] + + playlist_title = self._html_search_regex( + self._PLAYLIST_TITLE_RE, webpage, 'playlist title', + default=None) or self._og_search_title(webpage) + + return self.playlist_result(entries, playlist_id, playlist_title) + + +class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' _VALID_URL = 
r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html' + _PLAYLIST_TITLE_RE = r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*' + _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ { @@ -473,48 +493,17 @@ class VPROIE(NPOIE): } ] - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) - for video_id in re.findall(r'data-media-id="([^"]+)"', webpage) - ] - - playlist_title = self._search_regex( - r'\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*', - webpage, 'playlist title', default=None) or self._og_search_title(webpage) - - return self.playlist_result(entries, playlist_id, playlist_title) - - -class WNLIE(InfoExtractor): +class WNLIE(NPOPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P[^/]+)__\d+' + _PLAYLIST_TITLE_RE = r'(?s)]+class="subject"[^>]*>(.+?)

' + _PLAYLIST_ENTRY_RE = r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' - _TEST = { + _TESTS = [{ 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', 'info_dict': { 'id': 'vandaag-de-dag-6-mei', 'title': 'Vandaag de Dag 6 mei', }, 'playlist_count': 4, - } - - def _real_extract(self, url): - playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) - - entries = [ - self.url_result('npo:%s' % video_id, 'NPO') - for video_id, part in re.findall( - r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) - ] - - playlist_title = self._html_search_regex( - r'(?s)]+class="subject"[^>]*>(.+?)', - webpage, 'playlist title') - - return self.playlist_result(entries, playlist_id, playlist_title) + }] From ddb19772d572ae2118664a22d083a8f31fc63d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:03:18 +0700 Subject: [PATCH 1752/3599] [vpro] Fix playlist title extraction and update tests --- youtube_dl/extractor/npo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index f95867d58..ff02d0309 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -459,7 +459,7 @@ class NPOPlaylistBaseIE(NPOIE): class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' - _PLAYLIST_TITLE_RE = r'\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*' + _PLAYLIST_TITLE_RE = r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)' _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ @@ -473,12 +473,13 @@ class VPROIE(NPOPlaylistBaseIE): 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', }, + 'skip': 'Video gone', }, { 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html', 'info_dict': { 'id': 'sergio-herman', - 'title': 'Sergio Herman: Fucking perfect', + 'title': 
'sergio herman: fucking perfect', }, 'playlist_count': 2, }, @@ -487,7 +488,7 @@ class VPROIE(NPOPlaylistBaseIE): 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html', 'info_dict': { 'id': 'education-education', - 'title': '2Doc', + 'title': 'education education', }, 'playlist_count': 2, } From 5742c18bc1ea3da5b0fd480e75fcdf099220e52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:19:00 +0700 Subject: [PATCH 1753/3599] [npo] Add support for anderetijden.nl (Closes #10754) --- youtube_dl/extractor/npo.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index ff02d0309..66035a77c 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( fix_xml_ampersands, + orderedSet, parse_duration, qualities, strip_jsonp, @@ -446,7 +447,7 @@ class NPOPlaylistBaseIE(NPOIE): entries = [ self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) - for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage) + for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage)) ] playlist_title = self._html_search_regex( @@ -508,3 +509,18 @@ class WNLIE(NPOPlaylistBaseIE): }, 'playlist_count': 4, }] + + +class AndereTijdenIE(NPOPlaylistBaseIE): + _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P[^/?#&]+)' + _PLAYLIST_TITLE_RE = r'(?s)]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)' + _PLAYLIST_ENTRY_RE = r']+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']' + + _TESTS = [{ + 'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem', + 'info_dict': { + 'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem', + 'title': 'Duitse soldaten over de Slag bij Arnhem', + }, + 'playlist_count': 3, + }] From 
f1ee462c82381d3c68673500c0491fe477030c3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 22:38:36 +0700 Subject: [PATCH 1754/3599] [PULL_REQUEST_TEMPLATE.md] Fix typo --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 89e8a3188..46fa26f02 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,7 +11,7 @@ - [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests ### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options: -- [ ] I am the original original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) - [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) ### What is the purpose of your *pull request*? 
From 2d5b4af0070f8aa6f3f4eb8fdabef5d006f6429a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 25 Sep 2016 23:30:57 +0700 Subject: [PATCH 1755/3599] [extractors] Add import for anderetijden extractor --- youtube_dl/extractor/extractors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index bf1f70885..23fd2a308 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -612,13 +612,14 @@ from .nowtv import ( ) from .noz import NozIE from .npo import ( + AndereTijdenIE, NPOIE, NPOLiveIE, NPORadioIE, NPORadioFragmentIE, SchoolTVIE, VPROIE, - WNLIE + WNLIE, ) from .npr import NprIE from .nrk import ( From d3c97bad6181e1d3dc0cb4eece041e1cfb0ba6bc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 26 Sep 2016 14:14:37 +0800 Subject: [PATCH 1756/3599] Ignore and cleanup 3gp files --- .gitignore | 1 + Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a802c75a1..002b700f5 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ updates_key.pem *.m4a *.m4v *.mp3 +*.3gp *.part *.swp test/testdata diff --git a/Makefile b/Makefile index ac234fcb0..a2763a664 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh 
youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete From fffb9cff944cfab11f311900ee8138f28f7232d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Sep 2016 22:15:58 +0700 Subject: [PATCH 1757/3599] [kaltura] Speed up embed regexes (#10764) --- youtube_dl/extractor/kaltura.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 5a8403777..91bc3a0a7 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -105,20 +105,20 @@ class KalturaIE(InfoExtractor): kWidget\.(?:thumb)?[Ee]mbed\( \{.*? (?P['\"])wid(?P=q1)\s*:\s* - (?P['\"])_?(?P[^'\"]+)(?P=q2),.*? + (?P['\"])_?(?P(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P['\"])entry_?[Ii]d(?P=q3)\s*:\s* - (?P['\"])(?P[^'\"]+)(?P=q4), + (?P['\"])(?P(?:(?!(?P=q4)).)+)(?P=q4), """, webpage) or re.search( r'''(?xs) (?P["\']) - (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P\d+).*? + (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/(?:(?!(?P=q1)).)*(?:p|partner_id)/(?P\d+)(?:(?!(?P=q1)).)* (?P=q1).*? 
(?: entry_?[Ii]d| (?P["\'])entry_?[Ii]d(?P=q2) )\s*:\s* - (?P["\'])(?P.+?)(?P=q3) + (?P["\'])(?P(?:(?!(?P=q3)).)+)(?P=q3) ''', webpage)) if mobj: embed_info = mobj.groupdict() From d3dbb46330461c0c70c3aae47b69d27882cfc325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20B=C3=A1rta?= Date: Sun, 25 Sep 2016 12:44:46 +0200 Subject: [PATCH 1758/3599] [promptfile] Fix extraction (Closes #10634) --- youtube_dl/extractor/promptfile.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index f93bd19ff..54c4aee13 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -15,12 +15,12 @@ from ..utils import ( class PromptFileIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P[0-9A-Z\-]+)' _TEST = { - 'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF', - 'md5': 'd1451b6302da7215485837aaea882c4c', + 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416', + 'md5': '5a7e285a26e0d66d9a263fae91bc92ce', 'info_dict': { - 'id': 'D21B4746E9-F01462F0FF', + 'id': '86D1CE8462-576CAAE416', 'ext': 'mp4', - 'title': 'Birds.mp4', + 'title': 'oceans.mp4', 'thumbnail': 're:^https?://.*\.jpg$', } } @@ -33,14 +33,20 @@ class PromptFileIE(InfoExtractor): raise ExtractorError('Video %s does not exist' % video_id, expected=True) + chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)' + chash = self._html_search_regex(chash_pattern, webpage, "chash") fields = self._hidden_inputs(webpage) + k = list(fields)[0] + fields[k] = chash + fields[k] + post = urlencode_postdata(fields) req = sanitized_Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( req, video_id, 'Downloading video page') - url = self._html_search_regex(r'url:\s*\'([^\']+)\'', webpage, 'URL') + url_pattern = r'', webpage, 'title') thumbnail = self._html_search_regex( From 
72c3d02d294b04b35a19417b31ad497e7540caa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 26 Sep 2016 23:39:54 +0700 Subject: [PATCH 1759/3599] [promptfile] Improve and modernize --- youtube_dl/extractor/promptfile.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 54c4aee13..d40cca06f 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -7,7 +7,6 @@ from .common import InfoExtractor from ..utils import ( determine_ext, ExtractorError, - sanitized_Request, urlencode_postdata, ) @@ -33,20 +32,23 @@ class PromptFileIE(InfoExtractor): raise ExtractorError('Video %s does not exist' % video_id, expected=True) - chash_pattern = r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)' - chash = self._html_search_regex(chash_pattern, webpage, "chash") + chash = self._search_regex( + r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') fields = self._hidden_inputs(webpage) - k = list(fields)[0] - fields[k] = chash + fields[k] + keys = list(fields.keys()) + chash_key = keys[0] if len(keys) == 1 else next( + key for key in keys if key.startswith('cha')) + fields[chash_key] = chash + fields[chash_key] - post = urlencode_postdata(fields) - req = sanitized_Request(url, post) - req.add_header('Content-type', 'application/x-www-form-urlencoded') webpage = self._download_webpage( - req, video_id, 'Downloading video page') + url, video_id, 'Downloading video page', + data=urlencode_postdata(fields), + headers={'Content-type': 'application/x-www-form-urlencoded'}) - url_pattern = r']+href=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*Download File', + r']+href=(["\'])(?Phttps?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), + webpage, 'video url', group='url') title = self._html_search_regex( r'', webpage, 'title') thumbnail = self._html_search_regex( @@ -55,7 +57,7 @@ class PromptFileIE(InfoExtractor): 
formats = [{ 'format_id': 'sd', - 'url': url, + 'url': video_url, 'ext': determine_ext(title), }] self._sort_formats(formats) From d75d9e343e91527c1fe34678e913ae16a0eafbdd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 27 Sep 2016 14:38:41 +0800 Subject: [PATCH 1760/3599] [einthusan] Fix extraction (closes #10714) --- ChangeLog | 1 + youtube_dl/extractor/einthusan.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5c96dc179..fdebb89b9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core + Improved support for HTML5 subtitles Extractors +* [einthusan] Fix extraction (#10714) + [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index f7339702c..443865ad2 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -14,7 +14,7 @@ class EinthusanIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.einthusan.com/movies/watch.php?id=2447', - 'md5': 'af244f4458cd667205e513d75da5b8b1', + 'md5': 'd71379996ff5b7f217eca034c34e3461', 'info_dict': { 'id': '2447', 'ext': 'mp4', @@ -25,13 +25,13 @@ class EinthusanIE(InfoExtractor): }, { 'url': 'http://www.einthusan.com/movies/watch.php?id=1671', - 'md5': 'ef63c7a803e22315880ed182c10d1c5c', + 'md5': 'b16a6fd3c67c06eb7c79c8a8615f4213', 'info_dict': { 'id': '1671', 'ext': 'mp4', 'title': 'Soodhu Kavvuum', 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'md5:05d8a0c0281a4240d86d76e14f2f4d51', + 'description': 'md5:b40f2bf7320b4f9414f3780817b2af8c', } }, ] @@ -50,9 +50,11 @@ class EinthusanIE(InfoExtractor): video_id = self._search_regex( r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) - video_url = self._download_webpage( + m3u8_url = self._download_webpage( 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' - % video_id, video_id) + % video_id, video_id, 
headers={'Referer': url}) + formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native') description = self._html_search_meta('description', webpage) thumbnail = self._html_search_regex( @@ -64,7 +66,7 @@ class EinthusanIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'thumbnail': thumbnail, 'description': description, } From 93933c9819fa1282081a5f0761cbeabc9fbea336 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 27 Sep 2016 15:28:37 +0100 Subject: [PATCH 1761/3599] [awaan:video] fix test(closes #10773) --- youtube_dl/extractor/awaan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py index 66d7515bc..a2603bbff 100644 --- a/youtube_dl/extractor/awaan.py +++ b/youtube_dl/extractor/awaan.py @@ -66,6 +66,7 @@ class AWAANVideoIE(AWAANBaseIE): 'duration': 2041, 'timestamp': 1227504126, 'upload_date': '20081124', + 'uploader_id': '71', }, }, { 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', From 2342733f850c979c6f23ea2e83dfcb176fb08fa5 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 27 Sep 2016 15:29:50 +0100 Subject: [PATCH 1762/3599] fix tests related to 1978540a5122c53012e17a78841f3da0df77fd34(closes #10774) --- youtube_dl/extractor/formula1.py | 6 +++++- youtube_dl/extractor/voxmedia.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/formula1.py b/youtube_dl/extractor/formula1.py index 8c417ab65..fecfc28ae 100644 --- a/youtube_dl/extractor/formula1.py +++ b/youtube_dl/extractor/formula1.py @@ -11,9 +11,13 @@ class Formula1IE(InfoExtractor): 'md5': '8c79e54be72078b26b89e0e111c0502b', 'info_dict': { 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Race highlights - Spain 2016', }, + 
'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { 'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html', diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py index b1b32ad44..f8e331493 100644 --- a/youtube_dl/extractor/voxmedia.py +++ b/youtube_dl/extractor/voxmedia.py @@ -9,13 +9,16 @@ class VoxMediaIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com/(?:[^/]+/)*(?P[^/?]+)' _TESTS = [{ 'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of', - 'md5': '73856edf3e89a711e70d5cf7cb280b37', 'info_dict': { 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe', 'ext': 'mp4', 'title': 'Google\'s new material design direction', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { # data-ooyala-id @@ -31,13 +34,16 @@ class VoxMediaIE(InfoExtractor): }, { # volume embed 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', - 'md5': '375c483c5080ab8cd85c9c84cfc2d1e4', 'info_dict': { 'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b', 'ext': 'mp4', 'title': 'The new frontier of LGBTQ civil rights, explained', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['Ooyala'], }, { # youtube embed From f9dd86a112835e04e271e8d1d844f250e6ff0c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 21:37:33 +0700 Subject: [PATCH 1763/3599] [npo] Clarify IE_NAMEs (Closes #10775) --- youtube_dl/extractor/npo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 66035a77c..9c7cc777b 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -497,6 +497,7 @@ class 
VPROIE(NPOPlaylistBaseIE): class WNLIE(NPOPlaylistBaseIE): + IE_NAME = 'wnl' _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P[^/]+)__\d+' _PLAYLIST_TITLE_RE = r'(?s)]+class="subject"[^>]*>(.+?)' _PLAYLIST_ENTRY_RE = r']+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+' @@ -512,6 +513,7 @@ class WNLIE(NPOPlaylistBaseIE): class AndereTijdenIE(NPOPlaylistBaseIE): + IE_NAME = 'anderetijden' _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P[^/?#&]+)' _PLAYLIST_TITLE_RE = r'(?s)]+class=["\'].*?\bpage-title\b.*?["\'][^>]*>(.+?)' _PLAYLIST_ENTRY_RE = r']+class=["\']episode-container episode-page["\'][^>]+data-prid=["\'](.+?)["\']' From 1a2fbe322ee2d711b474f32a7d3f331791fb1881 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 21:55:51 +0700 Subject: [PATCH 1764/3599] [periscope] Treat timed_out state as finished stream --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index e8b2f11c6..61043cad5 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -87,7 +87,7 @@ class PeriscopeIE(PeriscopeBaseIE): 'ext': 'flv' if format_id == 'rtmp' else 'mp4', } if format_id != 'rtmp': - f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8' + f['protocol'] = 'm3u8_native' if state in ('ended', 'timed_out') else 'm3u8' formats.append(f) self._sort_formats(formats) From e3845525906228091fdf446f2cf2e9a20e93f59f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 25 Sep 2016 05:39:29 +0200 Subject: [PATCH 1765/3599] [vk] Add support for dailymotion embeds Fixes #10661 --- youtube_dl/extractor/vk.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index cd22df25a..f26e0732c 100644 --- a/youtube_dl/extractor/vk.py +++ 
b/youtube_dl/extractor/vk.py @@ -23,8 +23,9 @@ from ..utils import ( unified_strdate, urlencode_postdata, ) -from .vimeo import VimeoIE +from .dailymotion import DailymotionIE from .pladform import PladformIE +from .vimeo import VimeoIE class VKBaseIE(InfoExtractor): @@ -210,6 +211,23 @@ class VKIE(VKBaseIE): 'view_count': int, }, }, + { + # dailymotion embed + 'url': 'https://vk.com/video-37468416_456239855', + 'info_dict': { + 'id': 'k3lz2cmXyRuJQSjGHUv', + 'ext': 'mp4', + 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', + 'description': 'md5:c651358f03c56f1150b555c26d90a0fd', + 'uploader': 'AniLibria.Tv', + 'upload_date': '20160914', + 'uploader_id': 'x1p5vl5', + 'timestamp': 1473877246, + }, + 'params': { + 'skip_download': True, + } + }, { # video key is extra_data not url\d+ 'url': 'http://vk.com/video-110305615_171782105', @@ -315,6 +333,10 @@ class VKIE(VKBaseIE): m_rutube.group(1).replace('\\', '')) return self.url_result(rutube_url) + dailymotion_urls = DailymotionIE._extract_urls(info_page) + if dailymotion_urls: + return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key()) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) if m_opts: m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) From cdfcc4ce95e351c3f560fa3c07ae7d4ab188ef25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 22:27:10 +0700 Subject: [PATCH 1766/3599] [mtv] Improve _VALID_URL --- youtube_dl/extractor/mtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 2e9580b10..74a3a035e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -271,7 +271,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): class MTVIE(MTVServicesInfoExtractor): IE_NAME = 'mtv' - _VALID_URL = r'https?://(?:www\.)?mtv\.com/(video-clips|full-episodes)/(?P[^/?#.]+)' + _VALID_URL = 
r'https?://(?:www\.)?mtv\.com/(?:video-clips|full-episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.mtv.com/feeds/mrss/' _TESTS = [{ From dd2cffeeec8feac8fe52924760b2cb368249396a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 22:43:35 +0700 Subject: [PATCH 1767/3599] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ChangeLog b/ChangeLog index fdebb89b9..33c94ef55 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,24 @@ vesion Core ++ Add hdcore query parameter to akamai f4m formats ++ Delegate HLS live streams downloading to ffmpeg + Improved support for HTML5 subtitles Extractors ++ [vk] Add support for dailymotion embeds (#10661) +* [einthusan] Fix extraction (#10714) +* [promptfile] Fix extraction (#10634) +* [kaltura] Speed up embed regular expressions (#10764) ++ [npo] Add support for anderetijden.nl (#10754) ++ [prosiebensat1] Add support for advopedia sites +* [mwave] Relax URL regular expression (#10735, #10748) +* [prosiebensat1] Fix playlist support (#10745) ++ [prosiebensat1] Add support for sat1gold sites (#10745) ++ [cbsnews:livevideo] Fix extraction and extract m3u8 formats ++ [brightcove:new] Add support for live streams +* [soundcloud] Generalize playlist entries extraction (#10733) ++ [mtv] Add support for new URL schema (#8169, #9808) * [einthusan] Fix extraction (#10714) + [twitter] Support Periscope embeds (#10737) + [openload] Support subtitles (#10625) From c8f45f763cac3c0d0e4ca35ba072d8d321957e85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 23:03:00 +0700 Subject: [PATCH 1768/3599] [ChangeLog] Remove duplicate --- ChangeLog | 1 - 1 file changed, 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 33c94ef55..0d15b6a82 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,7 +7,6 @@ Core Extractors + [vk] Add support for dailymotion embeds (#10661) -* [einthusan] Fix extraction (#10714) * [promptfile] Fix extraction 
(#10634) * [kaltura] Speed up embed regular expressions (#10764) + [npo] Add support for anderetijden.nl (#10754) From 8f0cf20ab987019c3ba66c375450f80bb1cfe281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 27 Sep 2016 23:09:46 +0700 Subject: [PATCH 1769/3599] release 2016.09.27 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 10 ++++++---- youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 7669ab9b7..273eb8c0b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.24*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.24** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.24 +[debug] youtube-dl version 2016.09.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 0d15b6a82..f8149cc30 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -vesion +vesion 2016.09.27 Core + Add hdcore query parameter to akamai f4m formats diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 95a137393..26f275577 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -40,6 +40,7 @@ - **Allocine** - **AlphaPorno** - **AMCNetworks** + - **anderetijden**: npo.nl and ntr.nl - **AnimeOnDemand** - **anitube.se** - **AnySex** @@ -127,8 +128,8 @@ - **CBS** - **CBSInteractive** - **CBSLocal** - - **CBSNews**: CBS News - - **CBSNewsLiveVideo**: CBS News Live Videos + - **cbsnews**: CBS News + - **cbsnews:livevideo**: CBS News Live Videos - **CBSSports** - **CCTV** - **CDA** @@ -424,8 +425,9 @@ - **MPORA** - **MSN** - **mtg**: MTG services - - **MTV** + - **mtv** - **mtv.de** + - **mtv:video** - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** @@ -865,7 +867,7 @@ - **wholecloud**: WholeCloud - **Wimp** - **Wistia** - - **WNL** + - **wnl**: npo.nl and ntr.nl - **WorldStarHipHop** - **wrzuta.pl** - **wrzuta.pl:playlist** diff --git a/youtube_dl/version.py 
b/youtube_dl/version.py index 2af6380b8..af0c2cfc4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.24' +__version__ = '2016.09.27' From 8bfda726c20198b7e68a805967917ef1a79e9b91 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 28 Sep 2016 16:34:27 +0100 Subject: [PATCH 1770/3599] [limelight:media] improve http formats extraction --- youtube_dl/extractor/limelight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py index 6752ffee2..b7bfa7a6d 100644 --- a/youtube_dl/extractor/limelight.py +++ b/youtube_dl/extractor/limelight.py @@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor): format_id = 'rtmp' if stream.get('videoBitRate'): format_id += '-%d' % int_or_none(stream['videoBitRate']) - http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]) + http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:] urls.append(http_url) http_fmt = fmt.copy() http_fmt.update({ From f533490bb7b2d25b9c6fe7ccd381ebe2bef7d4f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 28 Sep 2016 22:58:25 +0700 Subject: [PATCH 1771/3599] [ketnet] Extract mzsource formats (#10770) --- youtube_dl/extractor/ketnet.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py index aaf3f807a..eb0a16008 100644 --- a/youtube_dl/extractor/ketnet.py +++ b/youtube_dl/extractor/ketnet.py @@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor): }, { 'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', 'only_matching': True, + }, { + # mzsource, geo restricted to Belgium + 'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe', + 'only_matching': True, }] def _real_extract(self, url): @@ -36,9 +40,25 @@ class 
KetnetIE(InfoExtractor): title = config['title'] - formats = self._extract_m3u8_formats( - config['source']['hls'], video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + formats = [] + for source_key in ('', 'mz'): + source = config.get('%ssource' % source_key) + if not isinstance(source, dict): + continue + for format_id, format_url in source.items(): + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, + fatal=False)) + elif format_id == 'hds': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) self._sort_formats(formats) return { From a56e74e2713ed45f4096735cf49d1d97b5e75389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 28 Sep 2016 16:54:06 +0200 Subject: [PATCH 1772/3599] [Instagram] Extract comments --- youtube_dl/extractor/instagram.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 8f7f232be..5ebc30a10 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Naomi Leonor Phan-Quang', 'like_count': int, 'comment_count': int, + 'comments': list, }, }, { # missing description @@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor): 'uploader': 'Britney Spears', 'like_count': int, 'comment_count': int, + 'comments': list, }, 'params': { 'skip_download': True, @@ -101,6 +103,14 @@ class InstagramIE(InfoExtractor): uploader_id = media.get('owner', {}).get('username') like_count = int_or_none(media.get('likes', {}).get('count')) comment_count = int_or_none(media.get('comments', {}).get('count')) + comments = [{ + 'author': comment.get('user', {}).get('username'), + 'author_id': comment.get('user', {}).get('id'), + 'id': 
comment.get('id'), + 'text': comment.get('text'), + 'timestamp': int_or_none(comment.get('created_at')), + } for comment in media.get('comments', {}).get('nodes', []) + if comment.get('text')] if not video_url: video_url = self._og_search_video_url(webpage, secure=False) @@ -131,6 +141,7 @@ class InstagramIE(InfoExtractor): 'uploader': uploader, 'like_count': like_count, 'comment_count': comment_count, + 'comments': comments, } From 0d72ff9c51ecc84aae1717c05f8b73ad94199687 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 29 Sep 2016 21:39:35 +0800 Subject: [PATCH 1773/3599] [leeco] Recognize more Le Sports URLs (#10794) --- ChangeLog | 8 +++++++- youtube_dl/extractor/leeco.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index f8149cc30..70da55c90 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,10 @@ -vesion 2016.09.27 +version + +Extractors ++ [leeco] Recognize more Le Sports URLs (#10794) + + +version 2016.09.27 Core + Add hdcore query parameter to akamai f4m formats diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index e9cc9aa59..c48a5aad1 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -29,7 +29,7 @@ from ..utils import ( class LeIE(InfoExtractor): IE_DESC = '乐视网' - _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P\d+)\.html' + _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P\d+)\.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' @@ -73,6 +73,12 @@ class LeIE(InfoExtractor): }, { 'url': 'http://sports.le.com/video/25737697.html', 'only_matching': True, + }, { + 'url': 'http://www.lesports.com/match/1023203003.html', + 'only_matching': True, + }, { + 'url': 'http://sports.le.com/match/1023203003.html', + 'only_matching': True, }] # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf From 
93aa0b631878b62f756c83e1069a14cd2d8775f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:04:10 +0700 Subject: [PATCH 1774/3599] [vk] Add support for finished live streams (#10799) --- youtube_dl/extractor/vk.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f26e0732c..1d089c9d7 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -226,7 +226,7 @@ class VKIE(VKBaseIE): }, 'params': { 'skip_download': True, - } + }, }, { # video key is extra_data not url\d+ @@ -241,6 +241,18 @@ class VKIE(VKBaseIE): 'view_count': int, }, }, + { + # finished live stream, live_mp4 + 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2', + 'md5': '90d22d051fccbbe9becfccc615be6791', + 'info_dict': { + 'id': '456242764', + 'ext': 'mp4', + 'title': 'ИгроМир 2016 — день 1', + 'uploader': 'Игромания', + 'duration': 5239, + }, + }, { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', @@ -366,7 +378,10 @@ class VKIE(VKBaseIE): formats = [] for k, v in data.items(): - if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v: + if (not k.startswith('url') and not k.startswith('cache') + and k not in ('extra_data', 'live_mp4')): + continue + if not isinstance(v, compat_str) or not v.startswith('http'): continue height = int_or_none(self._search_regex( r'^(?:url|cache)(\d+)', k, 'height', default=None)) From 475f8a458099c64d367356471069bd0ff2bd1b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:21:39 +0700 Subject: [PATCH 1775/3599] [vk] Add support for running live streams (Closes #10799) --- youtube_dl/extractor/vk.py | 47 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/vk.py 
b/youtube_dl/extractor/vk.py index 1d089c9d7..9f7a593ef 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -253,6 +253,12 @@ class VKIE(VKBaseIE): 'duration': 5239, }, }, + { + # live stream, hls and rtmp links,most likely already finished live + # stream by the time you are reading this comment + 'url': 'https://vk.com/video-140332_456239111', + 'only_matching': True, + }, { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', @@ -361,6 +367,11 @@ class VKIE(VKBaseIE): data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars') data = json.loads(data_json) + title = unescapeHTML(data['md_title']) + + if data.get('live') == 2: + title = self._live_title(title) + # Extract upload date upload_date = None mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) @@ -377,25 +388,33 @@ class VKIE(VKBaseIE): r'([\d,.]+)', views, 'view count', fatal=False)) formats = [] - for k, v in data.items(): - if (not k.startswith('url') and not k.startswith('cache') - and k not in ('extra_data', 'live_mp4')): + for format_id, format_url in data.items(): + if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')): continue - if not isinstance(v, compat_str) or not v.startswith('http'): - continue - height = int_or_none(self._search_regex( - r'^(?:url|cache)(\d+)', k, 'height', default=None)) - formats.append({ - 'format_id': k, - 'url': v, - 'height': height, - }) + if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'): + height = int_or_none(self._search_regex( + r'^(?:url|cache)(\d+)', format_id, 'height', default=None)) + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'height': height, + }) + elif format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, + fatal=False, 
live=True)) + elif format_id == 'rtmp': + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': 'flv', + }) self._sort_formats(formats) return { - 'id': compat_str(data['vid']), + 'id': compat_str(data.get('vid') or video_id), 'formats': formats, - 'title': unescapeHTML(data['md_title']), + 'title': title, 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration'), From efa97bdcf1f1e90d1b51a09324d7869dcd70729b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 30 Sep 2016 00:28:32 +0800 Subject: [PATCH 1776/3599] Move write_xattr to utils.py There are some other places that use xattr functions. It's better to move it to a common place so that others can use it. --- youtube_dl/postprocessor/xattrpp.py | 114 ++-------------------------- youtube_dl/utils.py | 99 ++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 107 deletions(-) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index e39ca60aa..fbdfa02ac 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -1,37 +1,15 @@ from __future__ import unicode_literals -import os -import subprocess -import sys -import errno - from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( - check_executable, hyphenate_date, - version_tuple, - PostProcessingError, - encodeArgument, - encodeFilename, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, ) -class XAttrMetadataError(PostProcessingError): - def __init__(self, code=None, msg='Unknown error'): - super(XAttrMetadataError, self).__init__(msg) - self.code = code - - # Parsing code and msg - if (self.code in (errno.ENOSPC, errno.EDQUOT) or - 'No space left' in self.msg or 'Disk quota excedded' in self.msg): - self.reason = 'NO_SPACE' - elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: - self.reason = 'VALUE_TOO_LONG' - else: - self.reason = 'NOT_SUPPORTED' - - class 
XAttrMetadataPP(PostProcessor): # @@ -48,88 +26,6 @@ class XAttrMetadataPP(PostProcessor): def run(self, info): """ Set extended attributes on downloaded file (if xattr support is found). """ - # This mess below finds the best xattr tool for the job and creates a - # "write_xattr" function. - try: - # try the pyxattr module... - import xattr - - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/rg3/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - self._downloader.report_warning( - 'python-pyxattr is detected but is too old. ' - 'youtube-dl requires %s or above while your version is %s. ' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) - - raise ImportError - - def write_xattr(path, key, value): - try: - xattr.set(path, key, value) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - - except ImportError: - if compat_os_name == 'nt': - # Write xattrs to NTFS Alternate Data Streams: - # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 - def write_xattr(path, key, value): - assert ':' not in key - assert os.path.exists(path) - - ads_fn = path + ':' + key - try: - with open(ads_fn, 'wb') as f: - f.write(value) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - else: - user_has_setfattr = check_executable('setfattr', ['--version']) - user_has_xattr = check_executable('xattr', ['-h']) - - if user_has_setfattr or user_has_xattr: - - def write_xattr(path, key, value): - value = value.decode('utf-8') - if user_has_setfattr: - executable = 'setfattr' - opts = ['-n', key, '-v', value] - elif user_has_xattr: - executable = 'xattr' - opts = ['-w', key, value] - - cmd = ([encodeFilename(executable, True)] + - [encodeArgument(o) for o in opts] + - [encodeFilename(path, True)]) - - try: - p = 
subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = p.communicate() - stderr = stderr.decode('utf-8', 'replace') - if p.returncode != 0: - raise XAttrMetadataError(p.returncode, stderr) - - else: - # On Unix, and can't find pyxattr, setfattr, or xattr. - if sys.platform.startswith('linux'): - self._downloader.report_error( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'pyxattr' or 'xattr' " - "modules, or the GNU 'attr' package " - "(which contains the 'setfattr' tool).") - else: - self._downloader.report_error( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'xattr' module, " - "or the 'xattr' binary.") - # Write the metadata to the file's xattrs self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs') @@ -159,6 +55,10 @@ class XAttrMetadataPP(PostProcessor): return [], info + except XAttrUnavailableError as e: + self._downloader.report_error(str(e)) + return [], info + except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self._downloader.report_warning( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 69ca88c85..fcbfa0d76 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -42,6 +42,7 @@ from .compat import ( compat_html_entities_html5, compat_http_client, compat_kwargs, + compat_os_name, compat_parse_qs, compat_shlex_quote, compat_socket_create_connection, @@ -775,6 +776,25 @@ class ContentTooShortError(Exception): self.expected = expected +class XAttrMetadataError(Exception): + def __init__(self, code=None, msg='Unknown error'): + super(XAttrMetadataError, self).__init__(msg) + self.code = code + + # Parsing code and msg + if (self.code in (errno.ENOSPC, errno.EDQUOT) or + 'No space left' in self.msg or 'Disk quota excedded' in self.msg): + self.reason = 'NO_SPACE' + elif self.code == errno.E2BIG or 'Argument list too 
long' in self.msg: + self.reason = 'VALUE_TOO_LONG' + else: + self.reason = 'NOT_SUPPORTED' + + +class XAttrUnavailableError(Exception): + pass + + def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting # expected HTTP responses to meet HTTP/1.0 or later (see also @@ -3131,3 +3151,82 @@ def decode_png(png_data): current_row.append(color) return width, height, pixels + + +def write_xattr(path, key, value): + # This mess below finds the best xattr tool for the job + try: + # try the pyxattr module... + import xattr + + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + # TODO: fallback to CLI tools + raise XAttrUnavailableError( + 'python-pyxattr is detected but is too old. ' + 'youtube-dl requires %s or above while your version is %s. 
' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + try: + xattr.set(path, key, value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + + except ImportError: + if compat_os_name == 'nt': + # Write xattrs to NTFS Alternate Data Streams: + # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 + assert ':' not in key + assert os.path.exists(path) + + ads_fn = path + ':' + key + try: + with open(ads_fn, 'wb') as f: + f.write(value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + else: + user_has_setfattr = check_executable('setfattr', ['--version']) + user_has_xattr = check_executable('xattr', ['-h']) + + if user_has_setfattr or user_has_xattr: + + value = value.decode('utf-8') + if user_has_setfattr: + executable = 'setfattr' + opts = ['-n', key, '-v', value] + elif user_has_xattr: + executable = 'xattr' + opts = ['-w', key, value] + + cmd = ([encodeFilename(executable, True)] + + [encodeArgument(o) for o in opts] + + [encodeFilename(path, True)]) + + try: + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) + stdout, stderr = p.communicate() + stderr = stderr.decode('utf-8', 'replace') + if p.returncode != 0: + raise XAttrMetadataError(p.returncode, stderr) + + else: + # On Unix, and can't find pyxattr, setfattr, or xattr. + if sys.platform.startswith('linux'): + raise XAttrUnavailableError( + "Couldn't find a tool to set the xattrs. " + "Install either the python 'pyxattr' or 'xattr' " + "modules, or the GNU 'attr' package " + "(which contains the 'setfattr' tool).") + else: + raise XAttrUnavailableError( + "Couldn't find a tool to set the xattrs. 
" + "Install either the python 'xattr' module, " + "or the 'xattr' binary.") From 3aa3953d28dae68b87aa83682043b5eec0973ddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= Date: Sun, 25 Sep 2016 20:26:58 +0200 Subject: [PATCH 1777/3599] [vk] Fix date and view count extraction. --- youtube_dl/extractor/vk.py | 17 +++++------------ youtube_dl/utils.py | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 9f7a593ef..3cfbd97af 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -373,19 +373,12 @@ class VKIE(VKBaseIE): title = self._live_title(title) # Extract upload date - upload_date = None - mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) - if mobj is not None: - mobj.group(1) + ' ' + mobj.group(2) - upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) + upload_date = unified_strdate(self._html_search_regex( + r'class="mv_info_date[^>]*>([^<]*)<', info_page, 'upload date', default=None)) - view_count = None - views = self._html_search_regex( - r'"mv_views_count_number"[^>]*>(.+?\bviews?)<', - info_page, 'view count', default=None) - if views: - view_count = str_to_int(self._search_regex( - r'([\d,.]+)', views, 'view count', fatal=False)) + view_count = str_to_int(self._html_search_regex( + r'class="mv_views_count[^>]*>([\d,.]+)', + info_page, 'view count', default=None)) formats = [] for format_id, format_url in data.items(): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index fcbfa0d76..243d09034 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -124,6 +124,8 @@ DATE_FORMATS = ( '%d %b %Y', '%B %d %Y', '%b %d %Y', + '%b %d %Y at %H:%M', + '%b %d %Y at %H:%M:%S', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', From c6eed6b8c000672f0515d916dda54002c7fca356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:47:25 +0700 
Subject: [PATCH 1778/3599] [utils] Lower priority for rare date formats and add tests --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9789d8611..b1b2effca 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -292,6 +292,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('25-09-2014'), '20140925') self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) + self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207') def test_unified_timestamps(self): self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) @@ -312,6 +313,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500) + self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 243d09034..d2dfa8013 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -124,8 +124,6 @@ DATE_FORMATS = ( '%d %b %Y', '%B %d %Y', '%b %d %Y', - '%b %d %Y at %H:%M', - '%b %d %Y at %H:%M:%S', '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %dth %Y %I:%M', @@ -144,6 +142,8 @@ DATE_FORMATS = ( '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M', + '%b %d %Y at %H:%M', + '%b %d %Y at %H:%M:%S', ) DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) From a7ee8a00f4af9853d06ed895c5023cc6b573fd57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:48:21 +0700 Subject: [PATCH 1779/3599] [vk] Extract timestamp (Closes #10760) --- youtube_dl/extractor/vk.py | 13 +++++++++---- 1 file changed, 
9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 3cfbd97af..77f5cebcf 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -20,7 +20,7 @@ from ..utils import ( remove_start, str_to_int, unescapeHTML, - unified_strdate, + unified_timestamp, urlencode_postdata, ) from .dailymotion import DailymotionIE @@ -106,6 +106,7 @@ class VKIE(VKBaseIE): 'title': 'ProtivoGunz - Хуёвая песня', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'duration': 195, + 'timestamp': 1329060660, 'upload_date': '20120212', 'view_count': int, }, @@ -119,6 +120,7 @@ class VKIE(VKBaseIE): 'uploader': 'Tom Cruise', 'title': 'No name', 'duration': 9, + 'timestamp': 1374374880, 'upload_date': '20130721', 'view_count': int, } @@ -195,6 +197,7 @@ class VKIE(VKBaseIE): 'upload_date': '20150709', 'view_count': int, }, + 'skip': 'Removed', }, { # youtube embed @@ -237,6 +240,7 @@ class VKIE(VKBaseIE): 'ext': 'mp4', 'title': 'S-Dance, репетиции к The way show', 'uploader': 'THE WAY SHOW | 17 апреля', + 'timestamp': 1454870100, 'upload_date': '20160207', 'view_count': int, }, @@ -373,8 +377,9 @@ class VKIE(VKBaseIE): title = self._live_title(title) # Extract upload date - upload_date = unified_strdate(self._html_search_regex( - r'class="mv_info_date[^>]*>([^<]*)<', info_page, 'upload date', default=None)) + timestamp = unified_timestamp(self._html_search_regex( + r'class=["\']mv_info_date[^>]*>([^<]+)(?:<|from)', info_page, + 'upload date', fatal=False)) view_count = str_to_int(self._html_search_regex( r'class="mv_views_count[^>]*>([\d,.]+)', @@ -411,7 +416,7 @@ class VKIE(VKBaseIE): 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration'), - 'upload_date': upload_date, + 'timestamp': timestamp, 'view_count': view_count, } From 70d7b323b6556eb693bec43a1eb10ded889184b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:51:52 +0700 Subject: 
[PATCH 1780/3599] [vk] Improve view count extraction --- youtube_dl/extractor/vk.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 77f5cebcf..58799d413 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -255,6 +255,7 @@ class VKIE(VKBaseIE): 'title': 'ИгроМир 2016 — день 1', 'uploader': 'Игромания', 'duration': 5239, + 'view_count': int, }, }, { @@ -376,14 +377,13 @@ class VKIE(VKBaseIE): if data.get('live') == 2: title = self._live_title(title) - # Extract upload date timestamp = unified_timestamp(self._html_search_regex( - r'class=["\']mv_info_date[^>]*>([^<]+)(?:<|from)', info_page, + r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page, 'upload date', fatal=False)) - view_count = str_to_int(self._html_search_regex( - r'class="mv_views_count[^>]*>([\d,.]+)', - info_page, 'view count', default=None)) + view_count = str_to_int(self._search_regex( + r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', + info_page, 'view count', fatal=False)) formats = [] for format_id, format_url in data.items(): From af33dd8ee7da49b5daf1582b2870deaa5427444b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 00:13:03 +0700 Subject: [PATCH 1781/3599] [aftonbladet] Remove extractor --- youtube_dl/extractor/aftonbladet.py | 64 ----------------------------- youtube_dl/extractor/extractors.py | 1 - 2 files changed, 65 deletions(-) delete mode 100644 youtube_dl/extractor/aftonbladet.py diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py deleted file mode 100644 index 5766b4fe8..000000000 --- a/youtube_dl/extractor/aftonbladet.py +++ /dev/null @@ -1,64 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import int_or_none - - -class AftonbladetIE(InfoExtractor): - _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P[0-9]+)' - 
_TEST = { - 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', - 'info_dict': { - 'id': '36015', - 'ext': 'mp4', - 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', - 'description': 'Jupiters måne mest aktiv av alla himlakroppar', - 'timestamp': 1394142732, - 'upload_date': '20140306', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # find internal video meta data - meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json' - player_config = self._parse_json(self._html_search_regex( - r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) - internal_meta_id = player_config['aptomaVideoId'] - internal_meta_url = meta_url % internal_meta_id - internal_meta_json = self._download_json( - internal_meta_url, video_id, 'Downloading video meta data') - - # find internal video formats - format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s' - internal_video_id = internal_meta_json['videoId'] - internal_formats_url = format_url % internal_video_id - internal_formats_json = self._download_json( - internal_formats_url, video_id, 'Downloading video formats') - - formats = [] - for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']: - p = fmt['paths'][0] - formats.append({ - 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), - 'ext': 'mp4', - 'width': int_or_none(fmt.get('width')), - 'height': int_or_none(fmt.get('height')), - 'tbr': int_or_none(fmt.get('bitrate')), - 'protocol': 'http', - }) - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': internal_meta_json['title'], - 'formats': formats, - 'thumbnail': internal_meta_json.get('imageUrl'), - 'description': internal_meta_json.get('shortPreamble'), - 'timestamp': int_or_none(internal_meta_json.get('timePublished')), - 'duration': int_or_none(internal_meta_json.get('duration')), - 
'view_count': int_or_none(internal_meta_json.get('views')), - } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 23fd2a308..09b3b4942 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -31,7 +31,6 @@ from .aenetworks import ( HistoryTopicIE, ) from .afreecatv import AfreecaTVIE -from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE From b0582fc80615ec94c37e14015bd9bbfef6745aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 00:15:09 +0700 Subject: [PATCH 1782/3599] [vgtv] Add support for tv.aftonbladet.se (Closes #10800) --- youtube_dl/extractor/vgtv.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 185756301..3b38ac700 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -22,6 +22,7 @@ class VGTVIE(XstreamIE): 'fvn.no/fvntv': 'fvntv', 'aftenposten.no/webtv': 'aptv', 'ap.vgtv.no/webtv': 'aptv', + 'tv.aftonbladet.se/abtv': 'abtv', } _APP_NAME_TO_VENDOR = { @@ -30,6 +31,7 @@ class VGTVIE(XstreamIE): 'satv': 'sa', 'fvntv': 'fvn', 'aptv': 'ap', + 'abtv': 'ab', } _VALID_URL = r'''(?x) @@ -40,7 +42,8 @@ class VGTVIE(XstreamIE): /? 
(?: \#!/(?:video|live)/| - embed?.*id= + embed?.*id=| + articles/ )| (?P %s @@ -135,6 +138,14 @@ class VGTVIE(XstreamIE): 'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk', 'only_matching': True, }, + { + 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', + 'only_matching': True, + }, + { + 'url': 'abtv:140026', + 'only_matching': True, + } ] def _real_extract(self, url): From de6babf92252ea5828a9c17d76766357cff3e440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 30 Sep 2016 22:30:34 +0700 Subject: [PATCH 1783/3599] [tvland] Extend _VALID_URL (Closes #10812) --- youtube_dl/extractor/tvland.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index cb76a2a58..957cf1ea2 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor class TVLandIE(MTVServicesInfoExtractor): IE_NAME = 'tvland.com' - _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P[^/?#.]+)' + _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ # Geo-restricted. Without a proxy metadata are still there. 
With a @@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor): 'upload_date': '20151228', 'timestamp': 1451289600, }, + }, { + 'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301', + 'only_matching': True, }] From 16097822582b839a3744b54af90f7b3fd7132d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Wed, 28 Sep 2016 17:28:16 +0200 Subject: [PATCH 1784/3599] [Instagram] Extract video dimensions --- youtube_dl/extractor/instagram.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 5ebc30a10..dde435189 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -84,7 +84,7 @@ class InstagramIE(InfoExtractor): webpage = self._download_webpage(url, video_id) (video_url, description, thumbnail, timestamp, uploader, - uploader_id, like_count, comment_count) = [None] * 8 + uploader_id, like_count, comment_count, height, width) = [None] * 10 shared_data = self._parse_json( self._search_regex( @@ -96,6 +96,8 @@ class InstagramIE(InfoExtractor): shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict) if media: video_url = media.get('video_url') + height = int_or_none(media.get('dimensions', {}).get('height')) + width = int_or_none(media.get('dimensions', {}).get('width')) description = media.get('caption') thumbnail = media.get('display_src') timestamp = int_or_none(media.get('date')) @@ -115,6 +117,12 @@ class InstagramIE(InfoExtractor): if not video_url: video_url = self._og_search_video_url(webpage, secure=False) + formats = [{ + 'url': video_url, + 'width': width, + 'height': height, + }] + if not uploader_id: uploader_id = self._search_regex( r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', @@ -131,7 +139,7 @@ class InstagramIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'ext': 'mp4', 'title': 'Video by %s' % 
uploader_id, 'description': description, From a1001f47fc19adf983859bb281f08a09bd7f7e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Oct 2016 00:16:08 +0700 Subject: [PATCH 1785/3599] [instagram] PEP 8 --- youtube_dl/extractor/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index dde435189..196407b06 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -111,8 +111,8 @@ class InstagramIE(InfoExtractor): 'id': comment.get('id'), 'text': comment.get('text'), 'timestamp': int_or_none(comment.get('created_at')), - } for comment in media.get('comments', {}).get('nodes', []) - if comment.get('text')] + } for comment in media.get( + 'comments', {}).get('nodes', []) if comment.get('text')] if not video_url: video_url = self._og_search_video_url(webpage, secure=False) From eaf9b22f94f37487d75457423a9a293dee1b1d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 20:03:25 +0200 Subject: [PATCH 1786/3599] [clubic] Rely on _match_id and _parse_json --- youtube_dl/extractor/clubic.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 2fba93543..f7ee3a8f8 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -1,9 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( clean_html, @@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id player_page = self._download_webpage(player_url, video_id) - config_json = self._search_regex( + config = 
self._parse_json(self._search_regex( r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, - 'configuration') - config = json.loads(config_json) + 'configuration'), video_id) video_info = config['videoInfo'] sources = config['sources'] From d7753d194803086d97ffe47f022c47c906ebcc71 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 30 Sep 2016 00:49:14 +0800 Subject: [PATCH 1787/3599] [downloader/http] Use write_xattr function for --xattr-set-filesize --- youtube_dl/__init__.py | 6 ------ youtube_dl/downloader/http.py | 8 +++++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1cf3140a0..72141b983 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -283,12 +283,6 @@ def _real_main(argv=None): 'key': 'ExecAfterDownload', 'exec_cmd': opts.exec_cmd, }) - if opts.xattr_set_filesize: - try: - import xattr - xattr # Confuse flake8 - except ImportError: - parser.error('setting filesize xattr requested but python-xattr is not available') external_downloader_args = None if opts.external_downloader_args: external_downloader_args = compat_shlex_split(opts.external_downloader_args) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index f8b69d186..11294d106 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -13,6 +13,9 @@ from ..utils import ( encodeFilename, sanitize_open, sanitized_Request, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, ) @@ -179,9 +182,8 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - import xattr - xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) - except(OSError, IOError, ImportError) as err: + write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) try: From 
e295618f9e1c1fc404d9baa4ccef961d3eb3ea88 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 15:22:48 +0800 Subject: [PATCH 1788/3599] [dctp] Fix extraction (closes #10734) --- ChangeLog | 1 + youtube_dl/extractor/dctp.py | 67 ++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 70da55c90..efc3e494e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index a47e04993..14ba88715 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -1,61 +1,54 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..utils import unified_strdate class DctpTvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', + 'md5': '174dd4a8a6225cf5655952f969cfbe24', 'info_dict': { - 'id': '1324', + 'id': '95eaa4f33dad413aa17b4ee613cccc6c', 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', - 'ext': 'flv', - 'title': 'Videoinstallation für eine Kaufhausfassade' + 'ext': 'mp4', + 'title': 'Videoinstallation für eine Kaufhausfassade', + 'description': 'Kurzfilm', + 'upload_date': '20110407', + 'thumbnail': 're:^https?://.*\.jpg$', }, - 'params': { - # rtmp download - 'skip_download': True, - } } def _real_extract(self, url): video_id = self._match_id(url) - base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/' - version_json = self._download_json( - base_url + 'version.json', - video_id, note='Determining file version') - version = version_json['version_name'] - info_json = self._download_json( - '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id), - video_id, 
note='Fetching object ID') - object_id = compat_str(info_json['object_id']) - meta_json = self._download_json( - '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id), - video_id, note='Downloading metadata') - uuid = meta_json['uuid'] - title = meta_json['title'] - wide = meta_json['is_wide'] - if wide: - ratio = '16x9' - else: - ratio = '4x3' - play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio) + webpage = self._download_webpage(url, video_id) + + object_id = self._html_search_meta('DC.identifier', webpage) servers_json = self._download_json( - 'http://www.dctp.tv/streaming_servers/', + 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/', video_id, note='Downloading server list') - url = servers_json[0]['endpoint'] + server = servers_json[0]['server'] + m3u8_path = self._search_regex( + r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path') + formats = self._extract_m3u8_formats( + 'http://%s%s' % (server, m3u8_path), video_id, ext='mp4', + entry_protocol='m3u8_native') + + title = self._og_search_title(webpage) + description = self._html_search_meta('DC.description', webpage) + upload_date = unified_strdate( + self._html_search_meta('DC.date.created', webpage)) + thumbnail = self._og_search_thumbnail(webpage) return { 'id': object_id, 'title': title, - 'format': 'rtmp', - 'url': url, - 'play_path': play_path, - 'rtmp_real_time': True, - 'ext': 'flv', - 'display_id': video_id + 'formats': formats, + 'display_id': video_id, + 'description': description, + 'upload_date': upload_date, + 'thumbnail': thumbnail, } From 9bd7bd0b8054231adbeb2a0eddd42a0b969fd6c4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 16:37:49 +0800 Subject: [PATCH 1789/3599] [twitch] Skip a 404 test --- youtube_dl/extractor/twitch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bc352391e..46c2cfe7b 100644 --- a/youtube_dl/extractor/twitch.py +++ 
b/youtube_dl/extractor/twitch.py @@ -247,6 +247,7 @@ class TwitchVodIE(TwitchItemBaseIE): # m3u8 download 'skip_download': True, }, + 'skip': 'HTTP Error 404: Not Found', }] def _real_extract(self, url): From 9c51a2464276f5eb26d1b571d32052df55d6ead9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 20:06:08 +0200 Subject: [PATCH 1790/3599] [criterion] Rely on _match_id, improve regex and add thumbnail to test --- youtube_dl/extractor/criterion.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index ad32673a8..cf6a5d6cb 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -16,20 +14,20 @@ class CriterionIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le Samouraï', 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', + 'thumbnail': 're:^https?://.*\.jpg$', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) final_url = self._search_regex( - r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') title = self._og_search_title(webpage) description = self._html_search_meta('description', webpage) thumbnail = self._search_regex( - r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', webpage, 'thumbnail url') return { From d54739a2e6a8dc089e7530afda0a1cfe355a6fef Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 19:58:13 +0800 Subject: [PATCH 1791/3599] [downloader/http] xattr values should be bytes --- youtube_dl/downloader/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 11294d106..af405b950 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -182,7 +182,7 @@ class HttpFD(FileDownloader): if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len)) + write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) From 53a7e3d2879feac7b1b6f714692581057b9b5f6b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 1 Oct 2016 20:13:04 +0800 Subject: [PATCH 1792/3599] [utils] Support xattr as well as pyxattr Closes #9054 There are two xattr packages in Python, pyxattr [1] and xattr [2]. They have different APIs. In old days pyxattr supports Linux only and xattr supports Linux, Mac, FreeBSD and Solaris, and pyxattr supports Linux only. Recently pyxattr adds support for Mac OS X. [3] An old version of [2] is shipped with Mac OS X. However, some Linux distributions have pyxattr only, for example PLD-Linux [4] and old Arch Linux. [5] As a result, supporting both is the way to go. 
[1] https://github.com/iustin/pyxattr [2] https://github.com/xattr/xattr [3] https://github.com/iustin/pyxattr/pull/9 [4] https://github.com/rg3/youtube-dl/issues/5498 [5] https://git.archlinux.org/svntogit/community.git/commit/?id=427c4c76401e386d865ccddea4fbfdc74df80492 https://git.archlinux.org/svntogit/community.git/commit/?id=59b40da7b69622a6761d364a8b07909e9cccaa56 python-xattr is added on 2016/06/29 while pyxattr is there for more than 6 years --- ChangeLog | 4 ++++ youtube_dl/utils.py | 29 +++++++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index efc3e494e..8ef39cd63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ version +Core ++ Support pyxattr as well as python-xattr for --xattrs and + --xattr-set-filesize (#9054) + Extractors * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d2dfa8013..c259f8bff 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3161,20 +3161,25 @@ def write_xattr(path, key, value): # try the pyxattr module... import xattr - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/rg3/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - # TODO: fallback to CLI tools - raise XAttrUnavailableError( - 'python-pyxattr is detected but is too old. ' - 'youtube-dl requires %s or above while your version is %s. 
' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) + if hasattr(xattr, 'set'): # pyxattr + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + # TODO: fallback to CLI tools + raise XAttrUnavailableError( + 'python-pyxattr is detected but is too old. ' + 'youtube-dl requires %s or above while your version is %s. ' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + setxattr = xattr.set + else: # xattr + setxattr = xattr.setxattr try: - xattr.set(path, key, value) + setxattr(path, key, value) except EnvironmentError as e: raise XAttrMetadataError(e.errno, e.strerror) From e1e97c2446ab6fcffcfae738e0c7f29ff58a9dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Oct 2016 22:50:47 +0700 Subject: [PATCH 1793/3599] [periscope:user] Fix extraction (Closes #10820) --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 61043cad5..0e3623024 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -132,7 +132,7 @@ class PeriscopeUserIE(PeriscopeBaseIE): user = list(data_store['UserCache']['users'].values())[0]['user'] user_id = user['id'] - session_id = data_store['SessionToken']['broadcastHistory']['token']['session_id'] + session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] broadcasts = self._call_api( 'getUserBroadcastsPublic', From 4da4516973b56bcaa65794a8ae0856cf54740c54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 19:59:08 +0200 Subject: [PATCH 1794/3599] [byutv] Rely on _match_id and _parse_json --- 
youtube_dl/extractor/byutv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3aec601f8..b2d25eec0 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -8,7 +7,7 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' + _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' _TEST = { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'md5': '05850eb8c749e2ee05ad5a1c34668493', @@ -27,15 +26,15 @@ class BYUtvIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') - episode_json = re.sub( - r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) - ep = json.loads(episode_json) + + ep = self._parse_json( + episode_code, video_id, transform_source=lambda s: + re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) if ep['providerType'] == 'Ooyala': return { From 6d2549fb4f2a646b6b6898db5281cde669277626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 00:44:54 +0700 Subject: [PATCH 1795/3599] [byutv] Fix id and display id --- youtube_dl/extractor/byutv.py | 46 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index b2d25eec0..084cc7ae2 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -7,15 +7,15 @@ from ..utils import ExtractorError 
class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P[^/?#]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?byutv.org/watch/(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', - 'md5': '05850eb8c749e2ee05ad5a1c34668493', 'info_dict': { - 'id': 'studio-c-season-5-episode-5', + 'id': '6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'display_id': 'studio-c-season-5-episode-5', 'ext': 'mp4', - 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'title': 'Season 5 Episode 5', + 'description': 'md5:e07269172baff037f8e8bf9956bc9747', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 1486.486, }, @@ -23,28 +23,34 @@ class BYUtvIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], - } + }, { + 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') ep = self._parse_json( - episode_code, video_id, transform_source=lambda s: + episode_code, display_id, transform_source=lambda s: re.sub(r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', s)) - if ep['providerType'] == 'Ooyala': - return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], - 'id': video_id, - 'title': ep['title'], - 'description': ep.get('description'), - 'thumbnail': ep.get('imageThumbnail'), - } - else: + if ep['providerType'] != 'Ooyala': raise ExtractorError('Unsupported provider %s' % ep['provider']) + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % 
ep['providerId'], + 'id': video_id, + 'display_id': display_id, + 'title': ep['title'], + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } From f6ba581f89fc764e4eaf3045ff5b63e27ad66cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 00:50:07 +0700 Subject: [PATCH 1796/3599] [byutv:event] Add extractor --- youtube_dl/extractor/byutv.py | 39 +++++++++++++++++++++++++++++- youtube_dl/extractor/extractors.py | 5 +++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 084cc7ae2..4be175d70 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?byutv.org/watch/(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/(?!event/)(?P[0-9a-f-]+)(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { @@ -54,3 +54,40 @@ class BYUtvIE(InfoExtractor): 'description': ep.get('description'), 'thumbnail': ep.get('imageThumbnail'), } + + +class BYUtvEventIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?byutv\.org/watch/event/(?P[0-9a-f-]+)' + _TEST = { + 'url': 'http://www.byutv.org/watch/event/29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'info_dict': { + 'id': '29941b9b-8bf6-48d2-aebf-7a87add9e34b', + 'ext': 'mp4', + 'title': 'Toledo vs. BYU (9/30/16)', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + ooyala_id = self._search_regex( + r'providerId\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'ooyala id', group='id') + + title = self._search_regex( + r'class=["\']description["\'][^>]*>\s*
<h1>([^<]+)</h1>
', webpage, + 'title').strip() + + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ooyala_id, + 'id': video_id, + 'title': title, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09b3b4942..e8928307c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -116,7 +116,10 @@ from .brightcove import ( BrightcoveNewIE, ) from .buzzfeed import BuzzFeedIE -from .byutv import BYUtvIE +from .byutv import ( + BYUtvIE, + BYUtvEventIE, +) from .c56 import C56IE from .camdemy import ( CamdemyIE, From b19e275d99c8dfe121ba0dc3478e4eb9c83e4f9b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 02:12:14 +0800 Subject: [PATCH 1797/3599] [__init__] Fix lost xattr if --embed-thumbnail used Reported at https://github.com/rg3/youtube-dl/issues/9054#issuecomment-250451823 --- ChangeLog | 1 + youtube_dl/__init__.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8ef39cd63..acceb9d02 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core +* Fix possibly lost extended attributes + Support pyxattr as well as python-xattr for --xattrs and --xattr-set-filesize (#9054) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 72141b983..f84b866df 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -266,8 +266,6 @@ def _real_main(argv=None): postprocessors.append({ 'key': 'FFmpegEmbedSubtitle', }) - if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) if opts.embedthumbnail: already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails postprocessors.append({ @@ -276,6 +274,10 @@ def _real_main(argv=None): }) if not already_have_thumbnail: opts.writethumbnail = True + # XAttrMetadataPP should be run after post-processors that may change file + # contents + if opts.xattrs: + postprocessors.append({'key': 'XAttrMetadata'}) # Please keep 
ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. if opts.exec_cmd: From bd2644120526429783c55e885f7042633826d7da Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 03:03:41 +0800 Subject: [PATCH 1798/3599] [utils] Fix xattr error handling --- youtube_dl/extractor/generic.py | 16 ++++++++++++---- youtube_dl/utils.py | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c1792c534..489b3c7c1 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2332,12 +2332,23 @@ class GenericIE(InfoExtractor): info_dict.update(json_ld) return info_dict + # Look for HTML5 media + entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') + if entries: + for entry in entries: + entry.update({ + 'id': video_id, + 'title': video_title, + }) + self._sort_formats(entry['formats']) + return self.playlist_result(entries) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True vpath = compat_urlparse.urlparse(vurl).path vext = determine_ext(vpath) - return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml') + return '.' 
in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js') def filter_video(urls): return list(filter(check_video, urls)) @@ -2387,9 +2398,6 @@ class GenericIE(InfoExtractor): # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: if m_video_type is not None: found = filter_video(re.findall(r'.*?]*)?\s+src=["\'](.*?)["\']', webpage) if not found: REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c259f8bff..044520037 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -782,6 +782,7 @@ class XAttrMetadataError(Exception): def __init__(self, code=None, msg='Unknown error'): super(XAttrMetadataError, self).__init__(msg) self.code = code + self.msg = msg # Parsing code and msg if (self.code in (errno.ENOSPC, errno.EDQUOT) or From fd152641726a3f5e47a6a5065f8e9b6fe2623c11 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 05:24:31 +0800 Subject: [PATCH 1799/3599] [jwplatform] Support old-style jwplayer playlists --- ChangeLog | 1 + youtube_dl/extractor/jwplatform.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index acceb9d02..d48a09122 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ Core --xattr-set-filesize (#9054) Extractors +* [jwplatform] Improve JWPlayer handling * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 38199fcd0..e10f7e9f9 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -39,6 +39,12 @@ class JWPlatformBaseIE(InfoExtractor): jwplayer_data = {'playlist': [jwplayer_data]} entries = [] + + # JWPlayer backward compatibility: single playlist item + # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 + if not isinstance(jwplayer_data['playlist'], list): 
+ jwplayer_data['playlist'] = [jwplayer_data['playlist']] + for video_data in jwplayer_data['playlist']: # JWPlayer backward compatibility: flattened sources # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 From 99ed78c79e94c14ce24bc5bdccaf9573d4f83552 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 14:07:49 +0800 Subject: [PATCH 1800/3599] [jwplatform] Support DASH streams --- ChangeLog | 1 + youtube_dl/extractor/jwplatform.py | 6 +++++- youtube_dl/extractor/rudo.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index d48a09122..f5172a864 100644 --- a/ChangeLog +++ b/ChangeLog @@ -6,6 +6,7 @@ Core --xattr-set-filesize (#9054) Extractors ++ [jwplatform] Support DASH streams in JWPlayer * [jwplatform] Improve JWPlayer handling * [dctp] Fix extraction (#10734) + [leeco] Recognize more Le Sports URLs (#10794) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index e10f7e9f9..5d56e0a28 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -32,7 +32,8 @@ class JWPlatformBaseIE(InfoExtractor): return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) - def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None): + def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, + m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): # JWPlayer backward compatibility: flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 if 'playlist' not in jwplayer_data: @@ -63,6 +64,9 @@ class JWPlatformBaseIE(InfoExtractor): if source_type == 'hls' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + source_url, 
this_video_id, mpd_id=mpd_id, fatal=False)) # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'): formats.append({ diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py index 38366b784..9a330c196 100644 --- a/youtube_dl/extractor/rudo.py +++ b/youtube_dl/extractor/rudo.py @@ -43,7 +43,7 @@ class RudoIE(JWPlatformBaseIE): transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, m3u8_id='hls') + jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') info_dict.update({ 'title': self._og_search_title(webpage), From 703b3afa93326c96bc5faf753305ab95c4e98b10 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 2 Oct 2016 14:25:06 +0800 Subject: [PATCH 1801/3599] [amcnetworks] Skip a restricted _TEST --- youtube_dl/extractor/amcnetworks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index c739d2c99..d2b03b177 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -28,6 +28,7 @@ class AMCNetworksIE(ThePlatformIE): # m3u8 download 'skip_download': True, }, + 'skip': 'Requires TV provider accounts', }, { 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'only_matching': True, From 26406d33c7808bdff38ffcda36d2d6a4e5bb4f4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 15:56:33 +0700 Subject: [PATCH 1802/3599] [ChangeLog] Actualize --- ChangeLog | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index f5172a864..277cc2ee1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,15 +1,26 @@ version Core -* Fix possibly lost extended attributes +* Fix possibly lost 
extended attributes during post-processing + Support pyxattr as well as python-xattr for --xattrs and --xattr-set-filesize (#9054) Extractors + [jwplatform] Support DASH streams in JWPlayer -* [jwplatform] Improve JWPlayer handling ++ [jwplatform] Support old-style JWPlayer playlists ++ [byutv:event] Add extractor +* [periscope:user] Fix extraction (#10820) * [dctp] Fix extraction (#10734) ++ [instagram] Extract video dimensions (#10790) ++ [tvland] Extend URL regular expression (#10812) ++ [vgtv] Add support for tv.aftonbladet.se (#10800) +- [aftonbladet] Remove extractor +* [vk] Fix timestamp and view count extraction (#10760) ++ [vk] Add support for running and finished live streams (#10799) + [leeco] Recognize more Le Sports URLs (#10794) ++ [instagram] Extract comments (#10788) ++ [ketnet] Extract mzsource formats (#10770) +* [limelight:media] Improve HTTP formats extraction version 2016.09.27 From 6c152ce20f7bd5f1fbb786abe70c4aa3412aef26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 2 Oct 2016 15:58:00 +0700 Subject: [PATCH 1803/3599] release 2016.10.02 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 2 +- youtube_dl/version.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 273eb8c0b..e813e4c59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.27*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.27** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. 
If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.27 +[debug] youtube-dl version 2016.10.02 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 277cc2ee1..4f64edabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.02 Core * Fix possibly lost extended attributes during post-processing diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 26f275577..828ed0ba9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -34,7 +34,6 @@ - **AdultSwim** - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network - **AfreecaTV**: afreecatv.com - - **Aftonbladet** - **AirMozilla** - **AlJazeera** - **Allocine** @@ -112,6 +111,7 @@ - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BuzzFeed** - **BYUtv** + - **BYUtvEvent** - **Camdemy** - **CamdemyFolder** - **CamWithHer** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index af0c2cfc4..161ba4391 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.27' +__version__ = 
'2016.10.02' From 567a5996cac5f3ba2d06748cbbfb295eab48074c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 15:34:46 +0200 Subject: [PATCH 1804/3599] [pornoxo] Use JWPlatform to improve metadata extraction --- youtube_dl/extractor/pornoxo.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py index 202f58673..3c9087f2d 100644 --- a/youtube_dl/extractor/pornoxo.py +++ b/youtube_dl/extractor/pornoxo.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .jwplatform import JWPlatformBaseIE from ..utils import ( str_to_int, ) -class PornoXOIE(InfoExtractor): +class PornoXOIE(JWPlatformBaseIE): _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P\d+)/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', @@ -17,7 +17,8 @@ class PornoXOIE(InfoExtractor): 'id': '7564', 'ext': 'flv', 'title': 'Striptease From Sexy Secretary!', - 'description': 'Striptease From Sexy Secretary!', + 'display_id': 'striptease-from-sexy-secretary', + 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', 'categories': list, # NSFW 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, @@ -26,23 +27,14 @@ class PornoXOIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id, display_id = mobj.groups() webpage = self._download_webpage(url, video_id) - - video_url = self._html_search_regex( - r'\'file\'\s*:\s*"([^"]+)"', webpage, 'video_url') + video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) title = self._html_search_regex( r'([^<]+)\s*-\s*PornoXO', webpage, 'title') - description = self._html_search_regex( - r'<meta name="description" content="([^"]+)\s*featuring', - webpage, 'description', fatal=False) - - thumbnail = 
self._html_search_regex( - r'\'image\'\s*:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) - view_count = str_to_int(self._html_search_regex( r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) @@ -53,13 +45,14 @@ class PornoXOIE(InfoExtractor): None if categories_str is None else categories_str.split(',')) - return { + video_data.update({ 'id': video_id, - 'url': video_url, 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'display_id': display_id, + 'description': self._html_search_meta('description', webpage), 'categories': categories, 'view_count': view_count, 'age_limit': 18, - } + }) + + return video_data From ee5de4e38e3629ffc5d6360e06fa5dcfd43cbeb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Oct 2016 00:54:02 +0700 Subject: [PATCH 1805/3599] [nhl] Add support for wch2016.com (Closes #10833) --- youtube_dl/extractor/nhl.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index b04d21113..26149c88f 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -245,7 +245,11 @@ class NHLVideocenterCategoryIE(NHLBaseInfoExtractor): class NHLIE(InfoExtractor): IE_NAME = 'nhl.com' - _VALID_URL = r'https?://(?:www\.)?nhl\.com/([^/]+/)*c-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)' + _SITES_MAP = { + 'nhl': 'nhl', + 'wch2016': 'wch', + } _TESTS = [{ # type=video 'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503', @@ -270,13 +274,20 @@ class NHLIE(InfoExtractor): 'upload_date': '20160204', 'timestamp': 1454544904, }, + }, { + 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703', + 'only_matching': True, + }, { + 'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068', + 'only_matching': True, }] def 
_real_extract(self, url): - tmp_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + tmp_id, site = mobj.group('id'), mobj.group('site') video_data = self._download_json( - 'https://nhl.bamcontent.com/nhl/id/v1/%s/details/web-v1.json' % tmp_id, - tmp_id) + 'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json' + % (self._SITES_MAP[site], tmp_id), tmp_id) if video_data.get('type') == 'article': video_data = video_data['media'] From c1084ddb0c87dac450d2b7c1b1cfef386d6f4481 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 3 Oct 2016 15:27:09 +0100 Subject: [PATCH 1806/3599] [thisoldhouse] Add new extractor(closes #10837) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/thisoldhouse.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 youtube_dl/extractor/thisoldhouse.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e8928307c..dca4973d4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -892,6 +892,7 @@ from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE +from .thisoldhouse import ThisOldHouseIE from .threeqsdn import ThreeQSDNIE from .tinypic import TinyPicIE from .tlc import TlcDeIE diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py new file mode 100644 index 000000000..7629f0d10 --- /dev/null +++ b/youtube_dl/extractor/thisoldhouse.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class ThisOldHouseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', + 'md5': '568acf9ca25a639f0c4ff905826b662f', + 'info_dict': { + 'id': '2REGtUDQ', + 'ext': 
'mp4', + 'title': 'How to Build a Storage Bench', + 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.', + 'timestamp': 1442548800, + 'upload_date': '20150918', + } + }, { + 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = drupal_settings['jwplatform']['video_id'] + return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id) From dcdb292fddc82ae11f4c0b647815a45c88a6b6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= <trox1972@users.noreply.github.com> Date: Sun, 2 Oct 2016 13:39:18 +0200 Subject: [PATCH 1807/3599] Unify coding cookie --- devscripts/lazy_load_template.py | 2 +- docs/conf.py | 2 +- setup.py | 2 +- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 2 +- youtube_dl/extractor/adobepass.py | 2 +- youtube_dl/extractor/allocine.py | 2 +- youtube_dl/extractor/arte.py | 2 +- youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/canalplus.py | 2 +- youtube_dl/extractor/cbsnews.py | 2 +- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/comcarcoff.py | 2 +- youtube_dl/extractor/crunchyroll.py | 2 +- youtube_dl/extractor/daum.py | 2 +- youtube_dl/extractor/dramafever.py | 2 +- youtube_dl/extractor/eitb.py | 2 +- youtube_dl/extractor/embedly.py | 2 +- youtube_dl/extractor/faz.py | 2 +- youtube_dl/extractor/firsttv.py | 2 +- youtube_dl/extractor/folketinget.py | 2 +- youtube_dl/extractor/francetv.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hark.py | 2 +- youtube_dl/extractor/helsinki.py | 2 +- youtube_dl/extractor/ina.py | 2 +- 
youtube_dl/extractor/jpopsukitv.py | 2 +- youtube_dl/extractor/kickstarter.py | 2 +- youtube_dl/extractor/kontrtube.py | 2 +- youtube_dl/extractor/krasview.py | 2 +- youtube_dl/extractor/lifenews.py | 2 +- youtube_dl/extractor/m6.py | 2 +- youtube_dl/extractor/mailru.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/musicplayon.py | 2 +- youtube_dl/extractor/myspace.py | 2 +- youtube_dl/extractor/naver.py | 2 +- youtube_dl/extractor/newstube.py | 2 +- youtube_dl/extractor/niconico.py | 2 +- youtube_dl/extractor/noco.py | 2 +- youtube_dl/extractor/normalboots.py | 2 +- youtube_dl/extractor/nova.py | 2 +- youtube_dl/extractor/nowness.py | 2 +- youtube_dl/extractor/nrk.py | 2 +- youtube_dl/extractor/ntvru.py | 2 +- youtube_dl/extractor/nuevo.py | 2 +- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/pandoratv.py | 2 +- youtube_dl/extractor/patreon.py | 2 +- youtube_dl/extractor/porn91.py | 2 +- youtube_dl/extractor/prosiebensat1.py | 2 +- youtube_dl/extractor/puls4.py | 2 +- youtube_dl/extractor/radiobremen.py | 2 +- youtube_dl/extractor/rmcdecouverte.py | 2 +- youtube_dl/extractor/rtl2.py | 2 +- youtube_dl/extractor/rtve.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/rutube.py | 2 +- youtube_dl/extractor/rutv.py | 2 +- youtube_dl/extractor/safari.py | 2 +- youtube_dl/extractor/sapo.py | 2 +- youtube_dl/extractor/sbs.py | 2 +- youtube_dl/extractor/screencast.py | 2 +- youtube_dl/extractor/screenwavemedia.py | 2 +- youtube_dl/extractor/smotri.py | 2 +- youtube_dl/extractor/sohu.py | 2 +- youtube_dl/extractor/soundcloud.py | 2 +- youtube_dl/extractor/southpark.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/srmediathek.py | 2 +- youtube_dl/extractor/streamcz.py | 2 +- youtube_dl/extractor/swrmediathek.py | 2 +- youtube_dl/extractor/sztvhu.py | 2 +- youtube_dl/extractor/tagesschau.py | 2 +- youtube_dl/extractor/tass.py | 2 +- youtube_dl/extractor/teachertube.py | 2 +- 
youtube_dl/extractor/teamcoco.py | 2 +- youtube_dl/extractor/theintercept.py | 2 +- youtube_dl/extractor/theplatform.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/toypics.py | 2 +- youtube_dl/extractor/tumblr.py | 2 +- youtube_dl/extractor/tv2.py | 2 +- youtube_dl/extractor/tvigle.py | 2 +- youtube_dl/extractor/vbox7.py | 2 +- youtube_dl/extractor/vesti.py | 2 +- youtube_dl/extractor/vimeo.py | 2 +- youtube_dl/extractor/vk.py | 2 +- youtube_dl/extractor/vodlocker.py | 2 +- youtube_dl/extractor/wdr.py | 2 +- youtube_dl/extractor/wrzuta.py | 2 +- youtube_dl/extractor/wsj.py | 2 +- youtube_dl/extractor/xboxclips.py | 2 +- youtube_dl/extractor/xnxx.py | 2 +- youtube_dl/extractor/xuite.py | 2 +- youtube_dl/extractor/zingmp3.py | 2 +- youtube_dl/postprocessor/embedthumbnail.py | 2 +- youtube_dl/utils.py | 2 +- 99 files changed, 99 insertions(+), 99 deletions(-) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 2e6e6641b..c4e5fc1f4 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/docs/conf.py b/docs/conf.py index 594ca61a6..0aaf1b8fc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 # # youtube-dl documentation build configuration file, created by # sphinx-quickstart on Fri Mar 14 21:05:43 2014. 
diff --git a/setup.py b/setup.py index 508b27f37..ce6dd1870 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import print_function diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 442aa663b..99825e343 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import absolute_import, unicode_literals diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f84b866df..643393558 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 8f7ed6ef2..d62010cb2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 190bc2cc8..7d280d871 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index e0c5c1804..dbac24b18 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 2ec55b185..945cf19e8 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git 
a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 69e8f4f57..6dab226af 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 216989230..91b0f5fa9 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 87c2e7089..4ec79d19d 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index 747c245c8..588aad0d9 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index e4c10ad24..c38fd095a 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index b5c310ccb..732b4362a 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 3b6529f4b..c11595612 100644 --- a/youtube_dl/extractor/dramafever.py +++ 
b/youtube_dl/extractor/dramafever.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import itertools diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py index 713cb7b32..ee5ead18b 100644 --- a/youtube_dl/extractor/eitb.py +++ b/youtube_dl/extractor/eitb.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/embedly.py b/youtube_dl/extractor/embedly.py index 1cdb11e34..a5820b21e 100644 --- a/youtube_dl/extractor/embedly.py +++ b/youtube_dl/extractor/embedly.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py index fd535457d..4bc8fc512 100644 --- a/youtube_dl/extractor/faz.py +++ b/youtube_dl/extractor/faz.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 332d12020..6b662cc3c 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py index 75399fa7d..b3df93f28 100644 --- a/youtube_dl/extractor/folketinget.py +++ b/youtube_dl/extractor/folketinget.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 3233f66d5..e7068d1ae 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 489b3c7c1..9ea306e3a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index a43abd154..74e1720ee 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index 749e9154f..342a6130e 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/helsinki.py b/youtube_dl/extractor/helsinki.py index 93107b306..575fb332a 100644 --- a/youtube_dl/extractor/helsinki.py +++ b/youtube_dl/extractor/helsinki.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index 65712abc2..9544ff9d4 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py index 122e2dd8c..4b5f346d1 100644 --- a/youtube_dl/extractor/jpopsukitv.py +++ b/youtube_dl/extractor/jpopsukitv.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index fbe499497..d4da8f484 100644 --- a/youtube_dl/extractor/kickstarter.py +++ 
b/youtube_dl/extractor/kickstarter.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 704bd7b34..1fda45107 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index 0ae8ebd68..cf8876fa1 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 87120ecd1..afce2010e 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/m6.py b/youtube_dl/extractor/m6.py index 39d2742c8..9806875e8 100644 --- a/youtube_dl/extractor/m6.py +++ b/youtube_dl/extractor/m6.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 9a7098c43..f7cc3c832 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index aa091a62c..478e39967 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/musicplayon.py 
b/youtube_dl/extractor/musicplayon.py index 2174e5665..1854d59a5 100644 --- a/youtube_dl/extractor/musicplayon.py +++ b/youtube_dl/extractor/musicplayon.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/myspace.py b/youtube_dl/extractor/myspace.py index 0d5238d77..ab32e632e 100644 --- a/youtube_dl/extractor/myspace.py +++ b/youtube_dl/extractor/myspace.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 0891d2772..055070ff5 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 0092b85ce..e3f35f1d8 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 6eaaa8416..a104e33f8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 06f2bda07..70ff2ab36 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index af44c3bb5..6aa0895b8 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ 
import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 17671ad39..103952345 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index 74860eb20..7e5346316 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .brightcove import ( diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index ed42eb301..d471eb20c 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py index e8702ebcd..7d7a785ab 100644 --- a/youtube_dl/extractor/ntvru.py +++ b/youtube_dl/extractor/ntvru.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index ef093dec2..87fb94d1f 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index f2ccc53dc..50fbbc79c 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 8d49f5c4a..2b07958bb 100644 --- 
a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index 229750665..a6a2c273f 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 9894f3262..073fc3e21 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from ..compat import ( diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 873d4f981..7cc07a2ad 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index 9c2ccbe2d..1c54af002 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .prosiebensat1 import ProSiebenSat1BaseIE diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py index 19a751da0..0aa8d059b 100644 --- a/youtube_dl/extractor/radiobremen.py +++ b/youtube_dl/extractor/radiobremen.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py index f3bb4fa66..2340dae53 100644 --- a/youtube_dl/extractor/rmcdecouverte.py +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -1,4 +1,4 @@ -# 
encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index de004671d..cb4ee8803 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f1b92f6da..6a43b036e 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 1f7c26299..ce631b46c 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 5d0ace5bf..fd1df925b 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index a2379eb04..a5e672c0a 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index eabe41efe..8b35fd244 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py index 172cc1275..49a9b313a 100644 --- a/youtube_dl/extractor/sapo.py +++ 
b/youtube_dl/extractor/sapo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 96472fbc4..43131fb7e 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 356631700..ed9de9648 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 40333c825..7d77e8825 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 114358786..def46abda 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 48e2ba2dd..30760ca06 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 1a8114aa7..3b7ecb3c3 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git 
a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index e2a9e45ac..08f8c5744 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index b41d9f59f..ec1b60388 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index 409d50304..b03272f7a 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .ard import ARDMediathekIE diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py index d3d2b7eb7..9e533103c 100644 --- a/youtube_dl/extractor/streamcz.py +++ b/youtube_dl/extractor/streamcz.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import hashlib diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dl/extractor/swrmediathek.py index 58073eefe..6d69f7686 100644 --- a/youtube_dl/extractor/swrmediathek.py +++ b/youtube_dl/extractor/swrmediathek.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dl/extractor/sztvhu.py index f562aa6d3..cfad33146 100644 --- a/youtube_dl/extractor/sztvhu.py +++ b/youtube_dl/extractor/sztvhu.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 136e18f96..8670cee28 100644 
--- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tass.py b/youtube_dl/extractor/tass.py index c4ef70778..5293393ef 100644 --- a/youtube_dl/extractor/tass.py +++ b/youtube_dl/extractor/tass.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 82675431f..df5d5556f 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 79a778920..75346393b 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index ec6f4ecaa..f23b58713 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 6febf805b..cfbf7f4e1 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index ce4f91f46..fd145ba42 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import 
unicode_literals import re diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 2579ba8c6..938e05076 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -1,4 +1,4 @@ -# -*- coding:utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 4d8b57111..ebe411e12 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index f225ec684..bd28267b0 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index ead4c00c7..f3817ab28 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index e17988573..a1e0851b7 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py index cb64ae0bd..5ab716880 100644 --- a/youtube_dl/extractor/vesti.py +++ b/youtube_dl/extractor/vesti.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 50aacc6ac..309a47bf0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: 
utf-8 from __future__ import unicode_literals import json diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 58799d413..ac77bc623 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import collections diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index a938a4007..c85b474d2 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 390f9e830..f7e6360a3 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index bdd7097ba..0f53f1bcb 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/extractor/wsj.py b/youtube_dl/extractor/wsj.py index a83e68b17..deb7483ae 100644 --- a/youtube_dl/extractor/wsj.py +++ b/youtube_dl/extractor/wsj.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dl/extractor/xboxclips.py index b113ab1c4..d9c277bc3 100644 --- a/youtube_dl/extractor/xboxclips.py +++ b/youtube_dl/extractor/xboxclips.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index bcb140305..e0a6255dc 100644 --- 
a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index a66daee46..4b9c1ee9c 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals import base64 diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index bd708b42c..0f0e9d0eb 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals import re diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 3bad5a266..2e4789eb2 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 044520037..0569d231c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals From 215ff6e0f3b092aac9edc91b8026ffc7b55d8b70 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Mon, 3 Oct 2016 18:16:55 +0100 Subject: [PATCH 1808/3599] [theweatherchannel] Add new extractor(closes #7188) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/theweatherchannel.py | 79 +++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/theweatherchannel.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dca4973d4..f67e19526 100644 --- a/youtube_dl/extractor/extractors.py +++ 
b/youtube_dl/extractor/extractors.py @@ -890,6 +890,7 @@ from .theplatform import ( from .thescene import TheSceneIE from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE +from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dl/extractor/theweatherchannel.py new file mode 100644 index 000000000..c34a49d03 --- /dev/null +++ b/youtube_dl/extractor/theweatherchannel.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .theplatform import ThePlatformIE +from ..utils import ( + determine_ext, + parse_duration, +) + + +class TheWeatherChannelIE(ThePlatformIE): + _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock', + 'md5': 'ab924ac9574e79689c24c6b95e957def', + 'info_dict': { + 'id': 'cc82397e-cc3f-4d11-9390-a785add090e8', + 'ext': 'mp4', + 'title': 'Ice Climber Is In For A Shock', + 'description': 'md5:55606ce1378d4c72e6545e160c9d9695', + 'uploader': 'TWC - Digital (No Distro)', + 'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + video_id = drupal_settings['twc']['contexts']['node']['uuid'] + video_data = self._download_json( + 'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id) + seo_meta = video_data.get('seometa', {}) + title = video_data.get('title') or seo_meta['title'] + + urls = [] + thumbnails = [] + formats = [] + for variant_id, variant_url in video_data.get('variants', []).items(): + 
variant_url = variant_url.strip() + if not variant_url or variant_url in urls: + continue + urls.append(variant_url) + ext = determine_ext(variant_url) + if ext == 'jpg': + thumbnails.append({ + 'url': variant_url, + 'id': variant_id, + }) + elif ThePlatformIE.suitable(variant_url): + tp_formats, _ = self._extract_theplatform_smil(variant_url, video_id) + formats.extend(tp_formats) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + variant_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=variant_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + variant_url, video_id, f4m_id=variant_id, fatal=False)) + else: + formats.append({ + 'url': variant_url, + 'format_id': variant_id, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': video_data.get('description') or seo_meta.get('description') or seo_meta.get('og:description'), + 'duration': parse_duration(video_data.get('duration')), + 'uploader': video_data.get('providername'), + 'uploader_id': video_data.get('providerid'), + 'thumbnails': thumbnails, + 'formats': formats, + } From c1b2a0858cafc3362e5da73b9fb737f18cde4618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 4 Oct 2016 02:10:23 +0700 Subject: [PATCH 1809/3599] [youtube:live] Extend _VALID_URL (Closes #10839) --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f86823112..cb266eab6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2152,7 +2152,7 @@ class YoutubeUserIE(YoutubeChannelIE): class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams' - _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+))/live' + _VALID_URL = 
r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live' IE_NAME = 'youtube:live' _TESTS = [{ @@ -2178,6 +2178,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', + 'only_matching': True, }] def _real_extract(self, url): From 539c881bfc1380890a55a08dbf970900328f8ec5 Mon Sep 17 00:00:00 2001 From: Aleksander Nitecki <ixendr@itogi.re> Date: Mon, 3 Oct 2016 21:47:19 +0200 Subject: [PATCH 1810/3599] [techtalks] Allow URL-s with name part omitted. --- youtube_dl/extractor/techtalks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 16e945d8e..0ec7b1273 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -10,7 +10,7 @@ from ..utils import ( class TechTalksIE(InfoExtractor): - _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]*/)?(?P<id>\d+)/' _TEST = { 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', From 6eb5503b12286ef9813ee22e95622f09dab2ebe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 4 Oct 2016 02:54:36 +0700 Subject: [PATCH 1811/3599] [techtalks] Relax _VALID_URL --- youtube_dl/extractor/techtalks.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 0ec7b1273..a5b62c717 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -10,9 +10,9 @@ from ..utils import ( class TechTalksIE(InfoExtractor): - _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]*/)?(?P<id>\d+)/' + _VALID_URL = r'https?://techtalks\.tv/talks/(?:[^/]+/)?(?P<id>\d+)' - _TEST = { + _TESTS = [{ 
'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', 'info_dict': { 'id': '57758', @@ -38,7 +38,10 @@ class TechTalksIE(InfoExtractor): # rtmp download 'skip_download': True, }, - } + }, { + 'url': 'http://techtalks.tv/talks/57758', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 7232e54813481dc7b9b2ea9f70499a49badd75cc Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 4 Oct 2016 07:59:53 +0100 Subject: [PATCH 1812/3599] [tonline] Add new extractor(#10376) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tonline.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/tonline.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index f67e19526..e73956923 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -908,6 +908,7 @@ from .tnaflix import ( MovieFapIE, ) from .toggle import ToggleIE +from .tonline import TOnlineIE from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE diff --git a/youtube_dl/extractor/tonline.py b/youtube_dl/extractor/tonline.py new file mode 100644 index 000000000..cc11eae2a --- /dev/null +++ b/youtube_dl/extractor/tonline.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class TOnlineIE(InfoExtractor): + IE_NAME = 't-online.de' + _VALID_URL = r'https?://(?:www\.)?t-online\.de/tv/(?:[^/]+/)*id_(?P<id>\d+)' + _TEST = { + 'url': 'http://www.t-online.de/tv/sport/fussball/id_79166266/drittes-remis-zidane-es-muss-etwas-passieren-.html', + 'md5': '7d94dbdde5f9d77c5accc73c39632c29', + 'info_dict': { + 'id': '79166266', + 'ext': 'mp4', + 'title': 'Drittes Remis! 
Zidane: "Es muss etwas passieren"', + 'description': 'Es läuft nicht rund bei Real Madrid. Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id) + title = video_data['subtitle'] + + formats = [] + for asset in video_data.get('assets', []): + asset_source = asset.get('source') or asset.get('source2') + if not asset_source: + continue + formats_id = [] + for field_key in ('type', 'profile'): + field_value = asset.get(field_key) + if field_value: + formats_id.append(field_value) + formats.append({ + 'format_id': '-'.join(formats_id), + 'url': asset_source, + }) + + thumbnails = [] + for image in video_data.get('images', []): + image_source = image.get('source') + if not image_source: + continue + thumbnails.append({ + 'url': image_source, + }) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'thumbnails': thumbnails, + 'formats': formats, + } From 185744f92f172a5cd1db317fbf87fee733cfdfe6 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Tue, 4 Oct 2016 10:30:57 +0100 Subject: [PATCH 1813/3599] [lego] Add new extractor(closes #10369) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/lego.py | 86 ++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/lego.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e73956923..feee06004 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -437,6 +437,7 @@ from .lcp import ( ) from .learnr import LearnrIE from .lecture2go import Lecture2GoIE +from .lego import LEGOIE from .lemonde import LemondeIE from .leeco import ( LeIE, diff --git 
a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py new file mode 100644 index 000000000..5be7d622c --- /dev/null +++ b/youtube_dl/extractor/lego.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + unescapeHTML, + int_or_none, +) + + +class LEGOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)' + _TEST = { + 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', + 'md5': 'f34468f176cfd76488767fc162c405fa', + 'info_dict': { + 'id': '55492d823b1b4d5e985787fa8c2973b1', + 'ext': 'mp4', + 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', + } + } + _BITRATES = [256, 512, 1024, 1536, 2560] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.lego.com/en-US/mediaplayer/video/' + video_id, video_id) + title = self._search_regex(r'<title>(.+?)', webpage, 'title') + video_data = self._parse_json(unescapeHTML(self._search_regex( + r"video='([^']+)'", webpage, 'video data')), video_id) + progressive_base = self._search_regex( + r'data-video-progressive-url="([^"]+)"', + webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') + streaming_base = self._search_regex( + r'data-video-streaming-url="([^"]+)"', + webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') + item_id = video_data['ItemId'] + + net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) + base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) + path = '/'.join([net_storage_path, base_path]) + streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) + + formats = self._extract_akamai_formats( + 
'%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id) + m3u8_formats = list(filter( + lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + formats)) + if len(m3u8_formats) == len(self._BITRATES): + self._sort_formats(m3u8_formats) + for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats): + progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate) + mp4_f = m3u8_format.copy() + mp4_f.update({ + 'url': progressive_base_url + 'mp4', + 'format_id': m3u8_format['format_id'].replace('hls', 'mp4'), + 'protocol': 'http', + }) + web_f = { + 'url': progressive_base_url + 'webm', + 'format_id': m3u8_format['format_id'].replace('hls', 'webm'), + 'width': m3u8_format['width'], + 'height': m3u8_format['height'], + 'tbr': m3u8_format.get('tbr'), + 'ext': 'webm', + } + formats.extend([web_f, mp4_f]) + else: + for bitrate in self._BITRATES: + for ext in ('web', 'mp4'): + formats.append({ + 'format_id': '%s-%s' % (ext, bitrate), + 'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext), + 'tbr': bitrate, + 'ext': ext, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': video_data.get('CoverImageUrl'), + 'duration': int_or_none(video_data.get('Length')), + 'formats': formats, + } From 0a33bb2cb2ca401ffe88e520d7bbd7482d976cbc Mon Sep 17 00:00:00 2001 From: Steffan Donal Date: Tue, 4 Oct 2016 09:52:02 +0100 Subject: [PATCH 1814/3599] Rename "Steffan 'Ruirize' James" to "Steffan Donal" Legal name change! 
--- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 937742c5d..b6456052d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -26,7 +26,7 @@ Albert Kim Pierre Rudloff Huarong Huo Ismael Mejía -Steffan 'Ruirize' James +Steffan Donal Andras Elso Jelle van der Waa Marcin Cieślak From b1d798887e5bc26c938fe8c07ae5ccf382568f58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 5 Oct 2016 23:43:08 +0700 Subject: [PATCH 1815/3599] [npo] Add support for 2doc.nl (Closes #10842) --- youtube_dl/extractor/npo.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 9c7cc777b..c3915ec6e 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -459,8 +459,9 @@ class NPOPlaylistBaseIE(NPOIE): class VPROIE(NPOPlaylistBaseIE): IE_NAME = 'vpro' - _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P[^/]+)\.html' - _PLAYLIST_TITLE_RE = r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P[^/]+)\.html' + _PLAYLIST_TITLE_RE = (r']+class=["\'].*?\bmedia-platform-title\b.*?["\'][^>]*>([^<]+)', + r']+class=["\'].*?\bmedia-platform-subtitle\b.*?["\'][^>]*>([^<]+)') _PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"' _TESTS = [ @@ -492,6 +493,27 @@ class VPROIE(NPOPlaylistBaseIE): 'title': 'education education', }, 'playlist_count': 2, + }, + { + 'url': 'http://www.2doc.nl/documentaires/series/2doc/2015/oktober/de-tegenprestatie.html', + 'info_dict': { + 'id': 'de-tegenprestatie', + 'title': 'De Tegenprestatie', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.2doc.nl/speel~VARA_101375237~mh17-het-verdriet-van-nederland~.html', + 'info_dict': { + 'id': 'VARA_101375237', + 'ext': 'm4v', + 'title': 'MH17: Het verdriet van Nederland', + 'description': 'md5:09e1a37c1fdb144621e22479691a9f18', + 
'upload_date': '20150716', + }, + 'params': { + # Skip because of m3u8 download + 'skip_download': True + }, } ] From 017eb829343dfff9b70ab7f2278053f35cee953c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 5 Oct 2016 18:27:02 +0100 Subject: [PATCH 1816/3599] [npo] detect geo restriction --- youtube_dl/extractor/npo.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index c3915ec6e..c91f58461 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_HTTPError from ..utils import ( fix_xml_ampersands, orderedSet, @@ -10,6 +11,7 @@ from ..utils import ( qualities, strip_jsonp, unified_strdate, + ExtractorError, ) @@ -181,9 +183,16 @@ class NPOIE(NPOBaseIE): continue streams = format_info.get('streams') if streams: - video_info = self._download_json( - streams[0] + '&type=json', - video_id, 'Downloading %s stream JSON' % format_id) + try: + video_info = self._download_json( + streams[0] + '&type=json', + video_id, 'Downloading %s stream JSON' % format_id) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise else: video_info = format_info video_url = video_info.get('url') From 33898fb19c1af161c503ebce8f9a4774fecee45e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 6 Oct 2016 10:45:57 +0100 Subject: [PATCH 1817/3599] [nzz] Add new extractor(#4407) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nzz.py | 36 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 youtube_dl/extractor/nzz.py diff --git a/youtube_dl/extractor/extractors.py 
b/youtube_dl/extractor/extractors.py index feee06004..72bc4f57c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -638,6 +638,7 @@ from .nytimes import ( NYTimesArticleIE, ) from .nuvid import NuvidIE +from .nzz import NZZIE from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py new file mode 100644 index 000000000..2d352f53f --- /dev/null +++ b/youtube_dl/extractor/nzz.py @@ -0,0 +1,36 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, +) + + +class NZZIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P\d+)' + _TEST = { + 'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153', + 'info_dict': { + 'id': '9153', + }, + 'playlist_mincount': 6, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + entries = [] + for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage): + player_params = extract_attributes(player_element) + if player_params.get('data-type') not in ('kaltura_singleArticle',): + self.report_warning('Unsupported player type') + continue + entry_id = player_params['data-id'] + entries.append(self.url_result( + 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id)) + + return self.playlist_result(entries, page_id) From 09b9c45e242cb9e85beaa98b4783ec02065f1ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 6 Oct 2016 23:22:52 +0700 Subject: [PATCH 1818/3599] [generic] Add support for multiple vimeo embeds (Closes #10862) --- youtube_dl/extractor/generic.py | 6 +++--- youtube_dl/extractor/vimeo.py | 36 ++++++++++++++++++--------------- youtube_dl/extractor/vk.py | 2 +- 3 files changed, 24 
insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9ea306e3a..8ef8fb5f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1754,9 +1754,9 @@ class GenericIE(InfoExtractor): if matches: return _playlist_from_matches(matches, ie='RtlNl') - vimeo_url = VimeoIE._extract_vimeo_url(url, webpage) - if vimeo_url is not None: - return self.url_result(vimeo_url) + vimeo_urls = VimeoIE._extract_urls(url, webpage) + if vimeo_urls: + return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key()) vid_me_embed_url = self._search_regex( r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 309a47bf0..ea8fc5908 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -355,23 +355,27 @@ class VimeoIE(VimeoBaseInfoExtractor): return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) @staticmethod - def _extract_vimeo_url(url, webpage): + def _extract_urls(url, webpage): + urls = [] # Look for embedded (iframe) Vimeo player - mobj = re.search( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) - if mobj: - player_url = unescapeHTML(mobj.group('url')) - return VimeoIE._smuggle_referrer(player_url, url) - # Look for embedded (swf embed) Vimeo player - mobj = re.search( - r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) - if mobj: - return mobj.group(1) - # Look more for non-standard embedded Vimeo player - mobj = re.search( - r']+src=(?P[\'"])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage) - if mobj: - return mobj.group('url') + for mobj in re.finditer( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) + PLAIN_EMBED_RE = ( + # Look for embedded (swf embed) Vimeo player + 
r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1', + # Look more for non-standard embedded Vimeo player + r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1', + ) + for embed_re in PLAIN_EMBED_RE: + for mobj in re.finditer(embed_re, webpage): + urls.append(mobj.group('url')) + return urls + + @staticmethod + def _extract_url(url, webpage): + urls = VimeoIE._extract_urls(url, webpage) + return urls[0] if urls else None def _verify_player_video_password(self, url, video_id): password = self._downloader.params.get('videopassword') diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index ac77bc623..df43ba867 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -341,7 +341,7 @@ class VKIE(VKBaseIE): if youtube_url: return self.url_result(youtube_url, 'Youtube') - vimeo_url = VimeoIE._extract_vimeo_url(url, info_page) + vimeo_url = VimeoIE._extract_url(url, info_page) if vimeo_url is not None: return self.url_result(vimeo_url) From 831a34caa2112a9b2d867e05f8a4debf965e8389 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 03:28:41 +0800 Subject: [PATCH 1819/3599] [Makefilea] Fix for GNU make < 4 Closes #9387 The shell assignment operator != was introduced in GNU make 4.0, or specifically the commit in [1]. This fix removes such usages and fallback to a more portable syntax. 
Tested with: * GNU make 3.82 on CentOS 7.2 * bmake 20150910 on CentOS 7.2, source RPM from Fedora 24 [2] * GNU make 4.2.1 on Arch Linux (Arch official package) * bmake 20160926 on Arch Linux (Arch official package) * GNU make 3.82 on Arch Linux (Compiled from source) * Apple bsdmake-24 on macOS Sierra, binary package from Homebrew Thanks @bdeyal for the feedback of the first tests [1] http://git.savannah.gnu.org/cgit/make.git/commit/?id=b34438bee83ee906a23b881f257e684a0993b9b1 [2] http://koji.fedoraproject.org/koji/buildinfo?buildID=716769 --- ChangeLog | 6 ++++++ Makefile | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..be1cf90fb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* Support for GNU make < 4 is fixed (#9387) + + version 2016.10.02 Core diff --git a/Makefile b/Makefile index a2763a664..7393e3e1e 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From c0a7b9b348bb580d32fc94ee90c1b3b02b668a9e Mon Sep 17 00:00:00 2001 From: Yen Chi 
Hsuan Date: Fri, 7 Oct 2016 16:02:53 +0800 Subject: [PATCH 1820/3599] Revert "[Makefilea] Fix for GNU make < 4" This reverts commit 831a34caa2112a9b2d867e05f8a4debf965e8389. The reverted commit breaks lazy extractors. --- ChangeLog | 6 ------ Makefile | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index be1cf90fb..4f64edabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,3 @@ -version - -Core -* Support for GNU make < 4 is fixed (#9387) - - version 2016.10.02 Core diff --git a/Makefile b/Makefile index 7393e3e1e..a2763a664 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR = $$(if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) +SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES = $$(find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') +_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ From 3d83a1ae924902a0421bea8e2e6cd57bb34ee299 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 17:50:45 +0800 Subject: [PATCH 1821/3599] [generic] Support direct MMS links (closes #10838) --- ChangeLog | 6 ++++++ youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4f64edabb..55e60758d 100644 --- a/ChangeLog +++ 
b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [generic] Support direct MMS links (#10838) + + version 2016.10.02 Core diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8ef8fb5f4..1f18cbfe9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1412,6 +1412,18 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, + { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1551,6 +1563,13 @@ class GenericIE(InfoExtractor): else: video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + if parsed_url.scheme == 'mms': + return { + 'id': video_id, + 'title': video_id, + 'url': url, + } + self.to_screen('%s: Requesting header' % video_id) head_req = HEADRequest(url) From 98763ee354ffc13a57f28dbd006729affacb6d30 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:20:53 +0800 Subject: [PATCH 1822/3599] [extractor/common] Add id and title helpers for generic IEs --- youtube_dl/extractor/common.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1076b46da..da192728f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -21,6 +21,7 @@ from ..compat import ( compat_os_name, compat_str, compat_urllib_error, + compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, @@ -2020,6 +2021,12 @@ class InfoExtractor(object): headers['Ytdl-request-proxy'] = geo_verification_proxy return headers + def _generic_id(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) + + def 
_generic_title(self, url): + return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + class SearchInfoExtractor(InfoExtractor): """ From 9dcd6fd3aae77571116ee8b823b6b9224d0ef2ad Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:22:30 +0800 Subject: [PATCH 1823/3599] [generic,commonprotocols] Move mms suuport from GenericIE And use _generic_* helpers in those extractors --- ChangeLog | 2 +- youtube_dl/extractor/commonprotocols.py | 36 ++++++++++++++++++++----- youtube_dl/extractor/generic.py | 24 ++--------------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index 55e60758d..3aa4d67f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ version Extractors -+ [generic] Support direct MMS links (#10838) ++ [commonprotocols] Support direct MMS links (#10838) version 2016.10.02 diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dl/extractor/commonprotocols.py index 5d130a170..d98331a4e 100644 --- a/youtube_dl/extractor/commonprotocols.py +++ b/youtube_dl/extractor/commonprotocols.py @@ -1,13 +1,9 @@ from __future__ import unicode_literals -import os - from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_unquote, compat_urlparse, ) -from ..utils import url_basename class RtmpIE(InfoExtractor): @@ -23,8 +19,8 @@ class RtmpIE(InfoExtractor): }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + video_id = self._generic_id(url) + title = self._generic_title(url) return { 'id': video_id, 'title': title, @@ -34,3 +30,31 @@ class RtmpIE(InfoExtractor): 'format_id': compat_urlparse.urlparse(url).scheme, }], } + + +class MmsIE(InfoExtractor): + IE_DESC = False # Do not list + _VALID_URL = r'(?i)mms://.+' + + _TEST = { + # Direct MMS link + 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', + 
'info_dict': { + 'id': 'MilesReid(0709)', + 'ext': 'wmv', + 'title': 'MilesReid(0709)', + }, + 'params': { + 'skip_download': True, # rtsp downloads, requiring mplayer or mpv + }, + } + + def _real_extract(self, url): + video_id = self._generic_id(url) + title = self._generic_title(url) + + return { + 'id': video_id, + 'title': title, + 'url': url, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1f18cbfe9..7b8a9cf9a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -27,7 +27,6 @@ from ..utils import ( unified_strdate, unsmuggle_url, UnsupportedError, - url_basename, xpath_text, ) from .brightcove import ( @@ -1412,18 +1411,6 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 3, }, - { - # Direct MMS link - 'url': 'mms://kentro.kaist.ac.kr/200907/MilesReid(0709).wmv', - 'info_dict': { - 'id': 'MilesReid(0709)', - 'ext': 'wmv', - 'title': 'MilesReid(0709)', - }, - 'params': { - 'skip_download': True, # rtsp downloads, requiring mplayer or mpv - }, - }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1561,14 +1548,7 @@ class GenericIE(InfoExtractor): force_videoid = smuggled_data['force_videoid'] video_id = force_videoid else: - video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) - - if parsed_url.scheme == 'mms': - return { - 'id': video_id, - 'title': video_id, - 'url': url, - } + video_id = self._generic_id(url) self.to_screen('%s: Requesting header' % video_id) @@ -1597,7 +1577,7 @@ class GenericIE(InfoExtractor): info_dict = { 'id': video_id, - 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), + 'title': self._generic_title(url), 'upload_date': unified_strdate(head_response.headers.get('Last-Modified')) } From 85bcdd081ce0009bcb7135d8d68192d34969e168 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 19:31:26 +0800 Subject: [PATCH 1824/3599] [extractors] Add MmsIE --- 
youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 72bc4f57c..5c1d2abfb 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -186,7 +186,10 @@ from .comedycentral import ( ) from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE -from .commonprotocols import RtmpIE +from .commonprotocols import ( + MmsIE, + RtmpIE, +) from .condenast import CondeNastIE from .cracked import CrackedIE from .crackle import CrackleIE From 38588ab9770813cb92013b870edc15def4f9ac1c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 7 Oct 2016 20:04:49 +0800 Subject: [PATCH 1825/3599] [facebook] Fix for new handleServerJS syntax (closes #10846) According to the dump file in #10846, handleServerJS() now accepts an optional second argument. It's a string from available dump files. --- ChangeLog | 1 + youtube_dl/extractor/facebook.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3aa4d67f5..7aa0787ca 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 3a220e995..801573459 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -258,7 +258,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json(self._search_regex( - r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id) + r'handleServerJS\(({.+})(?:\);|,")', webpage, 'server js data', default='{}'), video_id) for item in server_js_data.get('instances', []): if item[1][0] == 'VideoConfig': video_data = video_data_list2dict(item[2][0]['videoData']) From 
3c6b3bf2217e91c0d01ca65fa2b013ffa132fdbc Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 7 Oct 2016 15:53:03 +0100 Subject: [PATCH 1826/3599] [iprima] detect geo restriction --- youtube_dl/extractor/iprima.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 788bbe0d5..da2cdc656 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -81,6 +81,9 @@ class IPrimaIE(InfoExtractor): for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage): extract_formats(src) + if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage: + self.raise_geo_restricted() + self._sort_formats(formats) return { From f475e8812197027ba7770a421e7fc7094ee8ae0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:15:26 +0700 Subject: [PATCH 1827/3599] [vimeo] PEP 8 [ci skip] --- youtube_dl/extractor/vimeo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ea8fc5908..a46c5c282 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -359,7 +359,8 @@ class VimeoIE(VimeoBaseInfoExtractor): urls = [] # Look for embedded (iframe) Vimeo player for mobj in re.finditer( - r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage): + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', + webpage): urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url)) PLAIN_EMBED_RE = ( # Look for embedded (swf embed) Vimeo player From 888f8d6ba40f17d8f8a13ca6259e0312d9befc87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:23:16 +0700 Subject: [PATCH 1828/3599] [ChangeLog] Actualize --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7aa0787ca..fb248f9e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,21 @@ 
version Extractors ++ [iprima] Detect geo restriction * [facebook] Fix video extraction (#10846) + [commonprotocols] Support direct MMS links (#10838) ++ [generic] Add support for multiple vimeo embeds (#10862) ++ [nzz] Add support for nzz.ch (#4407) ++ [npo] Detect geo restriction ++ [npo] Add support for 2doc.nl (#10842) ++ [lego] Add support for lego.com (#10369) ++ [tonline] Add support for t-online.de (#10376) +* [techtalks] Relax URL regular expression (#10840) +* [youtube:live] Extend URL regular expression (#10839) ++ [theweatherchannel] Add support for weather.com (#7188) ++ [thisoldhouse] Add support for thisoldhouse.com (#10837) ++ [nhl] Add support for wch2016.com (#10833) +* [pornoxo] Use JWPlatform to improve metadata extraction version 2016.10.02 From dd4291f72984037286dfd1800fdc07204b0b621a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 7 Oct 2016 22:25:30 +0700 Subject: [PATCH 1829/3599] release 2016.10.07 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index e813e4c59..15a93776b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.02*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.02** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.02 +[debug] youtube-dl version 2016.10.07 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index fb248f9e9..7e9b2b873 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.07 Extractors + [iprima] Detect geo restriction diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 828ed0ba9..5bbef0c41 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -364,6 +364,7 @@ - **Le**: 乐视网 - **Learnr** - **Lecture2Go** + - **LEGO** - **Lemonde** - **LePlaylist** - **LetvCloud**: 乐视云 @@ -507,6 +508,7 @@ - **Nuvid** - **NYTimes** - **NYTimesArticle** + - **NZZ** - **ocw.mit.edu** - **OdaTV** - **Odnoklassniki** @@ -692,6 +694,7 @@ - **SWRMediathek** - **Syfy** - **SztvHu** + - **t-online.de** - **Tagesschau** - **tagesschau:player** - **Tass** @@ -721,8 +724,10 @@ - **TheScene** - **TheSixtyOne** - **TheStar** + - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** + - **ThisOldHouse** - **tinypic**: tinypic.com videos - **tlc.de** - **TMZ** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 161ba4391..ac0921b7a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.02' 
+__version__ = '2016.10.07' From 1dd58e14d846a64a3c014531b1dc7a377648c73b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 8 Oct 2016 08:33:02 +0100 Subject: [PATCH 1830/3599] [lego] improve info extraction and bypass geo restriction(closes #10872) --- youtube_dl/extractor/lego.py | 88 ++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py index 5be7d622c..d3bca6435 100644 --- a/youtube_dl/extractor/lego.py +++ b/youtube_dl/extractor/lego.py @@ -1,45 +1,86 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( unescapeHTML, - int_or_none, + parse_duration, + get_element_by_class, ) class LEGOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lego\.com/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P[0-9a-f]+)' + _TESTS = [{ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1', 'md5': 'f34468f176cfd76488767fc162c405fa', 'info_dict': { 'id': '55492d823b1b4d5e985787fa8c2973b1', 'ext': 'mp4', 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi', - } - } + 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi', + }, + }, { + # geo-restricted but the contentUrl contain a valid url + 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399', + 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e', + 'info_dict': { + 'id': '13bdc2299ab24d9685701a915b3d71e7', + 'ext': 'mp4', + 'title': 'Aflevering 20 - Helden van het koninkrijk', + 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941', + }, + }, { + # special characters in title + 'url': 
'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d', + 'info_dict': { + 'id': '9685ee9d12e84ff38e84b4e3d0db533d', + 'ext': 'mp4', + 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters', + 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3', + }, + 'params': { + 'skip_download': True, + }, + }] _BITRATES = [256, 512, 1024, 1536, 2560] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://www.lego.com/en-US/mediaplayer/video/' + video_id, video_id) - title = self._search_regex(r'(.+?)', webpage, 'title') - video_data = self._parse_json(unescapeHTML(self._search_regex( - r"video='([^']+)'", webpage, 'video data')), video_id) - progressive_base = self._search_regex( - r'data-video-progressive-url="([^"]+)"', - webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') - streaming_base = self._search_regex( - r'data-video-streaming-url="([^"]+)"', - webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') - item_id = video_data['ItemId'] + locale, video_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id) + title = get_element_by_class('video-header', webpage).strip() + progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/' + streaming_base = 'http://legoprod-f.akamaihd.net/' + content_url = self._html_search_meta('contentUrl', webpage) + path = self._search_regex( + r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)', + content_url, 'video path', default=None) + if not path: + player_url = self._proto_relative_url(self._search_regex( + r']+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)', + webpage, 'player url', default=None)) + if not player_url: + base_url = self._proto_relative_url(self._search_regex( + r'data-baseurl="([^"]+)"', webpage, 'base url', + default='http://www.lego.com/%s/mediaplayer/video/' % locale)) + 
player_url = base_url + video_id + player_webpage = self._download_webpage(player_url, video_id) + video_data = self._parse_json(unescapeHTML(self._search_regex( + r"video='([^']+)'", player_webpage, 'video data')), video_id) + progressive_base = self._search_regex( + r'data-video-progressive-url="([^"]+)"', + player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/') + streaming_base = self._search_regex( + r'data-video-streaming-url="([^"]+)"', + player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/') + item_id = video_data['ItemId'] - net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) - base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) - path = '/'.join([net_storage_path, base_path]) + net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]]) + base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])]) + path = '/'.join([net_storage_path, base_path]) streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES)) formats = self._extract_akamai_formats( @@ -80,7 +121,8 @@ class LEGOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': video_data.get('CoverImageUrl'), - 'duration': int_or_none(video_data.get('Length')), + 'description': self._html_search_meta('description', webpage), + 'thumbnail': self._html_search_meta('thumbnail', webpage), + 'duration': parse_duration(self._html_search_meta('duration', webpage)), 'formats': formats, } From 3adb9d119e049d2bbc92fe2b56f1a22f4a664892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Fri, 30 Sep 2016 19:54:12 +0200 Subject: [PATCH 1831/3599] [reverbnation] Modernize --- youtube_dl/extractor/reverbnation.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git 
a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 3c6725aeb..52f18e231 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import str_or_none @@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', - 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'info_dict': { 'id': '16965047', 'ext': 'mp3', 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', - 'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$' + 'thumbnail': 're:^https?://.*\.jpg', }, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') + song_id = self._match_id(url) api_res = self._download_json( 'https://api.reverbnation.com/song/%s' % song_id, @@ -31,14 +28,24 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + thumbnails = [] + if api_res.get('image'): + thumbnails.append({ + 'url': api_res.get('image'), + }) + if api_res.get('thumbnail'): + thumbnails.append({ + 'url': api_res.get('thumbnail'), + 'preference': -2, + }) + return { 'id': song_id, - 'title': api_res.get('name'), - 'url': api_res.get('url'), + 'title': api_res['name'], + 'url': api_res['url'], 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), - 'thumbnail': self._proto_relative_url( - api_res.get('image', api_res.get('thumbnail'))), + 'thumbnails': thumbnails, 'ext': 'mp3', 'vcodec': 'none', } From f68901e50a9286aa4d82348cac0e85e26359c81c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Oct 2016 01:02:35 +0700 Subject: [PATCH 1832/3599] 
[reverbnation] Eliminate code duplication in thumbnails extraction --- youtube_dl/extractor/reverbnation.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 52f18e231..4875009e5 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import str_or_none +from ..utils import ( + qualities, + str_or_none, +) class ReverbNationIE(InfoExtractor): @@ -28,16 +31,15 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + THUMBNAILS = ('thumbnail', 'image') + quality = qualities(THUMBNAILS) thumbnails = [] - if api_res.get('image'): - thumbnails.append({ - 'url': api_res.get('image'), - }) - if api_res.get('thumbnail'): - thumbnails.append({ - 'url': api_res.get('thumbnail'), - 'preference': -2, - }) + for thumb_key in THUMBNAILS: + if api_res.get(thumb_key): + thumbnails.append({ + 'url': api_res[thumb_key], + 'preference': quality(thumb_key) + }) return { 'id': song_id, From 2b51dac1f9750f6eb4988f3c23b0e8f618136b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 1 Oct 2016 13:57:18 +0200 Subject: [PATCH 1833/3599] [slutload] Fix test and simplify --- youtube_dl/extractor/slutload.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 7efb29f65..18cc7721e 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor): _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P[^/]+)/?$' _TEST = { 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', - 
'md5': '0cf531ae8006b530bd9df947a6a0df77', + 'md5': '868309628ba00fd488cf516a113fd717', 'info_dict': { 'id': 'TD73btpBqSxc', 'ext': 'mp4', @@ -20,9 +18,7 @@ class SlutloadIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_title = self._html_search_regex(r'

([^<]+)', From 8204c733523675d505a8c726ec65b65e15485ce1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:22:55 +0800 Subject: [PATCH 1834/3599] [Makefile] Fix for GNU make < 4 (closes #9387) Shell assignment operator in BSD make != is ported to GNU make in version 4.0, so 3.x doesn't work. I choose to drop BSD make support as installing GNU make on *BSD systems is easier than installing newer GNU make. --- ChangeLog | 6 ++++++ Makefile | 4 ++-- README.md | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e9b2b873..3d3473a4b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Core +* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) + + version 2016.10.07 Extractors diff --git a/Makefile b/Makefile index a2763a664..8d66e48c9 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi +SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) @@ -90,7 +90,7 @@ fish-completion: youtube-dl.fish lazy-extractors: youtube_dl/extractor/lazy_extractors.py -_EXTRACTOR_FILES != find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py' +_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ diff --git a/README.md b/README.md index 4debe15fe..1cb44b2cf 100644 --- a/README.md +++ b/README.md @@ -923,7 +923,7 @@ To run the test, simply 
invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests From b0082629a9cf65796d503786c45c144d992010e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:42:15 +0800 Subject: [PATCH 1835/3599] =?UTF-8?q?[nextmedia]=20Support=20action=20news?= =?UTF-8?q?=20(=E5=8B=95=E6=96=B0=E8=81=9E)=20on=20Apple=20Daily?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog | 3 +++ youtube_dl/extractor/nextmedia.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 3d3473a4b..f74c6b5a4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) +Extractors ++ [nextmedia] Recognize action news on AppleDaily + version 2016.10.07 diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index a08e48c4b..dee9056d3 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews|actionnews)/[^/]+/[^/]+/(?P\d+)/(?P\d+)(/.*)?' 
_TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -154,6 +154,9 @@ class AppleDailyIE(NextMediaIE): 'description': 'md5:7b859991a6a4fedbdf3dd3b66545c748', 'upload_date': '20140417', }, + }, { + 'url': 'http://www.appledaily.com.tw/actionnews/appledaily/7/20161003/960588/', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From 65f4c1de3d442a49367597a80687fddcf3d142a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 18:58:15 +0800 Subject: [PATCH 1836/3599] [allocine] Fix extraction (closes #10860) I change the URL of the third test case, because now the original URL does not contain a video anymore, and there's no easy to get the real URL from the /film/ one. --- ChangeLog | 1 + youtube_dl/extractor/allocine.py | 57 ++++++++++++-------------------- 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/ChangeLog b/ChangeLog index f74c6b5a4..6c6053a2a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 7d280d871..b292ffdd9 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -1,29 +1,25 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import json - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( qualities, - unescapeHTML, - xpath_element, + url_basename, ) class AllocineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' 
+ _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' _TESTS = [{ 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', 'md5': '0c9fcf59a841f65635fa300ac43d8269', 'info_dict': { 'id': '19546517', + 'display_id': '18635087', 'ext': 'mp4', 'title': 'Astérix - Le Domaine des Dieux Teaser VF', - 'description': 'md5:abcd09ce503c6560512c14ebfdb720d2', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -31,19 +27,21 @@ class AllocineIE(InfoExtractor): 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', 'info_dict': { 'id': '19540403', + 'display_id': '19540403', 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway', 'thumbnail': 're:http://.*\.jpg', }, }, { - 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', 'md5': '101250fb127ef9ca3d73186ff22a47ce', 'info_dict': { 'id': '19544709', + 'display_id': '19544709', 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', - 'description': 'md5:601d15393ac40f249648ef000720e7e3', + 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -52,43 +50,30 @@ class AllocineIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - typ = mobj.group('typ') - display_id = mobj.group('id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - if typ == 'film': - video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') - else: - player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player', default=None) - if player: - player_data = json.loads(player) - 
video_id = compat_str(player_data['refMedia']) - else: - model = self._search_regex(r'data-model="([^"]+)">', webpage, 'data model') - model_data = self._parse_json(unescapeHTML(model), display_id) - video_id = compat_str(model_data['id']) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model') + model_data = self._parse_json(model, display_id) - xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) - - video = xpath_element(xml, './/AcVisionVideo').attrib quality = qualities(['ld', 'md', 'hd']) formats = [] - for k, v in video.items(): - if re.match(r'.+_path', k): - format_id = k.split('_')[0] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': v, - }) + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) self._sort_formats(formats) return { 'id': video_id, - 'title': video['videoTitle'], + 'display_id': display_id, + 'title': model_data['title'], 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From 176006a1202cc6ef3d0a768ace41f97516c76c6d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 9 Oct 2016 19:41:44 +0800 Subject: [PATCH 1837/3599] [allocine] Fix for /video/ videos (closes #10860) --- youtube_dl/extractor/allocine.py | 58 ++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index b292ffdd9..517b06def 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + remove_end, qualities, url_basename, ) @@ -46,7 +47,14 @@ class AllocineIE(InfoExtractor): }, }, { 
'url': 'http://www.allocine.fr/video/video-19550147/', - 'only_matching': True, + 'md5': '3566c0668c0235e2d224fd8edb389f67', + 'info_dict': { + 'id': '19550147', + 'ext': 'mp4', + 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger', + 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): @@ -54,26 +62,48 @@ class AllocineIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - model = self._html_search_regex( - r'data-model="([^"]+)"', webpage, 'data model') - model_data = self._parse_json(model, display_id) - + formats = [] quality = qualities(['ld', 'md', 'hd']) - formats = [] - for video_url in model_data['sources'].values(): - video_id, format_id = url_basename(video_url).split('_')[:2] - formats.append({ - 'format_id': format_id, - 'quality': quality(format_id), - 'url': video_url, - }) + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model', default=None) + if model: + model_data = self._parse_json(model, display_id) + + for video_url in model_data['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) + + title = model_data['title'] + else: + video_id = display_id + media_data = self._download_json( + 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + for key, value in media_data['video'].items(): + if not key.endswith('Path'): + continue + + format_id = key[:-len('Path')] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': value, + }) + + title = remove_end(self._html_search_regex( + r'(?s)(.+?)', webpage, 'title' + ).strip(), ' - AlloCiné') + self._sort_formats(formats) return { 'id': video_id, 'display_id': display_id, - 'title': model_data['title'], + 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 
'description': self._og_search_description(webpage), From 71cdcb23316b55baf9330741ede4c77d08e4d77f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:30:35 +0800 Subject: [PATCH 1838/3599] [hbo] Support episode pages (closes #10892) --- ChangeLog | 1 + youtube_dl/extractor/hbo.py | 63 ++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c6053a2a..d49682c8b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors ++ [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index dad0f3994..3606d64fd 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -12,17 +12,7 @@ from ..utils import ( ) -class HBOIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' - _TEST = { - 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', - 'md5': '1c33253f0c7782142c993c0ba62a8753', - 'info_dict': { - 'id': '1437839', - 'ext': 'mp4', - 'title': 'Ep. 
64 Clip: Encryption', - } - } +class HBOBaseIE(InfoExtractor): _FORMATS_INFO = { '1920': { 'width': 1280, @@ -50,8 +40,7 @@ class HBOIE(InfoExtractor): }, } - def _real_extract(self, url): - video_id = self._match_id(url) + def _extract_from_id(self, video_id): video_data = self._download_xml( 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id) title = xpath_text(video_data, 'title', 'title', True) @@ -116,7 +105,53 @@ class HBOIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'duration': parse_duration(xpath_element(video_data, 'duration/tv14')), + 'duration': parse_duration(xpath_text(video_data, 'duration/tv14')), 'formats': formats, 'thumbnails': thumbnails, } + + +class HBOIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P[0-9]+)' + _TEST = { + 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', + 'md5': '1c33253f0c7782142c993c0ba62a8753', + 'info_dict': { + 'id': '1437839', + 'ext': 'mp4', + 'title': 'Ep. 64 Clip: Encryption', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 1072, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._extract_from_id(video_id) + + +class HBOEpisodeIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P[0-9a-z-]+)\.html' + + _TESTS = [{ + 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', + 'md5': '689132b253cc0ab7434237fc3a293210', + 'info_dict': { + 'id': '1439518', + 'ext': 'mp4', + 'title': 'Ep. 
52: Inside the Episode', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 240, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', + webpage, 'video ID', group='video_id') + + return self._extract_from_id(video_id) From 27b8d2ee9535e19cdaed69de7a08ba0e026700e0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:41:30 +0800 Subject: [PATCH 1839/3599] [hbo] Add display_id and another test (#10892) --- youtube_dl/extractor/hbo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py index 3606d64fd..cbf774377 100644 --- a/youtube_dl/extractor/hbo.py +++ b/youtube_dl/extractor/hbo.py @@ -138,11 +138,15 @@ class HBOEpisodeIE(HBOBaseIE): 'md5': '689132b253cc0ab7434237fc3a293210', 'info_dict': { 'id': '1439518', + 'display_id': 'ep-52-inside-the-episode', 'ext': 'mp4', 'title': 'Ep. 
52: Inside the Episode', 'thumbnail': 're:https?://.*\.jpg$', 'duration': 240, }, + }, { + 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', + 'only_matching': True, }] def _real_extract(self, url): @@ -154,4 +158,7 @@ class HBOEpisodeIE(HBOBaseIE): r'(?P[\'"])videoId(?P=q1)\s*:\s*(?P[\'"])(?P\d+)(?P=q2)', webpage, 'video ID', group='video_id') - return self._extract_from_id(video_id) + info_dict = self._extract_from_id(video_id) + info_dict['display_id'] = display_id + + return info_dict From f165ca70eb4f7911949278e17751092a3cc8619f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 12:53:27 +0800 Subject: [PATCH 1840/3599] [abc.net.au:iview] Fix for non-series videos (closes #10895) --- ChangeLog | 1 + youtube_dl/extractor/abc.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index d49682c8b..26f01790a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 465249bbf..0247cabf9 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -102,16 +102,16 @@ class ABCIViewIE(InfoExtractor): # ABC iview programs are normally available for 14 days only. 
_TESTS = [{ - 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', - 'md5': '979d10b2939101f0d27a06b79edad536', + 'url': 'http://iview.abc.net.au/programs/diaries-of-a-broken-mind/ZX9735A001S00', + 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', 'info_dict': { - 'id': 'FA1505V024S00', + 'id': 'ZX9735A001S00', 'ext': 'mp4', - 'title': 'Series 27 Ep 24', - 'description': 'md5:b28baeae7504d1148e1d2f0e3ed3c15d', - 'upload_date': '20160820', - 'uploader_id': 'abc1', - 'timestamp': 1471719600, + 'title': 'Diaries Of A Broken Mind', + 'description': 'md5:7de3903874b7a1be279fe6b68718fc9e', + 'upload_date': '20161010', + 'uploader_id': 'abc2', + 'timestamp': 1476064920, }, 'skip': 'Video gone', }] @@ -121,7 +121,7 @@ class ABCIViewIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_params = self._parse_json(self._search_regex( r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id) - title = video_params['title'] + title = video_params.get('title') or video_params['seriesTitle'] stream = next(s for s in video_params['playlist'] if s.get('type') == 'program') formats = self._extract_akamai_formats(stream['hds-unmetered'], video_id) @@ -144,8 +144,8 @@ class ABCIViewIE(InfoExtractor): 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), 'series': video_params.get('seriesTitle'), 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], - 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage)), - 'episode': self._html_search_meta('episode_title', webpage), + 'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)), + 'episode': self._html_search_meta('episode_title', webpage, default=None), 'uploader_id': video_params.get('channel'), 'formats': formats, 'subtitles': subtitles, From 555787d717985531b3beba566cb976fd3f849aaa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:44:35 +0800 Subject: [PATCH 1841/3599] [streamable] Add 
helper for extracting embedded videos --- youtube_dl/extractor/streamable.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 1c61437a4..56b926448 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -48,6 +50,15 @@ class StreamableIE(InfoExtractor): } ] + @staticmethod + def _extract_url(webpage): + print(webpage) + mobj = re.search( + r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', + webpage) + if mobj: + return mobj.group('src') + def _real_extract(self, url): video_id = self._match_id(url) From c452e69d3d3e6bbbec298a5d4b032cb502cef0ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:13 +0800 Subject: [PATCH 1842/3599] [footyroom] Fix extraction and update _TESTS (closes #10810) --- ChangeLog | 1 + youtube_dl/extractor/footyroom.py | 32 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 26f01790a..76c446a6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index d2503ae2e..118325b6d 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -2,25 +2,27 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .streamable import StreamableIE class FootyRoomIE(InfoExtractor): - _VALID_URL = r'https?://footyroom\.com/(?P[^/]+)' + _VALID_URL = 
r'https?://footyroom\.com/matches/(?P\d+)' _TESTS = [{ - 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', + 'url': 'http://footyroom.com/matches/79922154/hull-city-vs-chelsea/review', 'info_dict': { - 'id': 'schalke-04-0-2-real-madrid-2015-02', - 'title': 'Schalke 04 0 – 2 Real Madrid', + 'id': '79922154', + 'title': 'VIDEO Hull City 0 - 2 Chelsea', }, - 'playlist_count': 3, - 'skip': 'Video for this match is not available', + 'playlist_count': 2, + 'add_ie': [StreamableIE.ie_key()], }, { - 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', + 'url': 'http://footyroom.com/matches/75817984/georgia-vs-germany/review', 'info_dict': { - 'id': 'georgia-0-2-germany-2015-03', - 'title': 'Georgia 0 – 2 Germany', + 'id': '75817984', + 'title': 'VIDEO Georgia 0 - 2 Germany', }, 'playlist_count': 1, + 'add_ie': ['Playwire'] }] def _real_extract(self, url): @@ -28,9 +30,8 @@ class FootyRoomIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - playlist = self._parse_json( - self._search_regex( - r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'), + playlist = self._parse_json(self._search_regex( + r'DataStore\.media\s*=\s*([^;]+)', webpage, 'media data'), playlist_id) playlist_title = self._og_search_title(webpage) @@ -40,11 +41,16 @@ class FootyRoomIE(InfoExtractor): payload = video.get('payload') if not payload: continue - playwire_url = self._search_regex( + playwire_url = self._html_search_regex( r'data-config="([^"]+)"', payload, 'playwire url', default=None) if playwire_url: entries.append(self.url_result(self._proto_relative_url( playwire_url, 'http:'), 'Playwire')) + streamable_url = StreamableIE._extract_url(payload) + if streamable_url: + entries.append(self.url_result( + streamable_url, StreamableIE.ie_key())) + return self.playlist_result(entries, playlist_id, playlist_title) From 3d643f4cec5ded2be9958d5cd0e31176b2074e37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 17:46:52 +0800 
Subject: [PATCH 1843/3599] [hbo] Add HBOEpisodeIE (#10892) --- youtube_dl/extractor/extractors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5c1d2abfb..08bed8b0c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -348,7 +348,10 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE -from .hbo import HBOIE +from .hbo import ( + HBOIE, + HBOEpisodeIE, +) from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE From 55642487f072565bea3b2826b836a1a3159a3807 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 11 Oct 2016 20:50:52 +0800 Subject: [PATCH 1844/3599] [nhl] Skip invalid m3u8 formats (closes #10713) --- ChangeLog | 1 + youtube_dl/extractor/nhl.py | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 76c446a6d..9a7e7133b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) + [hbo] Add support for episode pages (#10892) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 26149c88f..62ce800c0 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -274,6 +274,18 @@ class NHLIE(InfoExtractor): 'upload_date': '20160204', 'timestamp': 1454544904, }, + }, { + # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713) + 'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003', + 'md5': '50b2bb47f405121484dda3ccbea25459', + 'info_dict': { + 'id': '44315003', + 'ext': 'mp4', + 'title': 'Poile, 
Laviolette on Subban trade', + 'description': 'General manager David Poile and head coach Peter Laviolette share their thoughts on acquiring P.K. Subban from Montreal (06/29/16)', + 'timestamp': 1467242866, + 'upload_date': '20160629', + }, }, { 'url': 'https://www.wch2016.com/video/caneur-best-of-game-2-micd-up/t-281230378/c-44983703', 'only_matching': True, @@ -301,9 +313,11 @@ class NHLIE(InfoExtractor): continue ext = determine_ext(playback_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( playback_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=playback.get('name', 'hls'), fatal=False)) + m3u8_id=playback.get('name', 'hls'), fatal=False) + self._check_formats(m3u8_formats, video_id) + formats.extend(m3u8_formats) else: height = int_or_none(playback.get('height')) formats.append({ From cea364f70c97dad933fa38698f3c9df1bdb485cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:40:28 +0800 Subject: [PATCH 1845/3599] [extractor/common] Support HTML media elements without child nodes --- ChangeLog | 1 + youtube_dl/extractor/common.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9a7e7133b..49488c888 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Core ++ Support HTML media elements without child nodes * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da192728f..431cef831 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1802,7 +1802,11 @@ class InfoExtractor(object): return is_plain_url, formats entries = [] - for media_tag, media_type, media_content in re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage): + media_tags = [(media_tag, media_type, '') + for media_tag, media_type + in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)] + 
media_tags.extend(re.findall(r'(?s)(<(?Pvideo|audio)[^>]*>)(.*?)', webpage)) + for media_tag, media_type, media_content in media_tags: media_info = { 'formats': [], 'subtitles': {}, From 6f20b65e728ee30d9b987a39932a3355501f7f67 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:41:41 +0800 Subject: [PATCH 1846/3599] [test/test_http] Update tests After switching to HTML5 extraction helpers in generic.py, the result info_dict is always a playlist. --- test/test_http.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index fdc68ccb4..bb0a098e4 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -87,7 +87,7 @@ class TestHTTP(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger()}) r = ydl.extract_info('http://localhost:%d/302' % self.port) - self.assertEqual(r['url'], 'http://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'http://localhost:%d/vid.mp4' % self.port) class TestHTTPS(unittest.TestCase): @@ -111,7 +111,7 @@ class TestHTTPS(unittest.TestCase): ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) r = ydl.extract_info('https://localhost:%d/video.html' % self.port) - self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port) + self.assertEqual(r['entries'][0]['url'], 'https://localhost:%d/vid.mp4' % self.port) def _build_proxy_handler(name): From a093cfc78b584a6e5dbc4bbca525f9e40af9522d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 01:48:06 +0800 Subject: [PATCH 1847/3599] [vimeo:review] Fix extraction (#10900) Now Vimeo Review videos uses React. Thanks @davekaro for analyzing the problem! 
--- ChangeLog | 1 + youtube_dl/extractor/vimeo.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 49488c888..3e16a2cb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) * [abc.net.au:iview] Fix for standalone (non series) videos (#10895) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a46c5c282..b566241cc 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -837,6 +837,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): 'params': { 'videopassword': 'holygrail', }, + 'skip': 'video gone', }] def _real_initialize(self): @@ -844,9 +845,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): def _get_config_url(self, webpage_url, video_id, video_password_verified=False): webpage = self._download_webpage(webpage_url, video_id) - config_url = self._html_search_regex( - r'data-config-url="([^"]+)"', webpage, 'config URL', - default=NO_DEFAULT if video_password_verified else None) + data = self._parse_json(self._search_regex( + r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data', + default=NO_DEFAULT if video_password_verified else '{}'), video_id) + config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl') if config_url is None: self._verify_video_password(webpage_url, video_id, webpage) config_url = self._get_config_url( From 9feb1c97318bbd575af6c2737dfe66412e1c0bb6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 12 Oct 2016 21:45:49 +0800 Subject: [PATCH 1848/3599] [dailymotion] Fix extraction and update _TESTS Closes #10901 Seems all videos use player V5 syntax now --- ChangeLog | 1 + youtube_dl/extractor/dailymotion.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ChangeLog 
b/ChangeLog index 3e16a2cb3..fd3e8c2fa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,6 +5,7 @@ Core * [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) Extractors +* [dailymotion] Fix extraction (#10901) * [vimeo:review] Fix extraction (#10900) * [nhl] Correctly handle invalid formats (#10713) * [footyroom] Fix extraction (#10810) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 62b0747a5..4a3314ea7 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -94,7 +94,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', 'uploader': 'HotWaves1012', 'age_limit': 18, - } + }, + 'skip': 'video gone', }, # geo-restricted, player v5 { @@ -144,7 +145,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): player_v5 = self._search_regex( [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', - r'buildPlayer\(({.+?})\);'], + r'buildPlayer\(({.+?})\);', + r'var\s+config\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) From 591e384552f44fe5d77015d17fa7f71efa66f778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:12 +0700 Subject: [PATCH 1849/3599] [streamable] Remove debug output --- youtube_dl/extractor/streamable.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py index 56b926448..2c26fa689 100644 --- a/youtube_dl/extractor/streamable.py +++ b/youtube_dl/extractor/streamable.py @@ -52,7 +52,6 @@ class StreamableIE(InfoExtractor): @staticmethod def _extract_url(webpage): - print(webpage) mobj = re.search( r']+src=(?P[\'"])(?P(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)', webpage) From bcd6276520e67f59b95dffdb280703328cab82de Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:22:33 +0700 Subject: [PATCH 1850/3599] [downloader/common] Remove debug output --- youtube_dl/downloader/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8482cbd84..3dc144b4e 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -346,7 +346,6 @@ class FileDownloader(object): min_sleep_interval = self.params.get('sleep_interval') if min_sleep_interval: max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - print(min_sleep_interval, max_sleep_interval) sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) self.to_screen('[download] Sleeping %s seconds...' % sleep_interval) time.sleep(sleep_interval) From 7104ae799c18e36070d91d570d48c55d651cd4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:25:04 +0700 Subject: [PATCH 1851/3599] [ChangeLog] Actualize --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index fd3e8c2fa..cc526429a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,7 @@ Extractors + [hbo] Add support for episode pages (#10892) * [allocine] Fix extraction (#10860) + [nextmedia] Recognize action news on AppleDaily +* [lego] Improve info extraction and bypass geo restriction (#10872) version 2016.10.07 From 5c4bfd4da5d532bf8d5aaf1bb37396f7cfbc786b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Oct 2016 21:30:05 +0700 Subject: [PATCH 1852/3599] release 2016.10.12 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 2 +- ChangeLog | 2 +- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 15a93776b..865817681 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 
@@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.07** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.10.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.10.12** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.10.07 +[debug] youtube-dl version 2016.10.12 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 95392030e..62acf9abd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file If you want to create a build of youtube-dl yourself, you'll need * python -* make (both GNU make and BSD make are supported) +* make (only GNU make is supported) * pandoc * zip * nosetests diff --git a/ChangeLog b/ChangeLog index cc526429a..e3a733410 100644 --- 
a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.10.12 Core + Support HTML media elements without child nodes diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 5bbef0c41..9b540b3df 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -289,6 +289,7 @@ - **Groupon** - **Hark** - **HBO** + - **HBOEpisode** - **HearThisAt** - **Heise** - **HellPorno** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ac0921b7a..44cc18828 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.10.07' +__version__ = '2016.10.12' From 580d41193169d004c94145ef03d5c53f06d5a57c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 00:44:28 +0800 Subject: [PATCH 1853/3599] [parliamentliveuk] Recognize lower case URLs Closes #10912 Seems parliamentliveuk matches URLs case-insentive. For example this URL also works: http://parliamentlive.tv/EvEnt/Index/3F24936f-130f-40bf-9a5d-b3d6479da6a4 --- ChangeLog | 6 ++++++ youtube_dl/extractor/parliamentliveuk.py | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e3a733410..d2b78a489 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [parliamentliveuk] Lower case URLs are now recognized (#10912) + + version 2016.10.12 Core diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dl/extractor/parliamentliveuk.py index 874aacc55..ebdab8db9 100644 --- a/youtube_dl/extractor/parliamentliveuk.py +++ b/youtube_dl/extractor/parliamentliveuk.py @@ -6,9 +6,9 @@ from .common import InfoExtractor class ParliamentLiveUKIE(InfoExtractor): IE_NAME = 'parliamentlive.tv' IE_DESC = 'UK parliament videos' - _VALID_URL = r'https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _VALID_URL = 
r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - _TEST = { + _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', @@ -18,7 +18,10 @@ class ParliamentLiveUKIE(InfoExtractor): 'timestamp': 1422696664, 'upload_date': '20150131', }, - } + }, { + 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From b7f59a3bf69b5c935be085551d30ce4d0b8a97d4 Mon Sep 17 00:00:00 2001 From: Philip Xu Date: Thu, 13 Oct 2016 21:51:26 -0400 Subject: [PATCH 1854/3599] [huajiao] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/huajiao.py | 50 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/huajiao.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 08bed8b0c..75e16af4e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -372,6 +372,7 @@ from .hrti import ( HRTiIE, HRTiPlaylistIE, ) +from .huajiao import HuajiaoIE from .huffpost import HuffPostIE from .hypem import HypemIE from .iconosquare import IconosquareIE diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dl/extractor/huajiao.py new file mode 100644 index 000000000..352b48120 --- /dev/null +++ b/youtube_dl/extractor/huajiao.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..utils import parse_duration, parse_iso8601 +from .common import InfoExtractor + + +class HuajiaoIE(InfoExtractor): + IE_DESC = '花椒直播' + _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.huajiao.com/l/38941232', + 'md5': 'd08bf9ac98787d24d1e4c0283f2d372d', + 'info_dict': { + 'id': '38941232', + 'ext': 'mp4', + 'title': 
'#新人求关注#', + 'description': 're:.*', + 'duration': 2424.0, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1475866459, + 'upload_date': '20161007', + 'uploader': 'Penny_余姿昀', + 'uploader_id': '75206005', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + feed_json = self._search_regex( + r'var\s*feed\s*=\s*({.*})', webpage, 'feed json str') + feed = self._parse_json(feed_json, video_id) + + description = self._html_search_meta( + 'description', webpage, 'description', fatal=False) + + return { + 'id': video_id, + 'title': feed['feed']['formated_title'], + 'description': description, + 'duration': parse_duration(feed['feed']['duration']), + 'thumbnail': feed['feed']['image'], + 'timestamp': parse_iso8601(feed['creatime'], ' '), + 'uploader': feed['author']['nickname'], + 'uploader_id': feed['author']['uid'], + 'formats': self._extract_m3u8_formats( + feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'), + } From a5f847314582d4464e587120c6e696399ff121cb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 18:20:01 +0800 Subject: [PATCH 1855/3599] [cbsinteractive] Fix extraction for cnet.com --- ChangeLog | 1 + youtube_dl/extractor/cbsinteractive.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index d2b78a489..edd547811 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [cbsinteractive] Fix extraction for cnet.com * [parliamentliveuk] Lower case URLs are now recognized (#10912) diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py index 821db20b2..57b18e81d 100644 --- a/youtube_dl/extractor/cbsinteractive.py +++ b/youtube_dl/extractor/cbsinteractive.py @@ -63,7 +63,7 @@ class CBSInteractiveIE(ThePlatformIE): webpage = self._download_webpage(url, display_id) data_json = self._html_search_regex( - r"data-(?:cnet|zdnet)-video(?:-uvp)?-options='([^']+)'", + 
r"data-(?:cnet|zdnet)-video(?:-uvp(?:js)?)?-options='([^']+)'", webpage, 'data json') data = self._parse_json(data_json, display_id) vdata = data.get('video') or data['videos'][0] From e2004ccaf711ff9aa9c0b647c3d6219093fb6c2a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 20:26:12 +0800 Subject: [PATCH 1856/3599] [canalplus] Fix video_id and update _TESTS Some tests are gone, and some redirect to different videos --- ChangeLog | 1 + youtube_dl/extractor/canalplus.py | 64 +++++++++++++++---------------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index edd547811..32390f227 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [canalplus] Fix extraction for some videos * [cbsinteractive] Fix extraction for cnet.com * [parliamentliveuk] Lower case URLs are now recognized (#10912) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 6dab226af..1c3c41d26 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -6,11 +6,13 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( + dict_get, ExtractorError, HEADRequest, - unified_strdate, - qualities, int_or_none, + qualities, + remove_end, + unified_strdate, ) @@ -43,47 +45,46 @@ class CanalplusIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1192814', - 'md5': '41f438a4904f7664b91b4ed0dec969dc', 'info_dict': { - 'id': '1192814', + 'id': '1405510', + 'display_id': 'pid1830-c-zapping', 'ext': 'mp4', - 'title': "L'Année du Zapping 2014 - L'Année du Zapping 2014", - 'description': "Toute l'année 2014 dans un Zapping exceptionnel !", - 'upload_date': '20150105', + 'title': 'Zapping - 02/07/2016', + 'description': 'Le meilleur de toutes les chaînes, tous les jours', + 'upload_date': '20160702', }, }, { 'url': 
'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', 'info_dict': { 'id': '1108190', - 'ext': 'flv', - 'title': 'Le labyrinthe - Boing super ranger', + 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger', + 'ext': 'mp4', + 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe', 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff', 'upload_date': '20140724', }, 'skip': 'Only works from France', }, { - 'url': 'http://www.d8.tv/d8-docs-mags/pid5198-d8-en-quete-d-actualite.html?vid=1390231', + 'url': 'http://www.c8.fr/c8-divertissement/ms-touche-pas-a-mon-poste/pid6318-videos-integrales.html', + 'md5': '4b47b12b4ee43002626b97fad8fb1de5', 'info_dict': { - 'id': '1390231', + 'id': '1420213', + 'display_id': 'pid6318-videos-integrales', 'ext': 'mp4', - 'title': "Vacances pas chères : prix discount ou grosses dépenses ? - En quête d'actualité", - 'description': 'md5:edb6cf1cb4a1e807b5dd089e1ac8bfc6', - 'upload_date': '20160512', - }, - 'params': { - 'skip_download': True, + 'title': 'TPMP ! 
Même le matin - Les 35H de Baba - 14/10/2016', + 'description': 'md5:f96736c1b0ffaa96fd5b9e60ad871799', + 'upload_date': '20161014', }, + 'skip': 'Only works from France', }, { - 'url': 'http://www.itele.fr/chroniques/invite-bruce-toussaint/thierry-solere-nicolas-sarkozy-officialisera-sa-candidature-a-la-primaire-quand-il-le-voudra-167224', + 'url': 'http://www.itele.fr/chroniques/invite-michael-darmon/rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'info_dict': { - 'id': '1398334', + 'id': '1420176', + 'display_id': 'rachida-dati-nicolas-sarkozy-est-le-plus-en-phase-avec-les-inquietudes-des-francais-171510', 'ext': 'mp4', - 'title': "L'invité de Bruce Toussaint du 07/06/2016 - ", - 'description': 'md5:40ac7c9ad0feaeb6f605bad986f61324', - 'upload_date': '20160607', - }, - 'params': { - 'skip_download': True, + 'title': 'L\'invité de Michaël Darmon du 14/10/2016 - ', + 'description': 'Chaque matin du lundi au vendredi, Michaël Darmon reçoit un invité politique à 8h25.', + 'upload_date': '20161014', }, }, { 'url': 'http://m.canalplus.fr/?vid=1398231', @@ -95,18 +96,17 @@ class CanalplusIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.groupdict().get('id') or mobj.groupdict().get('vid') site_id = self._SITE_ID_MAP[compat_urllib_parse_urlparse(url).netloc.rsplit('.', 2)[-2]] # Beware, some subclasses do not define an id group - display_id = mobj.group('display_id') or video_id + display_id = remove_end(dict_get(mobj.groupdict(), ('display_id', 'id', 'vid')), '.html') - if video_id is None: - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - [r']+?videoId=(["\'])(?P\d+)', r'id=["\']canal_video_player(?P\d+)'], - webpage, 'video id', group='id') + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + [r']+?videoId=(["\'])(?P\d+)', + r'id=["\']canal_video_player(?P\d+)'], + webpage, 'video id', group='id') 
info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) video_data = self._download_json(info_url, video_id, 'Downloading video JSON') From 146969e05bc2e2774aa96c62030cdb85ca5c7667 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Oct 2016 23:42:11 +0800 Subject: [PATCH 1857/3599] [videomore] Support ' + PLAYER_REGEX = r'