From 63fc80005782430180ed0d13ac6ab5ca497d333a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 Mar 2015 23:17:19 +0800 Subject: [PATCH 0001/2721] [Letv] Fix test_Letv and test_Letv_1 failures in python 3 --- youtube_dl/extractor/letv.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 85eee141b..9ed81a199 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -88,9 +88,10 @@ class LetvIE(InfoExtractor): play_json_req = compat_urllib_request.Request( 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) ) - play_json_req.add_header( - 'Ytdl-request-proxy', - self._downloader.params.get('cn_verification_proxy')) + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy) + play_json = self._download_json( play_json_req, media_id, 'playJson data') From 5ee6fc974e617ce2f8d9d62c416091a1daa6d802 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 6 Mar 2015 02:43:05 +0800 Subject: [PATCH 0002/2721] [sohu] Fix info extractor and add tests --- youtube_dl/extractor/sohu.py | 46 +++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index c04791997..ef7ec51df 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -4,22 +4,42 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from .common import compat_str +from ..utils import compat_str +from ..compat import compat_urllib_request class SohuIE(InfoExtractor): _VALID_URL = r'https?://(?Pmy\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P\d+)\.shtml.*?' 
- _TEST = { + _TESTS = [{ + 'note': 'This video is available only in Mainland China', 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super', - 'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7', + 'md5': '29175c8cadd8b5cc4055001e85d6b372', 'info_dict': { 'id': '382479172', 'ext': 'mp4', 'title': 'MV:Far East Movement《The Illest》', }, - 'skip': 'Only available from China', - } + 'params': { + 'cn_verification_proxy': 'proxy.uku.im:8888' + } + }, { + 'url': 'http://tv.sohu.com/20150305/n409385080.shtml', + 'md5': '699060e75cf58858dd47fb9c03c42cfb', + 'info_dict': { + 'id': '409385080', + 'ext': 'mp4', + 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》', + } + }, { + 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml', + 'md5': '9bf34be48f2f4dadcb226c74127e203c', + 'info_dict': { + 'id': '78693464', + 'ext': 'mp4', + 'title': '【爱范品】第31期:MWC见不到的奇葩手机', + } + }] def _real_extract(self, url): @@ -29,9 +49,14 @@ class SohuIE(InfoExtractor): else: base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' - return self._download_json( - base_data_url + vid_id, video_id, - 'Downloading JSON data for %s' % vid_id) + req = compat_urllib_request.Request(base_data_url + vid_id) + + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + req.add_header('Ytdl-request-proxy', cn_verification_proxy) + + return self._download_json(req, video_id, + 'Downloading JSON data for %s' % vid_id) mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -77,6 +102,11 @@ class SohuIE(InfoExtractor): % (format_id, i + 1, part_count)) part_info = part_str.split('|') + + # Sanitize URL to prevent download failure + if part_info[0][-1] == '/' and su[i][0] == '/': + su[i] = su[i][1:] + video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3]) formats.append({ From 5c7495a19429e3b27c003a4bd5bb96ed1e3a4932 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 6 Mar 2015 02:48:27 +0800 Subject: [PATCH 0003/2721] [sohu] Correct wrong 
imports --- youtube_dl/extractor/sohu.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index ef7ec51df..335e84fab 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -4,8 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import compat_str -from ..compat import compat_urllib_request +from ..compat import ( + compat_str, + compat_urllib_request +) class SohuIE(InfoExtractor): From 55969016e96fded28b97b2ef3bbf66efa83d6afb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 6 Mar 2015 12:43:49 +0800 Subject: [PATCH 0004/2721] [utils] Add a function to sanitize consecutive slashes in URLs --- test/test_utils.py | 16 ++++++++++++++++ youtube_dl/extractor/sohu.py | 8 +++----- youtube_dl/utils.py | 15 +++++++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 64fad58ad..e02069c4d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -54,6 +54,7 @@ from youtube_dl.utils import ( xpath_with_ns, render_table, match_str, + url_sanitize_consecutive_slashes, ) @@ -501,6 +502,21 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') 'like_count > 100 & dislike_count Date: Sat, 7 Mar 2015 00:53:52 +0800 Subject: [PATCH 0005/2721] [Yam] Add an error detection and update test cases --- youtube_dl/extractor/yam.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index b294767c5..19ad74d04 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -8,6 +8,7 @@ from ..compat import compat_urlparse from ..utils import ( float_or_none, month_by_abbreviation, + ExtractorError, ) @@ -28,23 +29,44 @@ class YamIE(InfoExtractor): } }, { # An external video hosted on YouTube - 'url': 
'http://mymedia.yam.com/m/3598173', - 'md5': '0238ceec479c654e8c2f1223755bf3e9', + 'url': 'http://mymedia.yam.com/m/3599430', + 'md5': '03127cf10d8f35d120a9e8e52e3b17c6', 'info_dict': { - 'id': 'pJ2Deys283c', + 'id': 'CNpEoQlrIgA', 'ext': 'mp4', - 'upload_date': '20150202', + 'upload_date': '20150306', 'uploader': '新莊社大瑜伽社', - 'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff', + 'description': 'md5:11e2e405311633ace874f2e6226c8b17', 'uploader_id': '2323agoy', - 'title': '外婆的澎湖灣KTV-潘安邦', + 'title': '20090412陽明山二子坪-1', } + }, { + 'url': 'http://mymedia.yam.com/m/3598173', + 'info_dict': { + 'id': '3598173', + 'ext': 'mp4', + }, + 'skip': 'cause Yam system error', + }, { + 'url': 'http://mymedia.yam.com/m/3599437', + 'info_dict': { + 'id': '3599437', + 'ext': 'mp4', + }, + 'skip': 'invalid YouTube URL', }] def _real_extract(self, url): video_id = self._match_id(url) page = self._download_webpage(url, video_id) + # Check for errors + system_msg = self._html_search_regex( + r'系統訊息(?:
|\n|\r)*([^<>]+)
', page, 'system message', + default=None) + if system_msg: + raise ExtractorError(system_msg, expected=True) + # Is it hosted externally on YouTube? youtube_url = self._html_search_regex( r' Date: Sat, 7 Mar 2015 14:05:56 +0800 Subject: [PATCH 0006/2721] [douyutv] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/douyutv.py | 59 ++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 youtube_dl/extractor/douyutv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ffcc7d9ab..7d0798176 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -106,6 +106,7 @@ from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE from .dotsub import DotsubIE +from .douyutv import DouyutvIE from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py new file mode 100644 index 000000000..e9b92eb3b --- /dev/null +++ b/youtube_dl/extractor/douyutv.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + ExtractorError, +) + +class DouyutvIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P[A-Za-z0-9]+)' + + ''' + show_status: 1 直播中 ,2 没有直播 + ''' + + _TEST = { + 'url': 'http://www.douyutv.com/iseven', + 'info_dict': { + 'id': 'iseven', + 'title': '清晨醒脑!T-ara根本停不下来!', + 'ext': 'flv', + 'thumbnail': 're:^https?://.*\.jpg$', + 'is_live': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + info_url = 'http://www.douyutv.com/api/client/room/' + video_id + + config = self._download_json(info_url, video_id) + + error_code = config.get('error') + show_status = config['data'].get('show_status') + if error_code is not 0: + raise ExtractorError('Server reported error %i' % error_code, + 
expected=True) + + if show_status == '2': + raise ExtractorError('The live show has not yet started', + expected=True) + + title = config['data'].get('room_name') + rtmp_url = config['data'].get('rtmp_url') + rtmp_live = config['data'].get('rtmp_live') + thumbnail = config['data'].get('room_src') + + url = rtmp_url+'/'+rtmp_live + + return { + 'id': video_id, + 'title': title, + 'ext':'flv', + 'url': url, + 'thumbnail': thumbnail, + 'is_live': True, + # TODO more properties (see youtube_dl/extractor/common.py) + } \ No newline at end of file From ab205b9dc8dcee37786dac597f2c3760c89b191c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 7 Mar 2015 22:18:16 +0100 Subject: [PATCH 0007/2721] Revert "[YoutubeDL] Sanitize outtmpl as it may contain forbidden characters" This reverts commit 7dcad95d4faa91adfaa4d87d411c4bc55ab000e4. The output template is most definitly allowed to contain forbidden characters; otherwise -o /foo/bar/vid.mp4 wouldn't work. --- youtube_dl/YoutubeDL.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bae52e9c7..df2aebb59 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -553,20 +553,16 @@ class YoutubeDL(object): elif template_dict.get('width'): template_dict['resolution'] = '?x%d' % template_dict['width'] - restrict_filenames = self.params.get('restrictfilenames') - sanitize = lambda k, v: sanitize_filename( compat_str(v), - restricted=restrict_filenames, + restricted=self.params.get('restrictfilenames'), is_id=(k == 'id')) template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items() if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) - outtmpl = sanitize_filename( - self.params.get('outtmpl', DEFAULT_OUTTMPL), - restricted=restrict_filenames) + outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for 
#4787 From d34e79492d1d9d9babf85af3737b90c4fe55eb42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 16:54:11 +0600 Subject: [PATCH 0008/2721] [twitch] Fix live streams (Closes #5158) --- youtube_dl/extractor/twitch.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index b058891bd..cbdaf9c7a 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -358,13 +358,12 @@ class TwitchStreamIE(TwitchBaseIE): 'p': random.randint(1000000, 10000000), 'player': 'twitchweb', 'segment_preference': '4', - 'sig': access_token['sig'], - 'token': access_token['token'], + 'sig': access_token['sig'].encode('utf-8'), + 'token': access_token['token'].encode('utf-8'), } - formats = self._extract_m3u8_formats( '%s/api/channel/hls/%s.m3u8?%s' - % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), + % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)), channel_id, 'mp4') self._prefer_source(formats) From 1132eae56d0f693b5dc55529d5cfadf32b32700d Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 13:54:01 +0200 Subject: [PATCH 0009/2721] [gazeta] Add new extractor (Closes #4222) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gazeta.py | 35 ++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 youtube_dl/extractor/gazeta.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b489d5770..e2475a634 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -175,6 +175,7 @@ from .gameone import ( from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gametrailers import GametrailersIE +from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .generic import GenericIE from .giantbomb import GiantBombIE diff --git 
a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py new file mode 100644 index 000000000..3f9e58061 --- /dev/null +++ b/youtube_dl/extractor/gazeta.py @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class GazetaIE(InfoExtractor): + _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:(?P[^/]*)/)?video/(?:main/)?(?P[A-Za-z0-9-_]+)\.s?html)' + _TEST = { + 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', + 'md5': 'd49c9bdc6e5a7888f27475dc215ee789', + 'info_dict': { + 'id': '205566', + 'ext': 'mp4', + 'title': '«70–80 процентов гражданских в Донецке на грани голода»', + 'description': 'md5:38617526050bd17b234728e7f9620a71', + 'thumbnail': 're:^https?://.*\.jpg', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + display_id = mobj.group('id') + embed_url = '%s?p=embed' % mobj.group('url') + embed_page = self._download_webpage( + embed_url, display_id, 'Downloading embed page') + + video_id = self._search_regex( + r']*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id') + + return self.url_result( + 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform') From 28778d6bae2ff2a0ad57d2c5a694e22ac4ead749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 18:03:12 +0600 Subject: [PATCH 0010/2721] [pladform] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pladform.py | 86 ++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/pladform.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e2475a634..14172ca56 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -364,6 +364,7 @@ from .pbs import PBSIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import 
PlanetaPlayIE +from .pladform import PladformIE from .played import PlayedIE from .playfm import PlayFMIE from .playvid import PlayvidIE diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py new file mode 100644 index 000000000..926e368a2 --- /dev/null +++ b/youtube_dl/extractor/pladform.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + xpath_text, +) + + +class PladformIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?: + out\.pladform\.ru/player| + static\.pladform\.ru/player\.swf + ) + \?.*\bvideoid=| + video\.pladform\.ru/catalog/video/videoid/ + ) + (?P\d+) + ''' + _TESTS = [{ + # http://muz-tv.ru/kinozal/view/7400/ + 'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293', + 'md5': '61f37b575dd27f1bb2e1854777fe31f4', + 'info_dict': { + 'id': '100183293', + 'ext': 'mp4', + 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть', + 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 694, + 'age_limit': 0, + }, + }, { + 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0', + 'only_matching': True, + }, { + 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_xml( + 'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id, + video_id) + + if video.tag == 'error': + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, video.text), + expected=True) + + formats = [{ + 'url': src.text, + 'format_id': src.get('quality'), + } for src in video.findall('./src')] + self._sort_formats(formats) + + webpage = self._download_webpage( + 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, + video_id) 
+ + title = self._og_search_title(webpage, fatal=False) or xpath_text( + video, './/title', 'title', fatal=True) + description = self._search_regex( + r'\s*

([^<]+)

', webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) or xpath_text( + video, './/cover', 'cover') + + duration = int_or_none(xpath_text(video, './/time', 'duration')) + age_limit = int_or_none(xpath_text(video, './/age18', 'age limit')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': age_limit, + 'formats': formats, + } From f8388757269de8a5e87b9a507db55021b3090980 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 18:07:10 +0600 Subject: [PATCH 0011/2721] [pladform] Add support for embeds --- youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 013198b0d..4e6927b08 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -596,6 +596,19 @@ class GenericIE(InfoExtractor): 'view_count': int, }, }, + # Pladform embed + { + 'url': 'http://muz-tv.ru/kinozal/view/7400/', + 'info_dict': { + 'id': '100183293', + 'ext': 'mp4', + 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть', + 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 694, + 'age_limit': 0, + }, + }, # RSS feed with enclosure { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', @@ -1193,6 +1206,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform') + # Look for Pladform embeds + mobj = re.search( + r']+src="(?Phttps?://out\.pladform\.ru/player\?.+?)"', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Pladform') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True From 11101076a1b0eb36dd56b1b5aa1b7559f2d230b9 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 18:09:47 +0600 Subject: [PATCH 0012/2721] [pladform] Fix format quality sorting --- youtube_dl/extractor/pladform.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py index 926e368a2..abde34b94 100644 --- a/youtube_dl/extractor/pladform.py +++ b/youtube_dl/extractor/pladform.py @@ -6,6 +6,7 @@ from ..utils import ( ExtractorError, int_or_none, xpath_text, + qualities, ) @@ -55,9 +56,12 @@ class PladformIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, video.text), expected=True) + quality = qualities(('ld', 'sd', 'hd')) + formats = [{ 'url': src.text, 'format_id': src.get('quality'), + 'quality': quality(src.get('quality')), } for src in video.findall('./src')] self._sort_formats(formats) From 24993e3b396407af9ccdf8dd893588df31aff8af Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 14:08:45 +0200 Subject: [PATCH 0013/2721] [vidme] Fix view_count extraction and remove comment_count extraction (Fixes #5133) Comment counts seem to no longer be listed on vid.me --- youtube_dl/extractor/vidme.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index 339c3d897..bd953fb4c 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -41,13 +41,10 @@ class VidmeIE(InfoExtractor): duration = float_or_none(self._html_search_regex( r'data-duration="([^"]+)"', webpage, 'duration', fatal=False)) view_count = str_to_int(self._html_search_regex( - r'\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False)) + r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False)) like_count = str_to_int(self._html_search_regex( r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">', webpage, 'like count', fatal=False)) - comment_count = 
str_to_int(self._html_search_regex( - r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">', - webpage, 'comment count', fatal=False)) return { 'id': video_id, @@ -61,5 +58,4 @@ class VidmeIE(InfoExtractor): 'duration': duration, 'view_count': view_count, 'like_count': like_count, - 'comment_count': comment_count, } From 8b910bda0cdba859c1971a410aa5d2a51c6e4918 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 14:28:53 +0200 Subject: [PATCH 0014/2721] [teamcoco] Fix extraction --- youtube_dl/extractor/teamcoco.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 5793dbc10..7cb06f351 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -53,10 +53,10 @@ class TeamcocoIE(InfoExtractor): embed = self._download_webpage( embed_url, video_id, 'Downloading embed page') - encoded_data = self._search_regex( - r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data') + player_data = self._parse_json(self._search_regex( + r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id) data = self._parse_json( - base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id) + base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id) formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) From bdf6eee0aeed8df586569982eaaac04eecc0062d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 19:17:54 +0600 Subject: [PATCH 0015/2721] [gazeta] Extend _VALID_URL --- youtube_dl/extractor/gazeta.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py index 3f9e58061..ea32b621c 100644 --- a/youtube_dl/extractor/gazeta.py +++ b/youtube_dl/extractor/gazeta.py @@ -7,8 +7,8 @@ from .common import InfoExtractor class GazetaIE(InfoExtractor): - _VALID_URL = 
r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:(?P[^/]*)/)?video/(?:main/)?(?P[A-Za-z0-9-_]+)\.s?html)' - _TEST = { + _VALID_URL = r'(?Phttps?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P[A-Za-z0-9-_.]+)\.s?html)' + _TESTS = [{ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', 'md5': 'd49c9bdc6e5a7888f27475dc215ee789', 'info_dict': { @@ -18,7 +18,10 @@ class GazetaIE(InfoExtractor): 'description': 'md5:38617526050bd17b234728e7f9620a71', 'thumbnail': 're:^https?://.*\.jpg', }, - } + }, { + 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From a2aaf4dbc6e5f5d345329b5a845111851453b6a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 20:55:22 +0600 Subject: [PATCH 0016/2721] [utils] Add sanitize_path --- test/test_utils.py | 21 +++++++++++++++++++++ youtube_dl/utils.py | 18 ++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 64fad58ad..5ebb8d498 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -38,6 +38,7 @@ from youtube_dl.utils import ( parse_iso8601, read_batch_urls, sanitize_filename, + sanitize_path, shell_quote, smuggle_url, str_to_int, @@ -131,6 +132,26 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') + def test_sanitize_path(self): + if sys.platform != 'win32': + return + + self.assertEqual(sanitize_path('abc'), 'abc') + self.assertEqual(sanitize_path('abc/def'), 'abc\\def') + self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') + self.assertEqual(sanitize_path('abc|def'), 'abc#def') + self.assertEqual(sanitize_path('<>:"|?*'), '#######') + self.assertEqual(sanitize_path('C:/abc/def'), 
'C:\\abc\\def') + self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def') + + self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc') + self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc') + + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc') + self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') + self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7426e2a1f..0f49d602e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -311,6 +311,24 @@ def sanitize_filename(s, restricted=False, is_id=False): return result +def sanitize_path(s): + """Sanitizes and normalizes path on Windows""" + if sys.platform != 'win32': + return s + drive, _ = os.path.splitdrive(s) + unc, _ = os.path.splitunc(s) + unc_or_drive = unc or drive + norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep) + if unc_or_drive: + norm_path.pop(0) + sanitized_path = [ + re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + for path_part in norm_path] + if unc_or_drive: + sanitized_path.insert(0, unc_or_drive + os.path.sep) + return os.path.join(*sanitized_path) + + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] From d55de57b67bceca5d9116ddcc2ada2fa1957d89d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 20:56:28 +0600 Subject: [PATCH 0017/2721] [utils] Fix sanitize_open --- youtube_dl/utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0f49d602e..e511232ca 100644 --- a/youtube_dl/utils.py +++ 
b/youtube_dl/utils.py @@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode): raise # In case of error, try to remove win32 forbidden chars - alt_filename = os.path.join( - re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) - for path_part in os.path.split(filename) - ) + alt_filename = sanitize_path(filename) if alt_filename == filename: raise else: # An exception here should be caught in the caller - stream = open(encodeFilename(filename), open_mode) + stream = open(encodeFilename(alt_filename), open_mode) return (stream, alt_filename) From 1bb5c511a551ba7c5d3179d2d3a124f0977e74b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 20:57:30 +0600 Subject: [PATCH 0018/2721] [YoutubeDL] Sanitize outtmpl as path --- youtube_dl/YoutubeDL.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bae52e9c7..4b9151163 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -61,6 +61,7 @@ from .utils import ( render_table, SameFileError, sanitize_filename, + sanitize_path, std_headers, subtitles_filename, takewhile_inclusive, @@ -553,20 +554,16 @@ class YoutubeDL(object): elif template_dict.get('width'): template_dict['resolution'] = '?x%d' % template_dict['width'] - restrict_filenames = self.params.get('restrictfilenames') - sanitize = lambda k, v: sanitize_filename( compat_str(v), - restricted=restrict_filenames, + restricted=self.params.get('restrictfilenames'), is_id=(k == 'id')) template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items() if v is not None) template_dict = collections.defaultdict(lambda: 'NA', template_dict) - outtmpl = sanitize_filename( - self.params.get('outtmpl', DEFAULT_OUTTMPL), - restricted=restrict_filenames) + outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL)) tmpl = compat_expanduser(outtmpl) filename = tmpl % template_dict # Temporary fix for #4787 From 
f18ef2d14463a13d80e967d1b18ece6a076f60fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 22:08:48 +0600 Subject: [PATCH 0019/2721] [utils] Disallow trailing dot in sanitize_path for a path part --- test/test_utils.py | 11 +++++++++++ youtube_dl/utils.py | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 5ebb8d498..28bda654e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -152,6 +152,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f') self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc') + self.assertEqual( + sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'), + 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s') + + self.assertEqual( + sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'), + 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! 
(1987 Yamaha 700)-20141116.mp4.part') + self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#') + self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def') + self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e511232ca..d5597d514 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -319,7 +319,7 @@ def sanitize_path(s): if unc_or_drive: norm_path.pop(0) sanitized_path = [ - re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) + re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) for path_part in norm_path] if unc_or_drive: sanitized_path.insert(0, unc_or_drive + os.path.sep) From e5a11a2293069b1acfa5eb5d2694ad4082dd9755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 8 Mar 2015 22:09:42 +0600 Subject: [PATCH 0020/2721] [YoutubeDL] Sanitize path before creating non-existent paths (Closes #4324) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4b9151163..bce7587fd 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1262,7 +1262,7 @@ class YoutubeDL(object): return try: - dn = os.path.dirname(encodeFilename(filename)) + dn = os.path.dirname(sanitize_path(encodeFilename(filename))) if dn and not os.path.exists(dn): os.makedirs(dn) except (OSError, IOError) as err: From 43d6280d0a03335ec5143383f15e2ca9a49f4046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 8 Mar 2015 18:25:11 +0100 Subject: [PATCH 0021/2721] [downloader/f4m] Fix use of base64 in python 3.2 (fixes #5132) b64decode needs a byte string, but on 3.4 it also accepts strings. 
--- youtube_dl/downloader/f4m.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 3dc796faa..4ab000d67 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -281,7 +281,7 @@ class F4mFD(FileDownloader): boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None - bootstrap = base64.b64decode(node.text) + bootstrap = base64.b64decode(node.text.encode('ascii')) boot_info = read_bootstrap_info(bootstrap) return (boot_info, bootstrap_url) @@ -308,7 +308,7 @@ class F4mFD(FileDownloader): live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: - metadata = base64.b64decode(metadata_node.text) + metadata = base64.b64decode(metadata_node.text.encode('ascii')) else: metadata = None From 8bba753ccaab38a837efd5d334eff8f1b1c48fad Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 8 Mar 2015 18:37:43 +0100 Subject: [PATCH 0022/2721] [options] Rename --dump-intermediate-pages to --dump-pages for consistence with --write-pages --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index a4ca8adc4..eefe008d5 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -563,7 +563,7 @@ def parseOpts(overrideArguments=None): action='store_true', dest='verbose', default=False, help='print various debugging information') verbosity.add_option( - '--dump-intermediate-pages', + '--dump-pages', '--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, help='print downloaded pages to debug problems (very verbose)') verbosity.add_option( From cc08b11d16efd125d765cc1a69ac795a592f012c Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 8 Mar 2015 21:32:42 +0200 Subject: [PATCH 0023/2721] [adultswim] Improve video_info extraction (Fixes #5152) Look for video_info inside 
`slugged_video`, if slug is not found among collections. Also, simplify a bit. --- youtube_dl/extractor/adultswim.py | 44 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index 34b8b0115..39335b827 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -2,13 +2,12 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import ( ExtractorError, - xpath_text, float_or_none, + xpath_text, ) @@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor): 'title': 'American Dad - Putting Francine Out of Business', 'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' }, + }, { + 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/', + 'playlist': [ + { + 'md5': '3e346a2ab0087d687a05e1e7f3b3e529', + 'info_dict': { + 'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0', + 'ext': 'flv', + 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', + 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', + }, + } + ], + 'info_dict': { + 'id': 'sY3cMUR_TbuE4YmdjzbIcQ', + 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', + 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! 
episode #20, "Embarrassed" on Adult Swim.\r\n\r\n', + }, }] @staticmethod @@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor): for video in collection.get('videos'): if video.get('slug') == slug: return collection, video + return None, None def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor): webpage = self._download_webpage(url, episode_path) # Extract the value of `bootstrappedData` from the Javascript in the page. - bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path) - - try: - bootstrappedData = json.loads(bootstrappedDataJS) - except ValueError as ve: - errmsg = '%s: Failed to parse JSON ' % episode_path - raise ExtractorError(errmsg, cause=ve) + bootstrapped_data = self._parse_json(self._search_regex( + r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path) # Downloading videos from a /videos/playlist/ URL needs to be handled differently. # NOTE: We are only downloading one video (the current one) not the playlist if is_playlist: - collections = bootstrappedData['playlists']['collections'] + collections = bootstrapped_data['playlists']['collections'] collection = self.find_collection_by_linkURL(collections, show_path) video_info = self.find_video_info(collection, episode_path) show_title = video_info['showTitle'] segment_ids = [video_info['videoPlaybackID']] else: - collections = bootstrappedData['show']['collections'] + collections = bootstrapped_data['show']['collections'] collection, video_info = self.find_collection_containing_video(collections, episode_path) - show = bootstrappedData['show'] + # Video wasn't found in the collections, let's try `slugged_video`. 
+ if video_info is None: + if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path: + video_info = bootstrapped_data['slugged_video'] + else: + raise ExtractorError('Unable to find video info') + + show = bootstrapped_data['show'] show_title = show['title'] segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']] From dd7831fe94a0fb8270e7fa3699677c7476a5cd83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Mar 2015 04:55:35 +0600 Subject: [PATCH 0024/2721] [breakcom] Process only play purpose media formats (Closes #5164) --- youtube_dl/extractor/breakcom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 4bcc897c9..809287d14 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -41,7 +41,7 @@ class BreakIE(InfoExtractor): 'tbr': media['bitRate'], 'width': media['width'], 'height': media['height'], - } for media in info['media']] + } for media in info['media'] if media.get('mediaPurpose') == 'play'] if not formats: formats.append({ From d475b3384cd372d89c35b9b1c26499f1e9cc1915 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 9 Mar 2015 03:00:01 +0100 Subject: [PATCH 0025/2721] [README] Better bug reporting instructions Also address private emails which I get more and more these days. --- README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5b9dd2cea..1a5a90c2e 100644 --- a/README.md +++ b/README.md @@ -404,6 +404,18 @@ A note on the service that they don't host the infringing content, but just link Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content. +### How can I speed up work on my issue? 
+ +(Also known as: Help, my important issue not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do: + +First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel. + +Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues. + +If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so). + +Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`. + ### How can I detect whether a given URL is supported by youtube-dl? For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. @@ -572,7 +584,9 @@ If your report is shorter than two lines, it is almost certainly missing some of For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. 
The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. -Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. +If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). + +**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. ### Are you using the latest version? 
From f848215dfce232dfc6618c1cc49da44c3f7df75a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 9 Mar 2015 03:02:03 +0100 Subject: [PATCH 0026/2721] release 2015.03.09 --- CONTRIBUTING.md | 4 +++- README.md | 2 +- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 351229f21..588b15bde 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. -Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. +If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). + +**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL. 
### Are you using the latest version? diff --git a/README.md b/README.md index 1a5a90c2e..f41e7ecee 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,7 @@ which means you can modify it, redistribute it or use it however you like. --no-progress do not print progress bar --console-title display progress in console titlebar -v, --verbose print various debugging information - --dump-intermediate-pages print downloaded pages to debug problems (very verbose) + --dump-pages print downloaded pages to debug problems (very verbose) --write-pages Write downloaded intermediary pages to files in the current directory to debug problems --print-traffic Display sent and read HTTP traffic -C, --call-home Contact the youtube-dl server for debugging. diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 062cb3d62..80a696ee3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -117,6 +117,7 @@ - **DRTV** - **Dump** - **dvtv**: http://video.aktualne.cz/ + - **EaglePlatform** - **EbaumsWorld** - **EchoMsk** - **eHow** @@ -161,6 +162,7 @@ - **GameSpot** - **GameStar** - **Gametrailers** + - **Gazeta** - **GDCVault** - **generic**: Generic downloader that works on some sites - **GiantBomb** @@ -315,6 +317,7 @@ - **Ooyala** - **OpenFilm** - **orf:fm4**: radio FM4 + - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek - **parliamentlive.tv**: UK parliament videos @@ -322,10 +325,12 @@ - **PBS** - **Phoenix** - **Photobucket** + - **Pladform** - **PlanetaPlay** - **play.fm** - **played.to** - **Playvid** + - **Playwire** - **plus.google**: Google Plus - **pluzz.francetv.fr** - **podomatic** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 252933993..1f0c88a4d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.03.03.1' +__version__ = '2015.03.09' From 4c6039385466e4be3a0e20459e75886052653de7 Mon Sep 17 00:00:00 2001 
From: Mamay Alexander Date: Mon, 9 Mar 2015 19:06:49 +0600 Subject: [PATCH 0027/2721] [YandexMusic] Add new extractor --- youtube_dl/extractor/__init__.py | 5 ++ youtube_dl/extractor/yamusic.py | 104 +++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 youtube_dl/extractor/yamusic.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 14172ca56..2db938516 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -611,6 +611,11 @@ from .yahoo import ( YahooSearchIE, ) from .yam import YamIE +from .yamusic import ( + YandexMusicTrackIE, + YandexMusicAlbumIE, + YandexMusicPlaylistIE, +) from .yesjapan import YesJapanIE from .ynet import YnetIE from .youjizz import YouJizzIE diff --git a/youtube_dl/extractor/yamusic.py b/youtube_dl/extractor/yamusic.py new file mode 100644 index 000000000..5af6df89d --- /dev/null +++ b/youtube_dl/extractor/yamusic.py @@ -0,0 +1,104 @@ +# coding=utf-8 +from __future__ import unicode_literals + +import re +import hashlib +import time + +from .common import InfoExtractor + +class YandexMusicAlbumIE(InfoExtractor): + _VALID_URL = r'http://music.yandex.ru/album/(?P\d+)' + + def _get_track_url(self, storage_dir, track_id): + data = self._download_json('http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?requestId=2&nc=%d&action=getTrackSrc&p=download-info/%s/2.mp3' % (time.time(), storage_dir), track_id) + + hsh = hashlib.md5() + hsh.update('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']) + hash = hsh.hexdigest() + storage = storage_dir.split('.') + + return 'http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default' % (data['host'], hash, data['ts'] + data['path'], storage[1]) + + def _get_album_id_and_data(self, url): + matched = re.match(self._VALID_URL, url) + id = matched.group('id') + + webpage = self._download_webpage(url, id) + data = self._parse_json( + self._search_regex( + 
r'var\s+Mu\s+=\s+(.+?);\s+<\/script>', webpage, 'player'), + id) + return id, data['pageData'] + + def _real_extract(self, url): + + id, data = self._get_album_id_and_data(url) + + entries = [] + + for track in data['volumes'][0]: + entries.append({ + 'id': track['id'], + 'ext': 'mp3', + 'url': self._get_track_url(track['storageDir'], track['id']), + 'title': track['artists'][0]['name'] + ' - ' + track['title'], + }) + + return { + '_type': 'playlist', + 'entries': entries, + 'id': id, + 'title': data['title'], + } + +class YandexMusicPlaylistIE(YandexMusicAlbumIE): + _VALID_URL = r'http://music.yandex.ru/users/(?P[^/]+)/playlists/(?P\d+)' + + def _real_extract(self, url): + id, data = self._get_album_id_and_data(url) + data = data['playlist'] + + entries = [] + + for track in data['tracks']: + entries.append({ + 'id': track['id'], + 'ext': 'mp3', + 'url': self._get_track_url(track['storageDir'], track['id']), + 'title': track['artists'][0]['name'] + ' - ' + track['title'], + }) + + return { + '_type': 'playlist', + 'entries': entries, + 'id': id, + 'title': data['title'], + } + +class YandexMusicTrackIE(YandexMusicAlbumIE): + _VALID_URL = r'http://music.yandex.ru/album/(?P\d+)/track/(?P\d+)' + _TEST = { + 'url': 'http://music.yandex.ru/album/540508/track/4878838', + 'info_dict': { + 'id': '4878838', + 'ext': 'mp3', + 'title': 'Carlo Ambrosio - Gypsy Eyes 1', + } + } + + def _real_extract(self, url): + + id, data = self._get_album_id_and_data(url) + + for track in data['volumes'][0]: + if track['id'] == id: + track_url = self._get_track_url(track['storageDir'], id) + break + + return { + 'id': id, + 'ext': 'mp3', + 'url': track_url, + 'title': track['artists'][0]['name'] + ' - ' + track['title'], + } From 47fe42e1ab3e182aacbc0c749b9a4f17f1b9dad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Mar 2015 21:43:46 +0600 Subject: [PATCH 0028/2721] [yamusic] Improve, simplify, fix python3 issues and add tests --- 
youtube_dl/extractor/yamusic.py | 168 ++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 72 deletions(-) diff --git a/youtube_dl/extractor/yamusic.py b/youtube_dl/extractor/yamusic.py index 5af6df89d..e25c0be44 100644 --- a/youtube_dl/extractor/yamusic.py +++ b/youtube_dl/extractor/yamusic.py @@ -3,102 +3,126 @@ from __future__ import unicode_literals import re import hashlib -import time from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + float_or_none, +) -class YandexMusicAlbumIE(InfoExtractor): - _VALID_URL = r'http://music.yandex.ru/album/(?P\d+)' +class YandexMusicBaseIE(InfoExtractor): def _get_track_url(self, storage_dir, track_id): - data = self._download_json('http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?requestId=2&nc=%d&action=getTrackSrc&p=download-info/%s/2.mp3' % (time.time(), storage_dir), track_id) + data = self._download_json( + 'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s' + % storage_dir, + track_id, 'Downloading track location JSON') - hsh = hashlib.md5() - hsh.update('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']) - hash = hsh.hexdigest() + key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest() storage = storage_dir.split('.') - return 'http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default' % (data['host'], hash, data['ts'] + data['path'], storage[1]) - - def _get_album_id_and_data(self, url): - matched = re.match(self._VALID_URL, url) - id = matched.group('id') - - webpage = self._download_webpage(url, id) - data = self._parse_json( - self._search_regex( - r'var\s+Mu\s+=\s+(.+?);\s+<\/script>', webpage, 'player'), - id) - return id, data['pageData'] - - def _real_extract(self, url): - - id, data = self._get_album_id_and_data(url) - - entries = [] - - for track in data['volumes'][0]: - entries.append({ - 'id': track['id'], 
- 'ext': 'mp3', - 'url': self._get_track_url(track['storageDir'], track['id']), - 'title': track['artists'][0]['name'] + ' - ' + track['title'], - }) + return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default' + % (data['host'], key, data['ts'] + data['path'], storage[1])) + def _get_track_info(self, track): return { - '_type': 'playlist', - 'entries': entries, - 'id': id, - 'title': data['title'], + 'id': track['id'], + 'ext': 'mp3', + 'url': self._get_track_url(track['storageDir'], track['id']), + 'title': '%s - %s' % (track['artists'][0]['name'], track['title']), + 'filesize': int_or_none(track.get('fileSize')), + 'duration': float_or_none(track.get('durationMs'), 1000), } -class YandexMusicPlaylistIE(YandexMusicAlbumIE): - _VALID_URL = r'http://music.yandex.ru/users/(?P[^/]+)/playlists/(?P\d+)' - def _real_extract(self, url): - id, data = self._get_album_id_and_data(url) - data = data['playlist'] +class YandexMusicTrackIE(YandexMusicBaseIE): + IE_NAME = 'yandexmusic:track' + IE_DESC = 'Яндекс.Музыка - Трек' + _VALID_URL = r'https?://music\.yandex\.ru/album/(?P\d+)/track/(?P\d+)' - entries = [] - - for track in data['tracks']: - entries.append({ - 'id': track['id'], - 'ext': 'mp3', - 'url': self._get_track_url(track['storageDir'], track['id']), - 'title': track['artists'][0]['name'] + ' - ' + track['title'], - }) - - return { - '_type': 'playlist', - 'entries': entries, - 'id': id, - 'title': data['title'], - } - -class YandexMusicTrackIE(YandexMusicAlbumIE): - _VALID_URL = r'http://music.yandex.ru/album/(?P\d+)/track/(?P\d+)' _TEST = { 'url': 'http://music.yandex.ru/album/540508/track/4878838', + 'md5': 'f496818aa2f60b6c0062980d2e00dc20', 'info_dict': { 'id': '4878838', 'ext': 'mp3', 'title': 'Carlo Ambrosio - Gypsy Eyes 1', + 'filesize': 4628061, + 'duration': 193.04, } } def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + album_id, track_id = mobj.group('album_id'), mobj.group('id') - id, data = 
self._get_album_id_and_data(url) + track = self._download_json( + 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id), + track_id, 'Downloading track JSON')['track'] - for track in data['volumes'][0]: - if track['id'] == id: - track_url = self._get_track_url(track['storageDir'], id) - break + return self._get_track_info(track) + + +class YandexMusicAlbumIE(YandexMusicBaseIE): + IE_NAME = 'yandexmusic:album' + IE_DESC = 'Яндекс.Музыка - Альбом' + _VALID_URL = r'https?://music\.yandex\.ru/album/(?P\d+)' + + _TEST = { + 'url': 'http://music.yandex.ru/album/540508', + 'info_dict': { + 'id': '540508', + 'title': 'Carlo Ambrosio - Gypsy Soul (2009)', + }, + 'playlist_count': 50, + } + + def _real_extract(self, url): + album_id = self._match_id(url) + + album = self._download_json( + 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, + album_id, 'Downloading album JSON') + + entries = [self._get_track_info(track) for track in album['volumes'][0]] + + title = '%s - %s' % (album['artists'][0]['name'], album['title']) + year = album.get('year') + if year: + title += ' (%s)' % year + + return self.playlist_result(entries, compat_str(album['id']), title) + + +class YandexMusicPlaylistIE(YandexMusicBaseIE): + IE_NAME = 'yandexmusic:playlist' + IE_DESC = 'Яндекс.Музыка - Плейлист' + _VALID_URL = r'https?://music\.yandex\.ru/users/[^/]+/playlists/(?P\d+)' + + _TEST = { + 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', + 'info_dict': { + 'id': '1245', + 'title': 'Что слушают Enter Shikari', + 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9', + }, + 'playlist_count': 6, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + playlist = self._parse_json( + self._search_regex( + r'var\s+Mu\s*=\s*({.+?});\s*', webpage, 'player'), + playlist_id)['pageData']['playlist'] + + entries = [self._get_track_info(track) for track in playlist['tracks']] 
+ + return self.playlist_result( + entries, compat_str(playlist_id), + playlist['title'], playlist.get('description')) - return { - 'id': id, - 'ext': 'mp3', - 'url': track_url, - 'title': track['artists'][0]['name'] + ' - ' + track['title'], - } From d1e2e8f583524e837d452d37fa7175a55ebc80ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Mar 2015 21:44:59 +0600 Subject: [PATCH 0029/2721] [yamusic] Rename to yandexmusic --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/{yamusic.py => yandexmusic.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename youtube_dl/extractor/{yamusic.py => yandexmusic.py} (100%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2db938516..20bc73dce 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -611,7 +611,7 @@ from .yahoo import ( YahooSearchIE, ) from .yam import YamIE -from .yamusic import ( +from .yandexmusic import ( YandexMusicTrackIE, YandexMusicAlbumIE, YandexMusicPlaylistIE, diff --git a/youtube_dl/extractor/yamusic.py b/youtube_dl/extractor/yandexmusic.py similarity index 100% rename from youtube_dl/extractor/yamusic.py rename to youtube_dl/extractor/yandexmusic.py From 41b2194f86c9440d2d095db3eaec90784703fa7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Mar 2015 21:46:31 +0600 Subject: [PATCH 0030/2721] Credit @MamayAlexander for yandexmusic (#5168) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4674a5af3..421df69a6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -113,3 +113,4 @@ Robin de Rooij Ryan Schmidt Leslie P. 
Polzer Duncan Keall +Alexander Mamay From dd77f14c641da26429ba32d2464563bf04e1741c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 9 Mar 2015 18:07:31 +0100 Subject: [PATCH 0031/2721] [yandexmusic] PEP8: remove blank line at the end of file --- youtube_dl/extractor/yandexmusic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index e25c0be44..1ee8d1bc0 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -125,4 +125,3 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE): return self.playlist_result( entries, compat_str(playlist_id), playlist['title'], playlist.get('description')) - From 937daef4a7725aeecc4b2ce0caa29c6f1aaf0b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 9 Mar 2015 18:12:41 +0100 Subject: [PATCH 0032/2721] [niconico] Use '_match_id' --- youtube_dl/extractor/niconico.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 4c1890416..7fb4e57df 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -41,7 +41,7 @@ class NiconicoIE(InfoExtractor): }, } - _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)' + _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' # Determine whether the downloader used authentication to download video _AUTHENTICATED = False @@ -76,8 +76,7 @@ class NiconicoIE(InfoExtractor): return True def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = self._match_id(url) # Get video webpage. 
We are not actually interested in it, but need # the cookies in order to be able to download the info webpage From f5d8f58a170c9585e0913dbe1e50e0ed05afb698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 9 Mar 2015 18:17:22 +0100 Subject: [PATCH 0033/2721] [yandexmusic:album] Improve _VALID_URL to avoid matching tracks urls --- youtube_dl/extractor/yandexmusic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 1ee8d1bc0..b47aecb15 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -67,7 +67,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE): class YandexMusicAlbumIE(YandexMusicBaseIE): IE_NAME = 'yandexmusic:album' IE_DESC = 'Яндекс.Музыка - Альбом' - _VALID_URL = r'https?://music\.yandex\.ru/album/(?P\d+)' + _VALID_URL = r'https?://music\.yandex\.ru/album/(?P\d+)/?(\?|$)' _TEST = { 'url': 'http://music.yandex.ru/album/540508', From 2ebfeacabc7b74d03fa7cb096d8b9d1ecbdbb6e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 10 Mar 2015 00:50:11 +0600 Subject: [PATCH 0034/2721] [utils] Keep dot and dotdot unmodified (Closes #5171) --- test/test_utils.py | 5 +++++ youtube_dl/utils.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 28bda654e..8f790bf0a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -163,6 +163,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def') self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#') + self.assertEqual(sanitize_path('../abc'), '..\\abc') + self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc') + self.assertEqual(sanitize_path('./abc'), 'abc') + self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 
5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d5597d514..c3135effc 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -319,7 +319,7 @@ def sanitize_path(s): if unc_or_drive: norm_path.pop(0) sanitized_path = [ - re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) + path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) for path_part in norm_path] if unc_or_drive: sanitized_path.insert(0, unc_or_drive + os.path.sep) From 614a7e1e230e095d9a11b59b20f4ff7462be8b21 Mon Sep 17 00:00:00 2001 From: "PishPosh.McGee" Date: Tue, 10 Mar 2015 15:22:46 -0500 Subject: [PATCH 0035/2721] Added subtitles for FunnyOrDie --- youtube_dl/extractor/funnyordie.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index a49fc1151..f832ac5c3 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor): bitrates.sort() formats = [] - for bitrate in bitrates: for link in links: formats.append({ @@ -59,6 +58,16 @@ class FunnyOrDieIE(InfoExtractor): 'vbr': bitrate, }) + subtitles={} + subtitle_matches=re.findall(r' Date: Tue, 10 Mar 2015 15:35:35 -0500 Subject: [PATCH 0036/2721] Update funnyordie.py --- youtube_dl/extractor/funnyordie.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index f832ac5c3..0a22d99d2 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -65,8 +65,7 @@ class FunnyOrDieIE(InfoExtractor): if not sublang in subtitles.keys(): subtitles[sublang]=[] subext=suburl.split('/')[-1] - print subext - subtitles[sublang].append({'url': suburl,'ext': subext}) + subtitles[sublang].append({'url': 'http://www.funnyordie.com'+suburl,'ext': subext}) 
post_json = self._search_regex( r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details') From 32aaeca775c03ce784b8a30ea987c03a25a00e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Mar 2015 20:34:32 +0600 Subject: [PATCH 0037/2721] [npo] Improve smooth stream skipping and set low preference for streams other than hds ans hls (Closes #5175) --- youtube_dl/extractor/npo.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 9c01eb0af..557dffa46 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE): if streams: for stream in streams: stream_type = stream.get('type').lower() - if stream_type == 'ss': + # smooth streaming is not supported + if stream_type in ['ss', 'ms']: continue stream_info = self._download_json( 'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp' @@ -242,6 +243,7 @@ class NPOLiveIE(NPOBaseIE): else: formats.append({ 'url': stream_url, + 'preference': -10, }) self._sort_formats(formats) From c792b5011fe56af654d99378c1e77837475d7b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Mar 2015 21:15:36 +0600 Subject: [PATCH 0038/2721] [ssa] Add extractor (Closes #5169) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ssa.py | 58 ++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/ssa.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 20bc73dce..7adcc4dbf 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -460,6 +460,7 @@ from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE from .srmediathek import SRMediathekIE +from .ssa import SSAIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE from .streamcloud import StreamcloudIE 
diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py new file mode 100644 index 000000000..13101c714 --- /dev/null +++ b/youtube_dl/extractor/ssa.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unescapeHTML, + parse_duration, +) + + +class SSAIE(InfoExtractor): + _VALID_URL = r'http://ssa\.nls\.uk/film/(?P\d+)' + _TEST = { + 'url': 'http://ssa.nls.uk/film/3561', + 'info_dict': { + 'id': '3561', + 'ext': 'flv', + 'title': 'SHETLAND WOOL', + 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04', + 'duration': 900, + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + streamer = self._search_regex( + r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer') + play_path = self._search_regex( + r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0] + + def search_field(field_name, fatal=False): + return self._search_regex( + r'%s:\s*([^<]+)' % field_name, + webpage, 'title', fatal=fatal) + + title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]') + description = unescapeHTML(search_field('Description')) + duration = parse_duration(search_field('Running time')) + thumbnail = self._search_regex( + r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False) + + return { + 'id': video_id, + 'url': streamer, + 'play_path': play_path, + 'ext': 'flv', + 'title': title, + 'description': description, + 'duration': duration, + 'thumbnail': thumbnail, + } From a3fbd188241b6151b024f6e3ea21c0edc60fae09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 11 Mar 2015 21:56:22 +0600 Subject: [PATCH 0039/2721] [funnyordie] Simplify subtitles --- youtube_dl/extractor/funnyordie.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git 
a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 0a22d99d2..dd87257c4 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -58,14 +58,12 @@ class FunnyOrDieIE(InfoExtractor): 'vbr': bitrate, }) - subtitles={} - subtitle_matches=re.findall(r' Date: Wed, 11 Mar 2015 22:00:37 +0600 Subject: [PATCH 0040/2721] [funnyordie] Add subtitles test --- test/test_subtitles.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 3f2d8a2ba..891ee620b 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -26,6 +26,7 @@ from youtube_dl.extractor import ( VikiIE, ThePlatformIE, RTVEALaCartaIE, + FunnyOrDieIE, ) @@ -320,5 +321,17 @@ class TestRtveSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') +class TestFunnyOrDieSubtitles(BaseTestSubtitles): + url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine' + IE = FunnyOrDieIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4') + + if __name__ == '__main__': unittest.main() From 3a77719c5ac60f7e634a7a8def82acb08ccc9ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 11 Mar 2015 17:38:35 +0100 Subject: [PATCH 0041/2721] Don't accept '-1' as format, 'all' is clearer --- youtube_dl/YoutubeDL.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bce7587fd..be5b3c1ab 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1086,8 +1086,7 @@ class YoutubeDL(object): if req_format is None: req_format = 'best' formats_to_download = [] - # The -1 is for 
supporting YoutubeIE - if req_format in ('-1', 'all'): + if req_format == 'all': formats_to_download = formats else: for rfstr in req_format.split(','): From 71705fa70d3f8b410a25a9c1fb01c33dd5bb6026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Mar 2015 21:56:56 +0600 Subject: [PATCH 0042/2721] [footyroom] Add extractor (Closes #5000) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/footyroom.py | 41 +++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 youtube_dl/extractor/footyroom.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7adcc4dbf..7f9523c2b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -151,6 +151,7 @@ from .fktv import ( ) from .flickr import FlickrIE from .folketinget import FolketingetIE +from .footyroom import FootyRoomIE from .fourtube import FourTubeIE from .foxgay import FoxgayIE from .foxnews import FoxNewsIE diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py new file mode 100644 index 000000000..2b4691ae8 --- /dev/null +++ b/youtube_dl/extractor/footyroom.py @@ -0,0 +1,41 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class FootyRoomIE(InfoExtractor): + _VALID_URL = r'http://footyroom\.com/(?P[^/]+)' + _TEST = { + 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', + 'info_dict': { + 'id': 'schalke-04-0-2-real-madrid-2015-02', + 'title': 'Schalke 04 0 – 2 Real Madrid', + }, + 'playlist_count': 3, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + playlist = self._parse_json( + self._search_regex( + r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'), + playlist_id) + + playlist_title = self._og_search_title(webpage) + + entries = [] + for video in playlist: + payload = video.get('payload') + 
if not payload: + continue + playwire_url = self._search_regex( + r'data-config="([^"]+)"', payload, + 'playwire url', default=None) + if playwire_url: + entries.append(self.url_result(playwire_url, 'Playwire')) + + return self.playlist_result(entries, playlist_id, playlist_title) From 1138491631c43c2f758d933675131f0bd8249920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 12 Mar 2015 21:59:46 +0600 Subject: [PATCH 0043/2721] [yam] Skip test --- youtube_dl/extractor/yam.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 19ad74d04..19f8762ae 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -39,7 +39,8 @@ class YamIE(InfoExtractor): 'description': 'md5:11e2e405311633ace874f2e6226c8b17', 'uploader_id': '2323agoy', 'title': '20090412陽明山二子坪-1', - } + }, + 'skip': 'Video does not exist', }, { 'url': 'http://mymedia.yam.com/m/3598173', 'info_dict': { From d7d79106c7463b455ecc967c5f68b744c1375147 Mon Sep 17 00:00:00 2001 From: Leonardo Amaral Date: Thu, 12 Mar 2015 14:23:42 -0300 Subject: [PATCH 0044/2721] * Change globo.py flash ver to 17.0.0.132 - Chrome 42.0.2311.22 --- youtube_dl/extractor/globo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 29638a194..8a95793ca 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -20,7 +20,7 @@ class GloboIE(InfoExtractor): _VALID_URL = 'https?://.+?\.globo\.com/(?P.+)' _API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist' - _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s' + _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s' _VIDEOID_REGEXES = [ r'\bdata-video-id="(\d+)"', From b84037013edf25866772ba12db3b0c55f94a3e12 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 12 Mar 2015 18:45:00 +0100 Subject: [PATCH 0045/2721] [vimeo] Fix login (#3886) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index b84a83ba6..fc720e7e7 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -38,7 +38,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): self.report_login() login_url = 'https://vimeo.com/log_in' webpage = self._download_webpage(login_url, None, False) - token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') + token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token') data = urlencode_postdata({ 'email': username, 'password': password, From 3946864c8a752c068b188126013ddb747b53385d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 12 Mar 2015 19:08:16 +0100 Subject: [PATCH 0046/2721] [vimeo] Use https for all vimeo.com urls Unfortunately vimeopro.com doesn't support it yet. 
--- test/test_all_urls.py | 10 +++++----- youtube_dl/extractor/vimeo.py | 36 +++++++++++++++++------------------ 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index e66264b4b..6ae168b7f 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch(':tds', ['ComedyCentralShows']) def test_vimeo_matching(self): - self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel']) - self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel']) - self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo']) - self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user']) - self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user']) + self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo']) + self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) # https://github.com/rg3/youtube-dl/issues/1930 diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index fc720e7e7..bd09652cd 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -373,7 +373,7 @@ class VimeoIE(VimeoBaseInfoExtractor): for tt in text_tracks: subtitles[tt['lang']] = [{ 'ext': 'vtt', - 'url': 'http://vimeo.com' + tt['url'], + 'url': 'https://vimeo.com' + tt['url'], }] return { @@ -396,11 +396,11 @@ class VimeoIE(VimeoBaseInfoExtractor): class VimeoChannelIE(InfoExtractor): IE_NAME = 'vimeo:channel' - _VALID_URL = r'https?://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' + _VALID_URL = 
r'https://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' _TESTS = [{ - 'url': 'http://vimeo.com/channels/tributes', + 'url': 'https://vimeo.com/channels/tributes', 'info_dict': { 'id': 'tributes', 'title': 'Vimeo Tributes', @@ -459,7 +459,7 @@ class VimeoChannelIE(InfoExtractor): if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break - entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') + entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') for video_id in video_ids] return {'_type': 'playlist', 'id': list_id, @@ -470,15 +470,15 @@ class VimeoChannelIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) channel_id = mobj.group('id') - return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id) + return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id) class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' _TESTS = [{ - 'url': 'http://vimeo.com/nkistudio/videos', + 'url': 'https://vimeo.com/nkistudio/videos', 'info_dict': { 'title': 'Nki', 'id': 'nkistudio', @@ -489,7 +489,7 @@ class VimeoUserIE(VimeoChannelIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') - return self._extract_videos(name, 'http://vimeo.com/%s' % name) + return self._extract_videos(name, 'https://vimeo.com/%s' % name) class VimeoAlbumIE(VimeoChannelIE): @@ -526,9 +526,9 @@ class VimeoAlbumIE(VimeoChannelIE): class VimeoGroupsIE(VimeoAlbumIE): IE_NAME = 'vimeo:group' - _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P[^/]+)' + _VALID_URL = r'https://vimeo\.com/groups/(?P[^/]+)' _TESTS = [{ - 'url': 'http://vimeo.com/groups/rolexawards', + 'url': 
'https://vimeo.com/groups/rolexawards', 'info_dict': { 'id': 'rolexawards', 'title': 'Rolex Awards for Enterprise', @@ -542,13 +542,13 @@ class VimeoGroupsIE(VimeoAlbumIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') - return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name) + return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name) class VimeoReviewIE(InfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' - _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P[^/]+)' + _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P[^/]+)' _TESTS = [{ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'md5': 'c507a72f780cacc12b2248bb4006d253', @@ -560,7 +560,7 @@ class VimeoReviewIE(InfoExtractor): } }, { 'note': 'video player needs Referer', - 'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053', + 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053', 'md5': '6295fdab8f4bf6a002d058b2c6dce276', 'info_dict': { 'id': '91613211', @@ -582,11 +582,11 @@ class VimeoReviewIE(InfoExtractor): class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): IE_NAME = 'vimeo:watchlater' IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' - _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater' + _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater' _LOGIN_REQUIRED = True _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<' _TESTS = [{ - 'url': 'http://vimeo.com/home/watchlater', + 'url': 'https://vimeo.com/home/watchlater', 'only_matching': True, }] @@ -606,7 +606,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): class VimeoLikesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P[0-9]+)/likes/?(?:$|[?#]|sort:)' + _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P[0-9]+)/likes/?(?:$|[?#]|sort:)' IE_NAME = 'vimeo:likes' IE_DESC = 'Vimeo user likes' 
_TEST = { @@ -634,8 +634,8 @@ class VimeoLikesIE(InfoExtractor): description = self._html_search_meta('description', webpage) def _get_page(idx): - page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % ( - self.http_scheme(), user_id, idx + 1) + page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % ( + user_id, idx + 1) webpage = self._download_webpage( page_url, user_id, note='Downloading page %d/%d' % (idx + 1, page_count)) From 11984c7467184100ca4a61ae939a8c260480f42c Mon Sep 17 00:00:00 2001 From: "Devin J. Pohly" Date: Thu, 12 Mar 2015 15:43:13 -0400 Subject: [PATCH 0047/2721] [BeatportPro] Add new extractor This extractor is for Beatport's 2-minute, low-quality track previews only. To obtain an entire track, you obviously have to purchase and download it normally through the Beatport store! Possible future improvements: - Playlists for albums or other track-list pages - User login to play from My Beatport, Hold Bin, or Cart --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/beatportpro.py | 101 ++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/beatportpro.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f9523c2b..ac765fdb8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -37,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE from .bbccouk import BBCCoUkIE from .beeg import BeegIE from .behindkink import BehindKinkIE +from .beatportpro import BeatportProIE from .bet import BetIE from .bild import BildIE from .bilibili import BiliBiliIE diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py new file mode 100644 index 000000000..21048b732 --- /dev/null +++ b/youtube_dl/extractor/beatportpro.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re +import json + + +class 
BeatportProIE(InfoExtractor): + _VALID_URL = r'https?://pro\.beatport\.com/track/.*/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371', + 'md5': 'b3c34d8639a2f6a7f734382358478887', + 'info_dict': { + 'id': 5379371, + 'display-id': 'synesthesia-original-mix', + 'ext': 'mp4', + 'title': 'Froxic - Synesthesia (Original Mix)', + }, + }, { + 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896', + 'md5': 'e44c3025dfa38c6577fbaeb43da43514', + 'info_dict': { + 'id': 3756896, + 'display-id': 'love-and-war-original-mix', + 'ext': 'mp3', + 'title': 'Wolfgang Gartner - Love & War (Original Mix)', + }, + }, { + 'url': 'https://pro.beatport.com/track/birds-original-mix/4991738', + 'md5': 'a1fd8e8046de3950fd039304c186c05f', + 'info_dict': { + 'id': 4991738, + 'display-id': 'birds-original-mix', + 'ext': 'mp4', + 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", + } + }] + + def _real_extract(self, url): + track_id = self._match_id(url) + webpage = self._download_webpage(url, track_id) + + # Extract "Playables" JSON information from the page + playables = self._search_regex(r'window\.Playables = ({.*?});', webpage, + 'playables info', flags=re.DOTALL) + playables = json.loads(playables) + + # Find first track with matching ID (always the first one listed?) 
+ track = next(filter(lambda t: t['id'] == int(track_id), playables['tracks'])) + + # Construct title from artist(s), track name, and mix name + title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + if track['mix']: + title += ' (' + track['mix'] + ')' + + # Get format information + formats = [] + for ext, info in track['preview'].items(): + if info['url'] is None: + continue + fmt = { + 'url': info['url'], + 'ext': ext, + 'format_id': ext, + 'vcodec': 'none', + } + if ext == 'mp3': + fmt['preference'] = 0 + fmt['acodec'] = 'mp3' + fmt['abr'] = 96 + fmt['asr'] = 44100 + elif ext == 'mp4': + fmt['preference'] = 1 + fmt['acodec'] = 'aac' + fmt['abr'] = 96 + fmt['asr'] = 44100 + formats += [fmt] + formats.sort(key=lambda f: f['preference']) + + # Get album art as thumbnails + imgs = [] + for name, info in track['images'].items(): + if name == 'dynamic' or info['url'] is None: + continue + img = { + 'id': name, + 'url': info['url'], + 'height': info['height'], + 'width': info['width'], + } + imgs += [img] + + return { + 'id': track['id'], + 'display-id': track['slug'], + 'title': title, + 'formats': formats, + 'thumbnails': imgs, + } From 65c5e044c7ab6d3140d30c98abda07785f2974c6 Mon Sep 17 00:00:00 2001 From: "Devin J. Pohly" Date: Thu, 12 Mar 2015 16:42:55 -0400 Subject: [PATCH 0048/2721] fix python2 --- youtube_dl/extractor/beatportpro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py index 21048b732..c3c70fb33 100644 --- a/youtube_dl/extractor/beatportpro.py +++ b/youtube_dl/extractor/beatportpro.py @@ -48,7 +48,7 @@ class BeatportProIE(InfoExtractor): playables = json.loads(playables) # Find first track with matching ID (always the first one listed?) 
- track = next(filter(lambda t: t['id'] == int(track_id), playables['tracks'])) + track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) # Construct title from artist(s), track name, and mix name title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] From 054b99a33079dcc4755e46aaf588424b4bb12020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 12 Mar 2015 22:33:59 +0100 Subject: [PATCH 0049/2721] [jeuxvideo] Fix extraction (fixes #5190) --- youtube_dl/extractor/jeuxvideo.py | 36 ++++++++++++++++--------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 8094cc2e4..d0720ff56 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor): 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', 'md5': '046e491afb32a8aaac1f44dd4ddd54ee', 'info_dict': { - 'id': '5182', + 'id': '114765', 'ext': 'mp4', - 'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité', - 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n', + 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité', + 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.', }, } @@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = mobj.group(1) webpage = self._download_webpage(url, title) - xml_link = self._html_search_regex( - r'', + title = 
self._html_search_meta('name', webpage) + config_url = self._html_search_regex( + r'data-src="(/contenu/medias/video.php.*?)"', webpage, 'config URL') + config_url = 'http://www.jeuxvideo.com' + config_url video_id = self._search_regex( - r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml', - xml_link, 'video ID') + r'id=(\d+)', + config_url, 'video ID') - config = self._download_xml( - xml_link, title, 'Downloading XML config') - info_json = config.find('format.json').text - info = json.loads(info_json)['versions'][0] + config = self._download_json( + config_url, title, 'Downloading JSON config') - video_url = 'http://video720.jeuxvideo.com/' + info['file'] + formats = [{ + 'url': source['file'], + 'format_id': source['label'], + 'resolution': source['label'], + } for source in reversed(config['sources'])] return { 'id': video_id, - 'title': config.find('titre_video').text, - 'ext': 'mp4', - 'url': video_url, + 'title': title, + 'formats': formats, 'description': self._og_search_description(webpage), - 'thumbnail': config.find('image').text, + 'thumbnail': config.get('image'), } From 486dd09e0bd8032a3234363231c2b592f725cad6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 13 Mar 2015 08:40:20 +0100 Subject: [PATCH 0050/2721] [YoutubeDL] Check for bytes instead of unicode output templates (#5192) Also adapt the embedding examples for those poor souls still using 2.x. --- README.md | 2 ++ youtube_dl/YoutubeDL.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/README.md b/README.md index f41e7ecee..f4fbf0034 100644 --- a/README.md +++ b/README.md @@ -515,6 +515,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho From a Python program, you can embed youtube-dl in a more powerful fashion, like this: ```python +from __future__ import unicode_literals import youtube_dl ydl_opts = {} @@ -527,6 +528,7 @@ Most likely, you'll want to use various options. 
For a list of what can be done, Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: ```python +from __future__ import unicode_literals import youtube_dl diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index be5b3c1ab..e779fc9a8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -323,6 +323,11 @@ class YoutubeDL(object): 'Set the LC_ALL environment variable to fix this.') self.params['restrictfilenames'] = True + if isinstance(params.get('outtmpl'), bytes): + self.report_warning( + 'Parameter outtmpl is bytes, but should be a unicode string. ' + 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') + if '%(stitle)s' in self.params.get('outtmpl', ''): self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') From fb7e68833cc67c23428462bf6c1306cbce207369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 20:51:44 +0600 Subject: [PATCH 0051/2721] [kanalplay] Add extractor (Closes #5188) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/kanalplay.py | 72 +++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/kanalplay.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f9523c2b..5dc56e330 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -231,6 +231,7 @@ from .jove import JoveIE from .jukebox import JukeboxIE from .jpopsukitv import JpopsukiIE from .kaltura import KalturaIE +from .kanalplay import KanalPlayIE from .kankan import KankanIE from .karaoketv import KaraoketvIE from .keezmovies import KeezMoviesIE diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py new file 
mode 100644 index 000000000..869757ec0 --- /dev/null +++ b/youtube_dl/extractor/kanalplay.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, +) + + +class KanalPlayIE(InfoExtractor): + IE_DESC = 'Kanal 5/9/11 Play' + _VALID_URL = r'https?://(?:www\.)?kanal(?P5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277', + 'md5': '', + 'info_dict': { + 'id': '2609989', + 'ext': 'flv', + 'title': 'Saknar både dusch och avlopp', + 'description': 'md5:', + 'duration': 2636.36, + }, + }, { + 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042', + 'only_matching': True, + }, { + 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + channel_id = mobj.group('channel_id') + + video = self._download_json( + 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id), + video_id) + + reasons_for_no_streams = video.get('reasonsForNoStreams') + if reasons_for_no_streams: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)), + expected=True) + + title = video['title'] + description = video.get('description') + duration = float_or_none(video.get('length'), 1000) + thumbnail = video.get('posterUrl') + + stream_base_url = video['streamBaseUrl'] + + formats = [{ + 'url': stream_base_url, + 'play_path': stream['source'], + 'ext': 'flv', + 'tbr': float_or_none(stream.get('bitrate'), 1000), + 'rtmp_real_time': True, + } for stream in video['streams']] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 
'formats': formats, + } From 4a34f69ea6213ffcf5785b30a126b439ceef77e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 21:38:28 +0600 Subject: [PATCH 0052/2721] [extractor/common] Add subtitles timecode formatter --- youtube_dl/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f9e8e2bad..e5245ec3f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1062,6 +1062,9 @@ class InfoExtractor(object): def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") + def _subtitles_timecode(self, seconds): + return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) + class SearchInfoExtractor(InfoExtractor): """ From 3f4327520c316043912ca9a8cd6f70d8121c3aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 21:39:29 +0600 Subject: [PATCH 0053/2721] [kanalplay] Extract subtitles --- youtube_dl/extractor/kanalplay.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py index 869757ec0..772d5aca4 100644 --- a/youtube_dl/extractor/kanalplay.py +++ b/youtube_dl/extractor/kanalplay.py @@ -31,6 +31,22 @@ class KanalPlayIE(InfoExtractor): 'only_matching': True, }] + def _fix_subtitles(self, subs): + return '\r\n\r\n'.join( + '%s\r\n%s --> %s\r\n%s' + % ( + num, + self._subtitles_timecode(item['startMillis'] / 1000.0), + self._subtitles_timecode(item['endMillis'] / 1000.0), + item['text'], + ) for num, item in enumerate(subs, 1)) + + def _get_subtitles(self, channel_id, video_id): + subs = self._download_json( + 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id), + video_id, 'Downloading subtitles JSON', fatal=False) + return {'se': [{'ext': 'srt', 'data': 
self._fix_subtitles(subs)}]} if subs else {} + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -62,6 +78,10 @@ class KanalPlayIE(InfoExtractor): } for stream in video['streams']] self._sort_formats(formats) + subtitles = {} + if video.get('hasSubtitle'): + subtitles = self.extract_subtitles(channel_id, video_id) + return { 'id': video_id, 'title': title, @@ -69,4 +89,5 @@ class KanalPlayIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'subtitles': subtitles, } From 545315a9856743541f7f7b9b0ab42f2af9dd05c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 21:40:34 +0600 Subject: [PATCH 0054/2721] [nrk] Use generic subtitles timecode formatter --- youtube_dl/extractor/nrk.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 1e4cfa2e7..bff36f9d3 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -149,9 +149,6 @@ class NRKTVIE(InfoExtractor): } ] - def _seconds2str(self, s): - return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000) - def _debug_print(self, txt): if self._downloader.params.get('verbose', False): self.to_screen('[debug] %s' % txt) @@ -168,8 +165,8 @@ class NRKTVIE(InfoExtractor): for pos, p in enumerate(ps): begin = parse_duration(p.get('begin')) duration = parse_duration(p.get('dur')) - starttime = self._seconds2str(begin) - endtime = self._seconds2str(begin + duration) + starttime = self._subtitles_timecode(begin) + endtime = self._subtitles_timecode(begin + duration) srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text) return {lang: [ {'ext': 'ttml', 'url': url}, From 1b53778175e43e2bf2cb71885a760d96727ee837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 21:51:49 +0600 Subject: [PATCH 0055/2721] [beatenpro] Use generic 
format sort --- youtube_dl/extractor/beatportpro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py index c3c70fb33..bc201572e 100644 --- a/youtube_dl/extractor/beatportpro.py +++ b/youtube_dl/extractor/beatportpro.py @@ -77,7 +77,7 @@ class BeatportProIE(InfoExtractor): fmt['abr'] = 96 fmt['asr'] = 44100 formats += [fmt] - formats.sort(key=lambda f: f['preference']) + self._sort_formats(formats) # Get album art as thumbnails imgs = [] From 517bcca29925548f9b9b121beec1391ef3ecedec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 22:01:15 +0600 Subject: [PATCH 0056/2721] [beatenpro] Simplify and improve --- youtube_dl/extractor/beatportpro.py | 34 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py index bc201572e..69657cbde 100644 --- a/youtube_dl/extractor/beatportpro.py +++ b/youtube_dl/extractor/beatportpro.py @@ -1,14 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor - import re import json +from .common import InfoExtractor +from ..utils import int_or_none + class BeatportProIE(InfoExtractor): - _VALID_URL = r'https?://pro\.beatport\.com/track/.*/(?P[0-9]+)' + _VALID_URL = r'https?://pro\.beatport\.com/track/.+/(?P[0-9]+)' _TESTS = [{ 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371', 'md5': 'b3c34d8639a2f6a7f734382358478887', @@ -42,20 +43,17 @@ class BeatportProIE(InfoExtractor): track_id = self._match_id(url) webpage = self._download_webpage(url, track_id) - # Extract "Playables" JSON information from the page - playables = self._search_regex(r'window\.Playables = ({.*?});', webpage, - 'playables info', flags=re.DOTALL) + playables = self._search_regex( + r'window\.Playables\s*=\s*({.*?});', webpage, + 'playables info', flags=re.DOTALL) playables 
= json.loads(playables) - # Find first track with matching ID (always the first one listed?) track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) - # Construct title from artist(s), track name, and mix name title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] if track['mix']: title += ' (' + track['mix'] + ')' - # Get format information formats = [] for ext, info in track['preview'].items(): if info['url'] is None: @@ -76,26 +74,26 @@ class BeatportProIE(InfoExtractor): fmt['acodec'] = 'aac' fmt['abr'] = 96 fmt['asr'] = 44100 - formats += [fmt] + formats.append(fmt) self._sort_formats(formats) - # Get album art as thumbnails - imgs = [] + images = [] for name, info in track['images'].items(): - if name == 'dynamic' or info['url'] is None: + image_url = info.get('url') + if name == 'dynamic' or not image_url: continue img = { 'id': name, - 'url': info['url'], - 'height': info['height'], - 'width': info['width'], + 'url': image_url, + 'height': int_or_none(info.get('height')), + 'width': int_or_none(info.get('width')), } - imgs += [img] + images.append(img) return { 'id': track['id'], 'display-id': track['slug'], 'title': title, 'formats': formats, - 'thumbnails': imgs, + 'thumbnails': images, } From ba1d4c04883cafb55e40734776d9d8ba2ef85582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 22:03:58 +0600 Subject: [PATCH 0057/2721] [beatenpro] Improve display_id --- youtube_dl/extractor/beatportpro.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py index 69657cbde..5c072b131 100644 --- a/youtube_dl/extractor/beatportpro.py +++ b/youtube_dl/extractor/beatportpro.py @@ -9,7 +9,7 @@ from ..utils import int_or_none class BeatportProIE(InfoExtractor): - _VALID_URL = r'https?://pro\.beatport\.com/track/.+/(?P[0-9]+)' + _VALID_URL = r'https?://pro\.beatport\.com/track/(?P[^/]+)/(?P[0-9]+)' 
_TESTS = [{ 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371', 'md5': 'b3c34d8639a2f6a7f734382358478887', @@ -40,8 +40,11 @@ class BeatportProIE(InfoExtractor): }] def _real_extract(self, url): - track_id = self._match_id(url) - webpage = self._download_webpage(url, track_id) + mobj = re.match(self._VALID_URL, url) + track_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) playables = self._search_regex( r'window\.Playables\s*=\s*({.*?});', webpage, @@ -92,7 +95,7 @@ class BeatportProIE(InfoExtractor): return { 'id': track['id'], - 'display-id': track['slug'], + 'display_id': track.get('slug') or display_id, 'title': title, 'formats': formats, 'thumbnails': images, From fcd877013e4a8f654c7778019055b57031492889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 22:11:56 +0600 Subject: [PATCH 0058/2721] [beatenpro] Simplify --- youtube_dl/extractor/beatportpro.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py index 5c072b131..12a7faa4f 100644 --- a/youtube_dl/extractor/beatportpro.py +++ b/youtube_dl/extractor/beatportpro.py @@ -2,9 +2,9 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor +from ..compat import compat_str from ..utils import int_or_none @@ -46,10 +46,11 @@ class BeatportProIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - playables = self._search_regex( - r'window\.Playables\s*=\s*({.*?});', webpage, - 'playables info', flags=re.DOTALL) - playables = json.loads(playables) + playables = self._parse_json( + self._search_regex( + r'window\.Playables\s*=\s*({.+?});', webpage, + 'playables info', flags=re.DOTALL), + track_id) track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) @@ -59,7 +60,7 @@ class BeatportProIE(InfoExtractor): 
formats = [] for ext, info in track['preview'].items(): - if info['url'] is None: + if not info['url']: continue fmt = { 'url': info['url'], @@ -85,16 +86,16 @@ class BeatportProIE(InfoExtractor): image_url = info.get('url') if name == 'dynamic' or not image_url: continue - img = { + image = { 'id': name, 'url': image_url, 'height': int_or_none(info.get('height')), 'width': int_or_none(info.get('width')), } - images.append(img) + images.append(image) return { - 'id': track['id'], + 'id': compat_str(track.get('id')) or track_id, 'display_id': track.get('slug') or display_id, 'title': title, 'formats': formats, From bba3fc7960c8cd6f0752c31c55ada804ba7e2ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 22:13:50 +0600 Subject: [PATCH 0059/2721] [beatenpro] Fix tests --- youtube_dl/extractor/beatportpro.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py index 12a7faa4f..3c7775d3e 100644 --- a/youtube_dl/extractor/beatportpro.py +++ b/youtube_dl/extractor/beatportpro.py @@ -14,8 +14,8 @@ class BeatportProIE(InfoExtractor): 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371', 'md5': 'b3c34d8639a2f6a7f734382358478887', 'info_dict': { - 'id': 5379371, - 'display-id': 'synesthesia-original-mix', + 'id': '5379371', + 'display_id': 'synesthesia-original-mix', 'ext': 'mp4', 'title': 'Froxic - Synesthesia (Original Mix)', }, @@ -23,8 +23,8 @@ class BeatportProIE(InfoExtractor): 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896', 'md5': 'e44c3025dfa38c6577fbaeb43da43514', 'info_dict': { - 'id': 3756896, - 'display-id': 'love-and-war-original-mix', + 'id': '3756896', + 'display_id': 'love-and-war-original-mix', 'ext': 'mp3', 'title': 'Wolfgang Gartner - Love & War (Original Mix)', }, @@ -32,8 +32,8 @@ class BeatportProIE(InfoExtractor): 'url': 
'https://pro.beatport.com/track/birds-original-mix/4991738', 'md5': 'a1fd8e8046de3950fd039304c186c05f', 'info_dict': { - 'id': 4991738, - 'display-id': 'birds-original-mix', + 'id': '4991738', + 'display_id': 'birds-original-mix', 'ext': 'mp4', 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", } From 28c6411e4959e342ec8bc016eb8ca5dbcbdc5d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 13 Mar 2015 22:14:51 +0600 Subject: [PATCH 0060/2721] Credit @djpohly for BeatportPro (#5189) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 421df69a6..c10f03b98 100644 --- a/AUTHORS +++ b/AUTHORS @@ -114,3 +114,4 @@ Ryan Schmidt Leslie P. Polzer Duncan Keall Alexander Mamay +Devin J. Pohly From 13598940e3e019ce19eaf87ae872ea52b7cb8740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2015 01:27:21 +0600 Subject: [PATCH 0061/2721] [kanalplay] Fix test --- youtube_dl/extractor/kanalplay.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py index 772d5aca4..2bb078036 100644 --- a/youtube_dl/extractor/kanalplay.py +++ b/youtube_dl/extractor/kanalplay.py @@ -15,14 +15,17 @@ class KanalPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?kanal(?P5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P\d+)' _TESTS = [{ 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277', - 'md5': '', 'info_dict': { - 'id': '2609989', + 'id': '3270012277', 'ext': 'flv', 'title': 'Saknar både dusch och avlopp', - 'description': 'md5:', + 'description': 'md5:6023a95832a06059832ae93bc3c7efb7', 'duration': 2636.36, }, + 'params': { + # rtmp download + 'skip_download': True, + } }, { 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042', 'only_matching': True, From 3647136f24b332be81f9866ae5e2d596ca5c884a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2015 02:12:11 +0600 Subject: [PATCH 0062/2721] [viewster] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/viewster.py | 53 ++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/viewster.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4682996cd..ad133603f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -559,6 +559,7 @@ from .videoweed import VideoWeedIE from .vidme import VidmeIE from .vidzi import VidziIE from .vier import VierIE, VierVideosIE +from .viewster import ViewsterIE from .vimeo import ( VimeoIE, VimeoAlbumIE, diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py new file mode 100644 index 000000000..d7864d6a6 --- /dev/null +++ b/youtube_dl/extractor/viewster.py @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_request + + +class ViewsterIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P\d+-\d+-\d+)' + _TEST = { + 'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/', + 'md5': '8f9d94b282d80c42b378dffdbb11caf3', + 'info_dict': { + 'id': '1293-19341-000', + 'ext': 'flv', + 'title': "'Hout' (Wood)", + 'description': 'md5:925733185a9242ef96f436937683f33b', + }, + } + + _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' + + def _real_extract(self, url): + video_id = self._match_id(url) + + request = compat_urllib_request.Request( + 'http://api.live.viewster.com/api/v1/movielink?movieid=%s&action=movierent&paymethod=fre&price=0¤cy=&language=en&subtitlelanguage=x&ischromecast=false' % video_id) + request.add_header('Accept', self._ACCEPT_HEADER) + + movie_link = self._download_json( + request, video_id, 'Downloading movie link JSON') + + formats = self._extract_f4m_formats( + movie_link['url'] 
+ '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id) + self._sort_formats(formats) + + request = compat_urllib_request.Request( + 'http://api.live.viewster.com/api/v1/movie/%s' % video_id) + request.add_header('Accept', self._ACCEPT_HEADER) + + movie = self._download_json( + request, video_id, 'Downloading movie metadata JSON') + + title = movie.get('title') or movie['original_title'] + description = movie.get('synopsis') + thumbnail = movie.get('large_artwork') or movie.get('artwork') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } From 7be5a62ed71d574132b648864ce3b741635604f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2015 03:18:04 +0600 Subject: [PATCH 0063/2721] [viewster] Improve extraction --- youtube_dl/extractor/viewster.py | 122 +++++++++++++++++++++++++------ 1 file changed, 99 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index d7864d6a6..1742e66f4 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -6,33 +6,75 @@ from ..compat import compat_urllib_request class ViewsterIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P\d+-\d+-\d+)' - _TEST = { + _TESTS = [{ + # movielink, paymethod=fre 'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/', - 'md5': '8f9d94b282d80c42b378dffdbb11caf3', + 'playlist': [{ + 'md5': '8f9d94b282d80c42b378dffdbb11caf3', + 'info_dict': { + 'id': '1293-19341-000-movie', + 'ext': 'flv', + 'title': "'Hout' (Wood) - Movie", + }, + }], 'info_dict': { 'id': '1293-19341-000', - 'ext': 'flv', 'title': "'Hout' (Wood)", 'description': 'md5:925733185a9242ef96f436937683f33b', - }, - } + } + }, { + # movielink, paymethod=adv + 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/', + 'playlist': [{ + 'md5': '77a005453ca7396cbe3d35c9bea30aef', + 'info_dict': { + 
'id': '1140-11855-000-movie', + 'ext': 'flv', + 'title': "THE LISTENING PROJECT - Movie", + }, + }], + 'info_dict': { + 'id': '1140-11855-000', + 'title': "THE LISTENING PROJECT", + 'description': 'md5:714421ae9957e112e672551094bf3b08', + } + }, { + # direct links, no movielink + 'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/', + 'playlist': [{ + 'md5': '0307b7eac6bfb21ab0577a71f6eebd8f', + 'info_dict': { + 'id': '1198-56411-000-trailer', + 'ext': 'mp4', + 'title': "Sinister - Trailer", + }, + }, { + 'md5': '80b9ee3ad69fb368f104cb5d9732ae95', + 'info_dict': { + 'id': '1198-56411-000-behind-scenes', + 'ext': 'mp4', + 'title': "Sinister - Behind Scenes", + }, + }, { + 'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5', + 'info_dict': { + 'id': '1198-56411-000-scene-from-movie', + 'ext': 'mp4', + 'title': "Sinister - Scene from movie", + }, + }], + 'info_dict': { + 'id': '1198-56411-000', + 'title': "Sinister", + 'description': 'md5:014c40b0488848de9683566a42e33372', + } + }] _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' def _real_extract(self, url): video_id = self._match_id(url) - request = compat_urllib_request.Request( - 'http://api.live.viewster.com/api/v1/movielink?movieid=%s&action=movierent&paymethod=fre&price=0¤cy=&language=en&subtitlelanguage=x&ischromecast=false' % video_id) - request.add_header('Accept', self._ACCEPT_HEADER) - - movie_link = self._download_json( - request, video_id, 'Downloading movie link JSON') - - formats = self._extract_f4m_formats( - movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id) - self._sort_formats(formats) - request = compat_urllib_request.Request( 'http://api.live.viewster.com/api/v1/movie/%s' % video_id) request.add_header('Accept', self._ACCEPT_HEADER) @@ -44,10 +86,44 @@ class ViewsterIE(InfoExtractor): description = movie.get('synopsis') thumbnail = movie.get('large_artwork') or movie.get('artwork') - return { - 'id': video_id, - 'title': title, - 'description': description, 
- 'thumbnail': thumbnail, - 'formats': formats, - } + entries = [] + for clip in movie['play_list']: + entry = None + + # movielink api + link_request = clip.get('link_request') + if link_request: + request = compat_urllib_request.Request( + 'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s¤cy=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s' + % link_request) + request.add_header('Accept', self._ACCEPT_HEADER) + + movie_link = self._download_json( + request, video_id, 'Downloading movie link JSON', fatal=False) + + if movie_link: + formats = self._extract_f4m_formats( + movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id) + self._sort_formats(formats) + entry = { + 'formats': formats, + } + + # direct link + clip_url = clip.get('clip_data', {}).get('url') + if clip_url: + entry = { + 'url': clip_url, + 'ext': 'mp4', + } + + if entry: + entry.update({ + 'id': '%s-%s' % (video_id, clip['canonical_title']), + 'title': '%s - %s' % (title, clip['title']), + }) + entries.append(entry) + + playlist = self.playlist_result(entries, video_id, title, description) + playlist['thumbnail'] = thumbnail + return playlist From 29171bc2d2f07a4eebb3b353fab989d7652a2083 Mon Sep 17 00:00:00 2001 From: Mamay Alexander Date: Sat, 14 Mar 2015 13:56:04 +0600 Subject: [PATCH 0064/2721] [yandexmusic] Site mirrors --- youtube_dl/extractor/yandexmusic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index b47aecb15..f4c0f5702 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -39,7 +39,7 @@ class YandexMusicBaseIE(InfoExtractor): class YandexMusicTrackIE(YandexMusicBaseIE): IE_NAME = 'yandexmusic:track' IE_DESC = 'Яндекс.Музыка - Трек' - _VALID_URL = 
r'https?://music\.yandex\.ru/album/(?P\d+)/track/(?P\d+)' + _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/track/(?P\d+)' _TEST = { 'url': 'http://music.yandex.ru/album/540508/track/4878838', @@ -67,7 +67,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE): class YandexMusicAlbumIE(YandexMusicBaseIE): IE_NAME = 'yandexmusic:album' IE_DESC = 'Яндекс.Музыка - Альбом' - _VALID_URL = r'https?://music\.yandex\.ru/album/(?P\d+)/?(\?|$)' + _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/?(\?|$)' _TEST = { 'url': 'http://music.yandex.ru/album/540508', @@ -98,7 +98,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE): class YandexMusicPlaylistIE(YandexMusicBaseIE): IE_NAME = 'yandexmusic:playlist' IE_DESC = 'Яндекс.Музыка - Плейлист' - _VALID_URL = r'https?://music\.yandex\.ru/users/[^/]+/playlists/(?P\d+)' + _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P\d+)' _TEST = { 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', From 85741b998660bfb1564ca2c02e233db42059be05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2015 15:52:06 +0600 Subject: [PATCH 0065/2721] [8tracks] Use predefined avg duration when duration is negative (Closes #5200) --- youtube_dl/extractor/eighttracks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index fb5dbbe2b..869ff72d6 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -117,6 +117,9 @@ class EightTracksIE(InfoExtractor): track_count = data['tracks_count'] duration = data['duration'] avg_song_duration = float(duration) / track_count + # duration is sometimes negative, use predefined avg duration + if avg_song_duration <= 0: + avg_song_duration = 300 first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) next_url = first_url entries = [] From 
05be67e77dcf5c2a87d0549f6ae0f805ce38f758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2015 15:54:23 +0600 Subject: [PATCH 0066/2721] [8tracks] Improve extraction --- youtube_dl/extractor/eighttracks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index 869ff72d6..49ec2be4c 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -108,9 +108,10 @@ class EightTracksIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - json_like = self._search_regex( - r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information') - data = json.loads(json_like) + data = self._parse_json( + self._search_regex( + r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'), + playlist_id) session = str(random.randint(0, 1000000000)) mix_id = data['id'] From a7e01c438d2f7eb28a44056a1feb1b48f51e1096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 14 Mar 2015 15:55:21 +0600 Subject: [PATCH 0067/2721] [8tracks] Modernize --- youtube_dl/extractor/eighttracks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index 49ec2be4c..c84d16142 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -103,8 +103,7 @@ class EightTracksIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) From 9202b1b7878e2763a4406ebe2d59962202f6004b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 14 Mar 2015 12:04:49 +0100 Subject: [PATCH 0068/2721] [eighttracks] Remove unused import --- youtube_dl/extractor/eighttracks.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py index c84d16142..0b61ea0ba 100644 --- a/youtube_dl/extractor/eighttracks.py +++ b/youtube_dl/extractor/eighttracks.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import json import random -import re from .common import InfoExtractor from ..compat import ( From 082b1155a36dc9b51424151f80860e52ee30b55e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 14 Mar 2015 12:06:01 +0100 Subject: [PATCH 0069/2721] [livestream] Extract all videos in events (fixes #5198) The webpage only contains the most recent ones, but if you scroll down more will appear. --- youtube_dl/extractor/livestream.py | 41 ++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 3642089f7..2467f8bdd 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re import json +import itertools from .common import InfoExtractor from ..compat import ( @@ -40,6 +41,13 @@ class LivestreamIE(InfoExtractor): 'id': '2245590', }, 'playlist_mincount': 4, + }, { + 'url': 'http://new.livestream.com/chess24/tatasteelchess', + 'info_dict': { + 'title': 'Tata Steel Chess', + 'id': '3705884', + }, + 'playlist_mincount': 60, }, { 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640', 'only_matching': True, @@ -117,6 +125,30 @@ class LivestreamIE(InfoExtractor): 'view_count': video_data.get('views'), } + def _extract_event(self, info): + event_id = compat_str(info['id']) + account = compat_str(info['owner_account_id']) + root_url = ( + 'https://new.livestream.com/api/accounts/{account}/events/{event}/' + 'feed.json'.format(account=account, event=event_id)) + + def _extract_videos(): + last_video = None + for 
i in itertools.count(1): + if last_video is None: + info_url = root_url + else: + info_url = '{root}?&id={id}&newer=-1&type=video'.format( + root=root_url, id=last_video) + videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data'] + videos_info = [v['data'] for v in videos_info if v['type'] == 'video'] + if not videos_info: + break + for v in videos_info: + yield self._extract_video_info(v) + last_video = videos_info[-1]['id'] + return self.playlist_result(_extract_videos(), event_id, info['full_name']) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -144,14 +176,13 @@ class LivestreamIE(InfoExtractor): result = result and compat_str(vdata['data']['id']) == vid return result - videos = [self._extract_video_info(video_data['data']) - for video_data in info['feed']['data'] - if is_relevant(video_data, video_id)] if video_id is None: # This is an event page: - return self.playlist_result( - videos, '%s' % info['id'], info['full_name']) + return self._extract_event(info) else: + videos = [self._extract_video_info(video_data['data']) + for video_data in info['feed']['data'] + if is_relevant(video_data, video_id)] if not videos: raise ExtractorError('Cannot find video %s' % video_id) return videos[0] From cd65491c306f644d7bb3c7ad98795a3f8660be49 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Mar 2015 00:59:49 +0800 Subject: [PATCH 0070/2721] [Sohu] Add a multiplart video test case --- youtube_dl/extractor/sohu.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 5adc734d9..7db5b2f13 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -42,6 +42,37 @@ class SohuIE(InfoExtractor): 'ext': 'mp4', 'title': '【爱范品】第31期:MWC见不到的奇葩手机', } + }, { + 'note': 'Multipart video', + 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml', + 'info_dict': { + 'id': 
'78910339', + }, + 'playlist': [{ + 'md5': 'bdbfb8f39924725e6589c146bc1883ad', + 'info_dict': { + 'id': '78910339_part1', + 'ext': 'mp4', + 'duration': 294, + 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', + } + }, { + 'md5': '3e1f46aaeb95354fd10e7fca9fc1804e', + 'info_dict': { + 'id': '78910339_part2', + 'ext': 'mp4', + 'duration': 300, + 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', + } + }, { + 'md5': '8407e634175fdac706766481b9443450', + 'info_dict': { + 'id': '78910339_part3', + 'ext': 'mp4', + 'duration': 150, + 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', + } + }] }] def _real_extract(self, url): From 2cb434e53ee861c8bcbd538455be107085f444ae Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 15 Mar 2015 01:05:01 +0800 Subject: [PATCH 0071/2721] [Sohu] Fix title extraction --- youtube_dl/extractor/sohu.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 7db5b2f13..ea5cc06b9 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -73,6 +73,17 @@ class SohuIE(InfoExtractor): 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', } }] + }, { + 'info': 'Video with title containing dash', + 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml', + 'info_dict': { + 'id': '78932792', + 'ext': 'mp4', + 'title': 'youtube-dl testing video', + }, + 'params': { + 'skip_download': True + } }] def _real_extract(self, url): @@ -97,10 +108,8 @@ class SohuIE(InfoExtractor): mytv = mobj.group('mytv') is not None webpage = self._download_webpage(url, video_id) - raw_title = self._html_search_regex( - r'(?s)(.+?)', - webpage, 'video title') - title = raw_title.partition('-')[0].strip() + + title = self._og_search_title(webpage) vid = self._html_search_regex( r'var vid ?= ?["\'](\d+)["\']', From e7db87f7000143341505cff812d1fa0371ac901e Mon Sep 17 00:00:00 2001 From: phiresky Date: Wed, 4 Mar 2015 22:33:56 +0100 Subject: [PATCH 0072/2721] Add metadata from title parser (Closes #5125) --- 
youtube_dl/__init__.py | 5 ++ youtube_dl/options.py | 9 ++++ youtube_dl/postprocessor/__init__.py | 2 + youtube_dl/postprocessor/ffmpeg.py | 10 +++- youtube_dl/postprocessor/metadatafromtitle.py | 48 +++++++++++++++++++ 5 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 youtube_dl/postprocessor/metadatafromtitle.py diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a08ddd670..852b2fc3d 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -213,6 +213,11 @@ def _real_main(argv=None): # PostProcessors postprocessors = [] # Add the metadata pp first, the other pps will copy it + if opts.metafromtitle: + postprocessors.append({ + 'key': 'MetadataFromTitle', + 'titleformat': opts.metafromtitle + }) if opts.addmetadata: postprocessors.append({'key': 'FFmpegMetadata'}) if opts.extractaudio: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index eefe008d5..be9402fdb 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -735,6 +735,15 @@ def parseOpts(overrideArguments=None): '--add-metadata', action='store_true', dest='addmetadata', default=False, help='write metadata to the video file') + postproc.add_option( + '--metadata-from-title', + metavar='FORMAT', dest='metafromtitle', + help='parse additional metadata like song title / artist from the video title. \n' + 'The format syntax is the same as --output, ' + 'the parsed parameters replace existing values.\n' + 'Additional templates: %(songtitle), %(album), %(artist). 
\n' + 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like ' + '"Coldplay - Paradise"') postproc.add_option( '--xattrs', action='store_true', dest='xattrs', default=False, diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index 708df3dd4..f39acadce 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -15,6 +15,7 @@ from .ffmpeg import ( ) from .xattrpp import XAttrMetadataPP from .execafterdownload import ExecAfterDownloadPP +from .metadatafromtitle import MetadataFromTitlePP def get_postprocessor(key): @@ -34,5 +35,6 @@ __all__ = [ 'FFmpegPostProcessor', 'FFmpegSubtitlesConvertorPP', 'FFmpegVideoConvertorPP', + 'MetadataFromTitlePP', 'XAttrMetadataPP', ] diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 30094c2f3..a17113cbf 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -541,11 +541,15 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): def run(self, info): metadata = {} - if info.get('title') is not None: + if info.get('songtitle') is not None: + metadata['title'] = info['songtitle'] + elif info.get('title') is not None: metadata['title'] = info['title'] if info.get('upload_date') is not None: metadata['date'] = info['upload_date'] - if info.get('uploader') is not None: + if info.get('artist') is not None: + metadata['artist'] = info['artist'] + elif info.get('uploader') is not None: metadata['artist'] = info['uploader'] elif info.get('uploader_id') is not None: metadata['artist'] = info['uploader_id'] @@ -554,6 +558,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor): metadata['comment'] = info['description'] if info.get('webpage_url') is not None: metadata['purl'] = info['webpage_url'] + if info.get('album') is not None: + metadata['album'] = info['album'] if not metadata: self._downloader.to_screen('[ffmpeg] There isn\'t any 
metadata to add') diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py new file mode 100644 index 000000000..4c9d3aafe --- /dev/null +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- + +import re + +from .common import PostProcessor +from ..utils import PostProcessingError + + +class MetadataFromTitlePPError(PostProcessingError): + pass + + +class MetadataFromTitlePP(PostProcessor): + def __init__(self, downloader, titleformat): + self._titleformat = titleformat + self._titleregex = self.fmtToRegex(titleformat) + + def fmtToRegex(self, fmt): + """ + Converts a string like + '%(title)s - %(artist)s' + to a regex like + '(?P.+)\ \-\ (?P<artist>.+)' + and a list of the named groups [title, artist] + """ + lastpos = 0 + regex = "" + groups = [] + # replace %(..)s with regex group and escape other string parts + for match in re.finditer(r'%\((\w+)\)s', fmt): + regex += re.escape(fmt[lastpos:match.start()]) + regex += r'(?P<' + match.group(1) + '>.+)' + lastpos = match.end() + if lastpos < len(fmt): + regex += re.escape(fmt[lastpos:len(fmt)]) + return regex + + def run(self, info): + title = info['title'] + match = re.match(self._titleregex, title) + if match is None: + raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat) + for attribute, value in match.groupdict().items(): + value = match.group(attribute) + info[attribute] = value + self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) + + return True, info From 88cf6fb3685c4e012c9f574cbc5f1836c42fc06d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 14 Mar 2015 19:55:42 +0100 Subject: [PATCH 0073/2721] [metadatafromtitle] Some improvements and cleanup * Remove the 'songtitle' field, 'title' can be used instead. 
* Remove newlines in the help text, for consistency with other options. * Add 'from __future__ import unicode_literals'. * Call '__init__' from the parent class. * Add test for the format_to_regex method --- test/test_postprocessors.py | 17 +++++++++++++++++ youtube_dl/options.py | 6 +++--- youtube_dl/postprocessor/ffmpeg.py | 4 +--- youtube_dl/postprocessor/metadatafromtitle.py | 9 ++++----- 4 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 test/test_postprocessors.py diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py new file mode 100644 index 000000000..addb69d6f --- /dev/null +++ b/test/test_postprocessors.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.postprocessor import MetadataFromTitlePP + + +class TestMetadataFromTitle(unittest.TestCase): + def test_format_to_regex(self): + pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s') + self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)') diff --git a/youtube_dl/options.py b/youtube_dl/options.py index be9402fdb..4e6e47d6f 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -738,10 +738,10 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', - help='parse additional metadata like song title / artist from the video title. \n' + help='parse additional metadata like song title / artist from the video title. ' 'The format syntax is the same as --output, ' - 'the parsed parameters replace existing values.\n' - 'Additional templates: %(songtitle), %(album), %(artist). \n' + 'the parsed parameters replace existing values. ' + 'Additional templates: %(album), %(artist). 
' 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like ' '"Coldplay - Paradise"') postproc.add_option( diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index a17113cbf..b6f51cfd5 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -541,9 +541,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): def run(self, info): metadata = {} - if info.get('songtitle') is not None: - metadata['title'] = info['songtitle'] - elif info.get('title') is not None: + if info.get('title') is not None: metadata['title'] = info['title'] if info.get('upload_date') is not None: metadata['date'] = info['upload_date'] diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index 4c9d3aafe..5019433d3 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +from __future__ import unicode_literals import re @@ -12,20 +12,19 @@ class MetadataFromTitlePPError(PostProcessingError): class MetadataFromTitlePP(PostProcessor): def __init__(self, downloader, titleformat): + super(MetadataFromTitlePP, self).__init__(downloader) self._titleformat = titleformat - self._titleregex = self.fmtToRegex(titleformat) + self._titleregex = self.format_to_regex(titleformat) - def fmtToRegex(self, fmt): + def format_to_regex(self, fmt): """ Converts a string like '%(title)s - %(artist)s' to a regex like '(?P<title>.+)\ \-\ (?P<artist>.+)' - and a list of the named groups [title, artist] """ lastpos = 0 regex = "" - groups = [] # replace %(..)s with regex group and escape other string parts for match in re.finditer(r'%\((\w+)\)s', fmt): regex += re.escape(fmt[lastpos:match.start()]) From 4d1652484f4a81b8d014159e680f7e20feae85e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= 
<jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 14 Mar 2015 20:25:37 +0100 Subject: [PATCH 0074/2721] [test/unicode_literals] Don't look into the .git and .tox directories The .tox directory contains python code that we can't control --- test/test_unicode_literals.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py index 7f816698e..6c1b7ec91 100644 --- a/test/test_unicode_literals.py +++ b/test/test_unicode_literals.py @@ -17,13 +17,22 @@ IGNORED_FILES = [ 'buildserver.py', ] +IGNORED_DIRS = [ + '.git', + '.tox', +] from test.helper import assertRegexpMatches class TestUnicodeLiterals(unittest.TestCase): def test_all_files(self): - for dirpath, _, filenames in os.walk(rootDir): + for dirpath, dirnames, filenames in os.walk(rootDir): + for ignore_dir in IGNORED_DIRS: + if ignore_dir in dirnames: + # If we remove the directory from dirnames os.walk won't + # recurse into it + dirnames.remove(ignore_dir) for basename in filenames: if not basename.endswith('.py'): continue From 8508557e77362bf1e5bcc919e89e6b4aa5d101ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 14 Mar 2015 20:51:42 +0100 Subject: [PATCH 0075/2721] [test/YoutubeDL] Use valid urls It failed on python 3.4 when building the http_headers field --- test/test_YoutubeDL.py | 46 ++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 055e42555..db8a47d2d 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -15,6 +15,8 @@ from youtube_dl import YoutubeDL from youtube_dl.extractor import YoutubeIE from youtube_dl.postprocessor.common import PostProcessor +TEST_URL = 'http://localhost/sample.mp4' + class YDL(FakeYDL): def __init__(self, *args, **kwargs): @@ -46,8 +48,8 @@ class TestFormatSelection(unittest.TestCase): 
ydl = YDL() ydl.params['prefer_free_formats'] = True formats = [ - {'ext': 'webm', 'height': 460, 'url': 'x'}, - {'ext': 'mp4', 'height': 460, 'url': 'y'}, + {'ext': 'webm', 'height': 460, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 460, 'url': TEST_URL}, ] info_dict = _make_result(formats) yie = YoutubeIE(ydl) @@ -60,8 +62,8 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL() ydl.params['prefer_free_formats'] = True formats = [ - {'ext': 'webm', 'height': 720, 'url': 'a'}, - {'ext': 'mp4', 'height': 1080, 'url': 'b'}, + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 1080, 'url': TEST_URL}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) @@ -74,9 +76,9 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL() ydl.params['prefer_free_formats'] = False formats = [ - {'ext': 'webm', 'height': 720, 'url': '_'}, - {'ext': 'mp4', 'height': 720, 'url': '_'}, - {'ext': 'flv', 'height': 720, 'url': '_'}, + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 720, 'url': TEST_URL}, + {'ext': 'flv', 'height': 720, 'url': TEST_URL}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) @@ -88,8 +90,8 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL() ydl.params['prefer_free_formats'] = False formats = [ - {'ext': 'flv', 'height': 720, 'url': '_'}, - {'ext': 'webm', 'height': 720, 'url': '_'}, + {'ext': 'flv', 'height': 720, 'url': TEST_URL}, + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, ] info_dict['formats'] = formats yie = YoutubeIE(ydl) @@ -133,10 +135,10 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection(self): formats = [ - {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'}, - {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'}, - {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'}, - {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'}, + {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + 
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, + {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, + {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL}, ] info_dict = _make_result(formats) @@ -167,10 +169,10 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection_audio(self): formats = [ - {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'}, - {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'}, - {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'}, - {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'}, + {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL}, ] info_dict = _make_result(formats) @@ -185,8 +187,8 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], 'audio-low') formats = [ - {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'}, - {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'}, + {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL}, ] info_dict = _make_result(formats) @@ -228,9 +230,9 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection_video(self): formats = [ - {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'}, - {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'}, - {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'}, + {'format_id': 'dash-video-low', 'ext': 'mp4', 
'preference': 1, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL}, ] info_dict = _make_result(formats) From 873383e9bd2496a31870e6456007463c1f873b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 14 Mar 2015 21:41:15 +0100 Subject: [PATCH 0076/2721] tox.ini: Run the same command as 'make offlinetest' by default --- tox.ini | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index ed01e3386..6fe7fc8f5 100644 --- a/tox.ini +++ b/tox.ini @@ -4,5 +4,8 @@ envlist = py26,py27,py33 deps = nose coverage -commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html +defaultargs = test --exclude test_download.py --exclude test_age_restriction.py + --exclude test_subtitles.py --exclude test_write_annotations.py + --exclude test_youtube_lists.py +commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html # test.test_download:TestDownload.test_NowVideo From 7c42327e0e2f62a64771769e17cda33ea2a4a10d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 14 Mar 2015 21:41:56 +0100 Subject: [PATCH 0077/2721] tox.ini: Add python 3.4 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 6fe7fc8f5..00c6e00e3 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26,py27,py33 +envlist = py26,py27,py33,py34 [testenv] deps = nose From 8f4cc22455f1c08f8e95f867f40c9f378bfe7a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 Mar 2015 10:08:14 +0600 Subject: [PATCH 0078/2721] [aftenposten] Adapt to new URL format --- 
youtube_dl/extractor/aftenposten.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py index 2b257ede7..e15c015fb 100644 --- a/youtube_dl/extractor/aftenposten.py +++ b/youtube_dl/extractor/aftenposten.py @@ -14,10 +14,10 @@ from ..utils import ( class AftenpostenIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html' + _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)' _TEST = { - 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=§ion=webtv_serierogprogrammer_sweatshop_sweatshopenglish', + 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more', 'md5': 'fd828cd29774a729bf4d4425fe192972', 'info_dict': { 'id': '21039', @@ -30,12 +30,7 @@ class AftenpostenIE(InfoExtractor): } def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - video_id = self._html_search_regex( - r'data-xs-id="(\d+)"', webpage, 'video id') + video_id = self._match_id(url) data = self._download_xml( 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id) From 613b2d9dc6eaf515898516c654f79cee8951a269 Mon Sep 17 00:00:00 2001 From: Kang Hyojun <admire9@gmail.com> Date: Sun, 15 Mar 2015 20:18:23 +0900 Subject: [PATCH 0079/2721] Fix mistyped docstring indent --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e779fc9a8..5a83bc956 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -635,7 +635,7 @@ class YoutubeDL(object): Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. 
extra_info is a dict containing the extra values to add to each result - ''' + ''' if ie_key: ies = [self.get_info_extractor(ie_key)] From ec1b9577ba31b7b078eec370680f84909b2e70e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 15 Mar 2015 22:42:13 +0600 Subject: [PATCH 0080/2721] [cloudy] Fix key extraction (Closes #5211) --- youtube_dl/extractor/cloudy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py index abf8cc280..0fa720ee8 100644 --- a/youtube_dl/extractor/cloudy.py +++ b/youtube_dl/extractor/cloudy.py @@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor): webpage = self._download_webpage(url, video_id) file_key = self._search_regex( - r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key') + [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'], + webpage, 'file_key') return self._extract_video(video_host, video_id, file_key) From 39aa42ffbbe4df95ded3e70d4126808898a43f24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 16 Mar 2015 00:21:38 +0600 Subject: [PATCH 0081/2721] [ard] Capture and output time restricted videos (Closes #5213) --- youtube_dl/extractor/ard.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 783b53e23..6a35ea463 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor): if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: raise ExtractorError('Video %s is no longer available' % video_id, expected=True) + if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage: + raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' 
% video_id, expected=True) + if re.search(r'[\?&]rss($|[=&])', url): doc = parse_xml(webpage) if doc.tag == 'rss': From 1de4ac1385bf0bedd753f41496055c53a14c20ca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 15 Mar 2015 19:38:50 +0100 Subject: [PATCH 0082/2721] release 2015.03.15 --- README.md | 3 +++ docs/supportedsites.md | 8 ++++++++ youtube_dl/version.py | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f4fbf0034..4f9fc8174 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,9 @@ which means you can modify it, redistribute it or use it however you like. --embed-subs embed subtitles in the video (only for mp4 videos) --embed-thumbnail embed thumbnail in the audio as cover art --add-metadata write metadata to the video file + --metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed + parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - + %(title)s" matches a title like "Coldplay - Paradise" --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards) --fixup POLICY Automatically correct known faults of the file. 
One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; fix file if we can, warn otherwise) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 80a696ee3..d6a1e67c6 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -47,6 +47,7 @@ - **Bandcamp** - **Bandcamp:album** - **bbc.co.uk**: BBC iPlayer + - **BeatportPro** - **Beeg** - **BehindKink** - **Bet** @@ -145,6 +146,7 @@ - **Firstpost** - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) + - **FootyRoom** - **Foxgay** - **FoxNews** - **france2.fr:generation-quoi** @@ -213,6 +215,7 @@ - **jpopsuki.tv** - **Jukebox** - **Kaltura** + - **KanalPlay**: Kanal 5/9/11 Play - **Kankan** - **Karaoketv** - **keek** @@ -414,6 +417,7 @@ - **SportBox** - **SportDeutschland** - **SRMediathek**: Saarländischer Rundfunk + - **SSA** - **stanfordoc**: Stanford Open ClassRoom - **Steam** - **streamcloud.eu** @@ -510,6 +514,7 @@ - **Vidzi** - **vier** - **vier:videos** + - **Viewster** - **viki** - **vimeo** - **vimeo:album** @@ -556,6 +561,9 @@ - **XXXYMovies** - **Yahoo**: Yahoo screen and movies - **Yam** + - **yandexmusic:album**: Яндекс.Музыка - Альбом + - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист + - **yandexmusic:track**: Яндекс.Музыка - Трек - **YesJapan** - **Ynet** - **YouJizz** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1f0c88a4d..7ed07c375 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.03.09' +__version__ = '2015.03.15' From d5b559393b793f21f6566321fa08a6bbbe6f524e Mon Sep 17 00:00:00 2001 From: Eduardo Ferro <eduardo.ferro.aldama@gmail.com> Date: Sun, 15 Mar 2015 21:45:14 +0100 Subject: [PATCH 0083/2721] [rtve] Add new extractor for rtve infantil --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/rtve.py | 43 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ad133603f..1bb3e1a1c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -405,7 +405,7 @@ from .rtlnow import RTLnowIE from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE +from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index b42442d12..b701a322a 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -126,6 +126,49 @@ class RTVEALaCartaIE(InfoExtractor): (s['lang'], [{'ext': 'vtt', 'url': s['src']}]) for s in subs) +class RTVEInfantilIE(InfoExtractor): + IE_NAME = 'rtve.es:alacarta' + IE_DESC = 'RTVE a la carta' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_tittle>[^/]*)/(?P<id>[0-9]+)/' + + _TESTS = [{ + 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', + 'md5': '915319587b33720b8e0357caaa6617e6', + 'info_dict': { + 'id': '3040283', + 'ext': 'mp4', + 'title': 'Maneras de vivir', + 'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG', + 'duration': 357.958, + }, + },] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + short_tittle = mobj.group('short_tittle') + info = self._download_json( + 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, + video_id)['page']['items'][0] + + webpage = self._download_webpage(url, video_id) + vidplayer_id = self._search_regex( + r' id="vidplayer([0-9]+)"', webpage, 'internal video ID') + + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id + png = self._download_webpage(png_url, video_id, 'Downloading url information') + video_url = _decrypt_url(png) + + return { + 'id': video_id, + 'ext': 
'mp4', + 'title': info['title'], + 'url': video_url, + 'thumbnail': info.get('image'), + 'duration': float_or_none(info.get('duration'), scale=1000), + } + + class RTVELiveIE(InfoExtractor): IE_NAME = 'rtve.es:live' From b68eedba23cbb4cc419e3568055bfb86c5975054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 15 Mar 2015 22:17:40 +0100 Subject: [PATCH 0084/2721] [rtve.es:infantil] Minor fixes (closes #5214) --- youtube_dl/extractor/rtve.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index b701a322a..13f071077 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -126,10 +126,11 @@ class RTVEALaCartaIE(InfoExtractor): (s['lang'], [{'ext': 'vtt', 'url': s['src']}]) for s in subs) + class RTVEInfantilIE(InfoExtractor): - IE_NAME = 'rtve.es:alacarta' - IE_DESC = 'RTVE a la carta' - _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_tittle>[^/]*)/(?P<id>[0-9]+)/' + IE_NAME = 'rtve.es:infantil' + IE_DESC = 'RTVE infantil' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/' _TESTS = [{ 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', @@ -141,12 +142,10 @@ class RTVEInfantilIE(InfoExtractor): 'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG', 'duration': 357.958, }, - },] + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - short_tittle = mobj.group('short_tittle') + video_id = self._match_id(url) info = self._download_json( 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0] @@ -169,7 +168,6 @@ class RTVEInfantilIE(InfoExtractor): } - class RTVELiveIE(InfoExtractor): IE_NAME = 'rtve.es:live' IE_DESC = 
'RTVE.es live streams' From 90183a46d85758c59814187d9c777e4237a66afe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 15 Mar 2015 22:49:03 +0100 Subject: [PATCH 0085/2721] Credit @eferro for the rtve.es:infantil extractor (#5214) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index c10f03b98..872da6071 100644 --- a/AUTHORS +++ b/AUTHORS @@ -115,3 +115,4 @@ Leslie P. Polzer Duncan Keall Alexander Mamay Devin J. Pohly +Eduardo Ferro Aldama From 2e90dff2c2ecade8afb444b086fbc0ad6d2c812d Mon Sep 17 00:00:00 2001 From: felix <m.p.isaev@yandex.com> Date: Mon, 16 Mar 2015 20:05:02 +0100 Subject: [PATCH 0086/2721] The Daily Show Podcast support --- youtube_dl/extractor/__init__.py | 3 +- youtube_dl/extractor/comedycentral.py | 21 ++++++++++++++ youtube_dl/extractor/libsyn.py | 41 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/libsyn.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1bb3e1a1c..e94779d40 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -84,7 +84,7 @@ from .cnn import ( ) from .collegehumor import CollegeHumorIE from .collegerama import CollegeRamaIE -from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE +from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE, TheDailyShowPodcastIE from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .condenast import CondeNastIE @@ -250,6 +250,7 @@ from .letv import ( LetvPlaylistIE ) from .lifenews import LifeNewsIE +from .libsyn import LibsynIE from .liveleak import LiveLeakIE from .livestream import ( LivestreamIE, diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index e5edcc84b..e427b9821 100644 --- a/youtube_dl/extractor/comedycentral.py +++ 
b/youtube_dl/extractor/comedycentral.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +from .common import InfoExtractor from .mtv import MTVServicesInfoExtractor from ..compat import ( compat_str, @@ -272,3 +273,23 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): 'title': show_name + ' ' + title, 'description': description, } + +class TheDailyShowPodcastIE(InfoExtractor): + _VALID_URL = r'(?P<scheme>https?:)?//thedailyshow\.cc\.com/podcast/(?P<id>[a-z\-]+)' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + player_url = self._search_regex(r'<iframe(?:\s+[^>]+)?\s*src="((?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/[0-9]+)', webpage, 'player URL') + if player_url.startswith('//'): + mobj = re.match(self._VALID_URL, url) + scheme = mobj.group('scheme') + if not scheme: + scheme = 'https:' + player_url = scheme + player_url + + return { + '_type': 'url_transparent', + 'url': player_url, + } diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py new file mode 100644 index 000000000..4b5029f89 --- /dev/null +++ b/youtube_dl/extractor/libsyn.py @@ -0,0 +1,41 @@ +# encoding: utf-8 +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + +class LibsynIE(InfoExtractor): + _VALID_URL = r'(?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)(?:/.*)?' 
+ + def _real_extract(self, url): + if url.startswith('//'): + url = 'https:' + url + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + podcast_title = self._search_regex(r'<h2>(.*?)</h2>', webpage, 'show title') + podcast_episode_title = self._search_regex(r'<h3>(.*?)</h3>', webpage, 'episode title') + podcast_date = unified_strdate(self._search_regex(r'<div class="release_date">Released: (.*?)</div>', webpage, 'release date')) + podcast_description = self._search_regex(r'<div id="info_text_body">(.*?)</div>', webpage, 'description') + + url0 = self._search_regex(r'var mediaURLLibsyn = "(?P<url0>https?://.*)";', webpage, 'first media URL') + url1 = self._search_regex(r'var mediaURL = "(?P<url1>https?://.*)";', webpage, 'second media URL') + + if url0 != url1: + formats = [{ + 'url': url0 + }, { + 'url': url1 + }] + else: + formats = [{ + 'url': url0 + }] + + return { + 'id': display_id, + 'title': podcast_episode_title, + 'description': podcast_description, + 'upload_date': podcast_date, + 'formats': formats, + } From c06a9f8730f41fb24835d482114403f227bc8421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 19:42:50 +0600 Subject: [PATCH 0087/2721] [arte+7] Check formats (Closes #5224) --- youtube_dl/extractor/arte.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 929dd3cc5..8273bd6c9 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor): formats.append(format) + self._check_formats(formats, video_id) self._sort_formats(formats) info_dict['formats'] = formats From 576904bce64ca036a7b21b43fa3f8c023e0bcdb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 20:01:31 +0600 Subject: [PATCH 0088/2721] [letv] Clarify download message --- youtube_dl/extractor/letv.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 9ed81a199..1484ac0d2 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -94,7 +94,7 @@ class LetvIE(InfoExtractor): play_json = self._download_json( play_json_req, - media_id, 'playJson data') + media_id, 'Downloading playJson data') # Check for errors playstatus = play_json['playstatus'] From 733be371af58ec63be5faa52ee24cab4dd85d388 Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder <jeff@ourexchange.net> Date: Thu, 15 Jan 2015 21:28:57 -0500 Subject: [PATCH 0089/2721] Add megavideoz.eu support. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/megavideozeu.py | 39 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/megavideozeu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1bb3e1a1c..5316af2d1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -267,6 +267,7 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .malemotion import MalemotionIE from .mdr import MDRIE +from .megavideozeu import MegavideozeuIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE diff --git a/youtube_dl/extractor/megavideozeu.py b/youtube_dl/extractor/megavideozeu.py new file mode 100644 index 000000000..e77b5f734 --- /dev/null +++ b/youtube_dl/extractor/megavideozeu.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_filesize, + unified_strdate, +) + + +class MegavideozeuIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>.*)(?:.*)' + + def _real_extract(self, url): + tmp_video_id = self._match_id(url) + + webpage = self._download_webpage(url, tmp_video_id) + + config_php = 
self._html_search_regex( + r'var cnf = \'([^\']+)\'', webpage, 'config.php url') + + configpage = self._download_webpage(config_php, tmp_video_id) + + video_id = self._html_search_regex( + r'<mediaid>([^<]+)', configpage, 'video id') + video_url = self._html_search_regex( + r'<file>([^<]+)', configpage, 'video URL') + title = self._html_search_regex( + r'<title><!\[CDATA\[([^\]]+)', configpage, 'title') + duration = int_or_none(self._html_search_regex( + r'<duration>([0-9]+)', configpage, 'duration', fatal=False)) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'duration': duration + } From d41a3fa1b47fc050e206bd16feed02b6a62e7d2d Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder <jeff@ourexchange.net> Date: Wed, 4 Mar 2015 15:02:13 -0500 Subject: [PATCH 0090/2721] [Primesharetv] Add primeshare.tv extractor, still need test data --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/primesharetv.py | 46 ++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/primesharetv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5316af2d1..b02812365 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -381,6 +381,7 @@ from .pornhub import ( ) from .pornotube import PornotubeIE from .pornoxo import PornoXOIE +from .primesharetv import PrimesharetvIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py new file mode 100644 index 000000000..967125abc --- /dev/null +++ b/youtube_dl/extractor/primesharetv.py @@ -0,0 +1,46 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_filesize, + unified_strdate, + urlencode_postdata, +) +from ..compat import ( + compat_urllib_request, +) + +class 
PrimesharetvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>.*)(?:.*)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + self._sleep(9, video_id) + + hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token') + data = urlencode_postdata({ + 'hash': hashtoken, + }) + headers = { + 'Referer': url, + 'Content-Type': 'application/x-www-form-urlencoded', + } + video_page_request = compat_urllib_request.Request(url, data, headers=headers) + video_page = self._download_webpage(video_page_request, None, False, '') + + video_url = self._html_search_regex( + r'url: \'(http://l\.primeshare\.tv[^\']+)\',', video_page, 'video url') + + title = self._html_search_regex( + r'<h1>Watch [^\(]+\(([^/)]+)\) ', video_page, 'title') + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'ext': 'mp4', + } From af69cab21d48da78ff7d2b54effb9dfd5fcfd1d8 Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder <jeff@ourexchange.net> Date: Wed, 4 Mar 2015 15:18:06 -0500 Subject: [PATCH 0091/2721] [Primesharetv] Add public domain example video --- youtube_dl/extractor/primesharetv.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 967125abc..7c545761b 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -15,6 +15,19 @@ from ..compat import ( class PrimesharetvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>.*)(?:.*)' + _TESTS = [ + { + 'url': 'http://primeshare.tv/download/238790B611', + 'md5': 'bb41f9f6c0dd434c729f04ce5b677192', + 'info_dict': { + 'id': '238790B611', + 'ext': 'mp4', + "title": "Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona [...]", + "duration": 10, + }, + } + ] + def _real_extract(self, url): video_id = self._match_id(url) webpage 
= self._download_webpage(url, video_id) @@ -33,7 +46,7 @@ class PrimesharetvIE(InfoExtractor): video_page = self._download_webpage(video_page_request, None, False, '') video_url = self._html_search_regex( - r'url: \'(http://l\.primeshare\.tv[^\']+)\',', video_page, 'video url') + r'url: \'(http://[a-z0-9]+\.primeshare\.tv:443/file/get/[^\']+)\',', video_page, 'video url') title = self._html_search_regex( r'<h1>Watch [^\(]+\(([^/)]+)\) ', video_page, 'title') From 13047f41359e32bf56bdc8dd555bae6c1065512d Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder <jeff@ourexchange.net> Date: Wed, 4 Mar 2015 17:38:21 -0500 Subject: [PATCH 0092/2721] [Primesharetv] Handle file not existing properly. --- youtube_dl/extractor/primesharetv.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 7c545761b..570fd2210 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -1,8 +1,11 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, parse_filesize, unified_strdate, @@ -31,10 +34,12 @@ class PrimesharetvIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if re.search(r'<h1>File not exist</h1>', webpage) is not None: + raise ExtractorError('The file does not exist', expected=True) + hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token') self._sleep(9, video_id) - hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token') data = urlencode_postdata({ 'hash': hashtoken, }) @@ -44,7 +49,6 @@ class PrimesharetvIE(InfoExtractor): } video_page_request = compat_urllib_request.Request(url, data, headers=headers) video_page = self._download_webpage(video_page_request, None, False, '') - video_url = 
self._html_search_regex( r'url: \'(http://[a-z0-9]+\.primeshare\.tv:443/file/get/[^\']+)\',', video_page, 'video url') @@ -57,3 +61,8 @@ class PrimesharetvIE(InfoExtractor): 'title': title, 'ext': 'mp4', } + + def _debug_print(self, txt): + if self._downloader.params.get('verbose'): + self.to_screen('[debug] %s' % txt) + From 0499cd866e7e746658f33e6c2f44f7e1e699ad1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 21:06:38 +0600 Subject: [PATCH 0093/2721] [primesharetv] Clean up --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/primesharetv.py | 83 ++++++++++++++-------------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b02812365..3c5401145 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -381,7 +381,7 @@ from .pornhub import ( ) from .pornotube import PornotubeIE from .pornoxo import PornoXOIE -from .primesharetv import PrimesharetvIE +from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 570fd2210..01cc3d9ea 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -1,59 +1,65 @@ -# encoding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - parse_filesize, - unified_strdate, - urlencode_postdata, -) from ..compat import ( + compat_urllib_parse, compat_urllib_request, ) +from ..utils import ExtractorError -class PrimesharetvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>.*)(?:.*)' - _TESTS = [ - { - 'url': 'http://primeshare.tv/download/238790B611', - 'md5': 'bb41f9f6c0dd434c729f04ce5b677192', - 
'info_dict': { - 'id': '238790B611', - 'ext': 'mp4', - "title": "Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona [...]", - "duration": 10, - }, - } - ] +class PrimeShareTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)' + + _TEST = { + 'url': 'http://primeshare.tv/download/238790B611', + 'md5': 'b92d9bf5461137c36228009f31533fbc', + 'info_dict': { + 'id': '238790B611', + 'ext': 'mp4', + 'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona', + }, + } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - if re.search(r'<h1>File not exist</h1>', webpage) is not None: - raise ExtractorError('The file does not exist', expected=True) - hashtoken = self._search_regex(r' name="hash" value="(.*?)" ', webpage, 'hash token') - - self._sleep(9, video_id) - - data = urlencode_postdata({ - 'hash': hashtoken, - }) + + if '>File not exist<' in webpage: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + fields = dict(re.findall(r'''(?x)<input\s+ + type="hidden"\s+ + name="([^"]+)"\s+ + (?:id="[^"]+"\s+)? 
+ value="([^"]*)" + ''', webpage)) + headers = { 'Referer': url, 'Content-Type': 'application/x-www-form-urlencoded', } - video_page_request = compat_urllib_request.Request(url, data, headers=headers) - video_page = self._download_webpage(video_page_request, None, False, '') - video_url = self._html_search_regex( - r'url: \'(http://[a-z0-9]+\.primeshare\.tv:443/file/get/[^\']+)\',', video_page, 'video url') + + wait_time = int(self._search_regex( + r'var\s+cWaitTime\s*=\s*(\d+)', + webpage, 'wait time', default=7)) + 1 + self._sleep(wait_time, video_id) + + req = compat_urllib_request.Request( + url, compat_urllib_parse.urlencode(fields), headers) + video_page = self._download_webpage( + req, video_id, 'Downloading video page') + + video_url = self._search_regex( + r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'", + video_page, 'video url') title = self._html_search_regex( - r'<h1>Watch [^\(]+\(([^/)]+)\) ', video_page, 'title') + r'<h1>Watch\s*(?: )?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?: )?\s*<strong>', + video_page, 'title') return { 'id': video_id, @@ -61,8 +67,3 @@ class PrimesharetvIE(InfoExtractor): 'title': title, 'ext': 'mp4', } - - def _debug_print(self, txt): - if self._downloader.params.get('verbose'): - self.to_screen('[debug] %s' % txt) - From 219da6bb685765186b7ffb878399c32f44351802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 21:13:42 +0600 Subject: [PATCH 0094/2721] [megavideoeu] Remove extractor --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/megavideozeu.py | 39 ---------------------------- 2 files changed, 40 deletions(-) delete mode 100644 youtube_dl/extractor/megavideozeu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3c5401145..bceed92e1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -267,7 +267,6 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from 
.malemotion import MalemotionIE from .mdr import MDRIE -from .megavideozeu import MegavideozeuIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE diff --git a/youtube_dl/extractor/megavideozeu.py b/youtube_dl/extractor/megavideozeu.py deleted file mode 100644 index e77b5f734..000000000 --- a/youtube_dl/extractor/megavideozeu.py +++ /dev/null @@ -1,39 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_filesize, - unified_strdate, -) - - -class MegavideozeuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>.*)(?:.*)' - - def _real_extract(self, url): - tmp_video_id = self._match_id(url) - - webpage = self._download_webpage(url, tmp_video_id) - - config_php = self._html_search_regex( - r'var cnf = \'([^\']+)\'', webpage, 'config.php url') - - configpage = self._download_webpage(config_php, tmp_video_id) - - video_id = self._html_search_regex( - r'<mediaid>([^<]+)', configpage, 'video id') - video_url = self._html_search_regex( - r'<file>([^<]+)', configpage, 'video URL') - title = self._html_search_regex( - r'<title><!\[CDATA\[([^\]]+)', configpage, 'title') - duration = int_or_none(self._html_search_regex( - r'<duration>([0-9]+)', configpage, 'duration', fatal=False)) - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'duration': duration - } From 92a4793b3cac36677bed5423b12b3d97d15c6036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 21:34:22 +0600 Subject: [PATCH 0095/2721] [utils] Place sanitize url function near other sanitizing functions --- test/test_utils.py | 37 +++++++++++++++++++++---------------- youtube_dl/utils.py | 22 +++++++--------------- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 4f0ffd482..3431ad24e 100644 --- a/test/test_utils.py 
+++ b/test/test_utils.py @@ -39,6 +39,7 @@ from youtube_dl.utils import ( read_batch_urls, sanitize_filename, sanitize_path, + sanitize_url_path_consecutive_slashes, shell_quote, smuggle_url, str_to_int, @@ -55,7 +56,6 @@ from youtube_dl.utils import ( xpath_with_ns, render_table, match_str, - url_sanitize_consecutive_slashes, ) @@ -169,6 +169,26 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + def test_sanitize_url_path_consecutive_slashes(self): + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'), + 'http://hostname/foo/bar/filename.html') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'), + 'http://hostname/foo/bar/filename.html') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname//'), + 'http://hostname/') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'), + 'http://hostname/foo/bar/filename.html') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/'), + 'http://hostname/') + self.assertEqual( + sanitize_url_path_consecutive_slashes('http://hostname/abc//'), + 'http://hostname/abc/') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) @@ -539,21 +559,6 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') 'like_count > 100 & dislike_count <? 
50 & description', {'like_count': 190, 'dislike_count': 10})) - def test_url_sanitize_consecutive_slashes(self): - self.assertEqual(url_sanitize_consecutive_slashes( - 'http://hostname/foo//bar/filename.html'), - 'http://hostname/foo/bar/filename.html') - self.assertEqual(url_sanitize_consecutive_slashes( - 'http://hostname//foo/bar/filename.html'), - 'http://hostname/foo/bar/filename.html') - self.assertEqual(url_sanitize_consecutive_slashes( - 'http://hostname//'), 'http://hostname/') - self.assertEqual(url_sanitize_consecutive_slashes( - 'http://hostname/foo/bar/filename.html'), - 'http://hostname/foo/bar/filename.html') - self.assertEqual(url_sanitize_consecutive_slashes( - 'http://hostname/'), 'http://hostname/') - if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e82e3998a..472d4df41 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -326,6 +326,13 @@ def sanitize_path(s): return os.path.join(*sanitized_path) +def sanitize_url_path_consecutive_slashes(url): + """Collapses consecutive slashes in URLs' path""" + parsed_url = list(compat_urlparse.urlparse(url)) + parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2]) + return compat_urlparse.urlunparse(parsed_url) + + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] @@ -1804,18 +1811,3 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): return None # No Proxy return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type) - - -def url_sanitize_consecutive_slashes(url): - """Sanitize URLs with consecutive slashes - - For example, transform both - http://hostname/foo//bar/filename.html - and - http://hostname//foo/bar/filename.html - into - http://hostname/foo/bar/filename.html - """ - parsed_url = list(compat_urlparse.urlparse(url)) - parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2]) - return compat_urlparse.urlunparse(parsed_url) From cd459b1d490ca8c0639220a835f5e6bee3e9a80d 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 21:39:31 +0600 Subject: [PATCH 0096/2721] [sohu] Fix test's note info --- youtube_dl/extractor/sohu.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index ea5cc06b9..11edf616a 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -8,7 +8,7 @@ from ..compat import ( compat_str, compat_urllib_request ) -from ..utils import url_sanitize_consecutive_slashes +from ..utils import sanitize_url_path_consecutive_slashes class SohuIE(InfoExtractor): @@ -74,7 +74,7 @@ class SohuIE(InfoExtractor): } }] }, { - 'info': 'Video with title containing dash', + 'note': 'Video with title containing dash', 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml', 'info_dict': { 'id': '78932792', @@ -100,8 +100,9 @@ class SohuIE(InfoExtractor): if cn_verification_proxy: req.add_header('Ytdl-request-proxy', cn_verification_proxy) - return self._download_json(req, video_id, - 'Downloading JSON data for %s' % vid_id) + return self._download_json( + req, video_id, + 'Downloading JSON data for %s' % vid_id) mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -146,7 +147,7 @@ class SohuIE(InfoExtractor): part_info = part_str.split('|') - video_url = url_sanitize_consecutive_slashes( + video_url = sanitize_url_path_consecutive_slashes( '%s%s?key=%s' % (part_info[0], su[i], part_info[3])) formats.append({ From 2ca1c5aa9f8d7db08608de0bd99f6c994ab496d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 22:27:33 +0600 Subject: [PATCH 0097/2721] [douyutv] Improve and extract all formats --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/douyutv.py | 72 ++++++++++++++++++++------------ 2 files changed, 46 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/__init__.py 
b/youtube_dl/extractor/__init__.py index f3901bfc3..73c17aa84 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -107,7 +107,7 @@ from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE from .dotsub import DotsubIE -from .douyutv import DouyutvIE +from .douyutv import DouyuTVIE from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index e9b92eb3b..d7956e6e4 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -2,58 +2,76 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ExtractorError -from ..utils import ( - ExtractorError, -) -class DouyutvIE(InfoExtractor): +class DouyuTVIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' - - ''' - show_status: 1 直播中 ,2 没有直播 - ''' - _TEST = { 'url': 'http://www.douyutv.com/iseven', 'info_dict': { 'id': 'iseven', - 'title': '清晨醒脑!T-ara根本停不下来!', 'ext': 'flv', + 'title': 're:^清晨醒脑!T-ara根本停不下来! 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:9e525642c25a0a24302869937cf69d17', 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': '7师傅', + 'uploader_id': '431925', 'is_live': True, + }, + 'params': { + 'skip_download': True, } } def _real_extract(self, url): video_id = self._match_id(url) - info_url = 'http://www.douyutv.com/api/client/room/' + video_id - config = self._download_json(info_url, video_id) + config = self._download_json( + 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id) - error_code = config.get('error') - show_status = config['data'].get('show_status') + data = config['data'] + + error_code = config.get('error', 0) + show_status = data.get('show_status') if error_code is not 0: - raise ExtractorError('Server reported error %i' % error_code, - expected=True) + raise ExtractorError( + 'Server reported error %i' % error_code, expected=True) + # 1 = live, 2 = offline if show_status == '2': - raise ExtractorError('The live show has not yet started', - expected=True) + raise ExtractorError( + 'Live stream is offline', expected=True) - title = config['data'].get('room_name') - rtmp_url = config['data'].get('rtmp_url') - rtmp_live = config['data'].get('rtmp_live') - thumbnail = config['data'].get('room_src') + base_url = data['rtmp_url'] + live_path = data['rtmp_live'] - url = rtmp_url+'/'+rtmp_live + title = self._live_title(data['room_name']) + description = data.get('show_details') + thumbnail = data.get('room_src') + + uploader = data.get('nickname') + uploader_id = data.get('owner_uid') + + multi_formats = data.get('rtmp_multi_bitrate') + if not isinstance(multi_formats, dict): + multi_formats = {} + multi_formats['live'] = live_path + + formats = [{ + 'url': '%s/%s' % (base_url, format_path), + 'format_id': format_id, + 'preference': 1 if format_id == 'live' else 0, + } for format_id, format_path in multi_formats.items()] + self._sort_formats(formats) return { 'id': video_id, 'title': title, - 'ext':'flv', - 
'url': url, + 'description': description, 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, 'is_live': True, - # TODO more properties (see youtube_dl/extractor/common.py) - } \ No newline at end of file + } From 98f02fdde25bf00fbccfb211abb574f80819a177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 22:33:05 +0600 Subject: [PATCH 0098/2721] Credit @jbuchbinder for primesharetv (#5123) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 872da6071..512469f4c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -116,3 +116,4 @@ Duncan Keall Alexander Mamay Devin J. Pohly Eduardo Ferro Aldama +Jeff Buchbinder From b1337948ebeeacfcf104d513fac5913d3a5818a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 23:13:43 +0600 Subject: [PATCH 0099/2721] [grooveshark] Fix extraction --- youtube_dl/extractor/grooveshark.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py index 848d17beb..36ad4915c 100644 --- a/youtube_dl/extractor/grooveshark.py +++ b/youtube_dl/extractor/grooveshark.py @@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor): if webpage is not None: o = GroovesharkHtmlParser.extract_object_tags(webpage) - return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']) + return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'] - return (webpage, None) + return webpage, None def _real_initialize(self): self.ts = int(time.time() * 1000) # timestamp in millis @@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor): swf_referer = None if self.do_playerpage_request: (_, player_objs) = self._get_playerpage(url) - if player_objs is not None: + if player_objs: swf_referer = self._build_swf_referer(url, player_objs[0]) self.to_screen('SWF Referer: %s' % swf_referer) From 
84f810160667f9129e6a8d841fae2d0e2e1fec86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 17 Mar 2015 23:51:40 +0600 Subject: [PATCH 0100/2721] [generic] Follow redirects specified by `Refresh` HTTP header --- youtube_dl/extractor/generic.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4e6927b08..0b8d96c27 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1270,8 +1270,14 @@ class GenericIE(InfoExtractor): if not found: found = re.search( r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' - r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)', + r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)', webpage) + if not found: + # Look also in Refresh HTTP header + refresh_header = head_response.headers.get('Refresh') + if refresh_header: + found = re.search( + r'[0-9]{,2};\s*(?:URL|url)=(.+)', refresh_header) if found: new_url = found.group(1) self.report_following_redirect(new_url) From 9ef4f12b534578ae3d3e47815492c90826c03c36 Mon Sep 17 00:00:00 2001 From: felix <m.p.isaev@yandex.com> Date: Tue, 17 Mar 2015 18:54:36 +0100 Subject: [PATCH 0101/2721] testcases for libsyn and The Daily Show Podcast extractors --- youtube_dl/extractor/comedycentral.py | 4 ++++ youtube_dl/extractor/libsyn.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index e427b9821..bd3817b56 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -276,6 +276,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): class TheDailyShowPodcastIE(InfoExtractor): _VALID_URL = r'(?P<scheme>https?:)?//thedailyshow\.cc\.com/podcast/(?P<id>[a-z\-]+)' + _TESTS = [{ + "url": "http://thedailyshow.cc.com/podcast/episodetwelve", + 'only_matching': True, + }] 
def _real_extract(self, url): display_id = self._match_id(url) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index 4b5029f89..88379f276 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -6,6 +6,15 @@ from ..utils import ( class LibsynIE(InfoExtractor): _VALID_URL = r'(?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)(?:/.*)?' + _TESTS = [{ + 'url': "http://html5-player.libsyn.com/embed/episode/id/3377616/", + 'info_dict': { + 'id': "3377616", + 'ext': "mp3", + 'title': "Episode 12: Bassem Youssef: Egypt's Jon Stewart", + 'description': "<p>Bassem Youssef joins executive producer Steve Bodow and senior producer Sara Taksler for a conversation about how <em style=\"font-family: Tahoma, Geneva, sans-serif; font-size: 12.8000001907349px;\">The Daily Show</em> inspired Bassem to create <em style=\"font-family: Tahoma, Geneva, sans-serif; font-size: 12.8000001907349px;\">Al-Bernameg</em>, his massively popular (and now banned) Egyptian news satire program. 
Sara discusses her soon-to-be-released documentary, <em style=\"font-family: Tahoma, Geneva, sans-serif; font-size: 12.8000001907349px;\">Tickling Giants</em>, which chronicles how Bassem and his staff risked their safety every day to tell jokes.</p>", + }, + }] def _real_extract(self, url): if url.startswith('//'): From ed9a25dd612fb06d9cf007a6491ac9982535a8f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 18 Mar 2015 00:05:40 +0600 Subject: [PATCH 0102/2721] [generic] Generalize redirect regex --- youtube_dl/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0b8d96c27..dc5755d12 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1268,16 +1268,16 @@ class GenericIE(InfoExtractor): # HTML5 video found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage) if not found: + REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' - r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)', + r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX, webpage) if not found: # Look also in Refresh HTTP header refresh_header = head_response.headers.get('Refresh') if refresh_header: - found = re.search( - r'[0-9]{,2};\s*(?:URL|url)=(.+)', refresh_header) + found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = found.group(1) self.report_following_redirect(new_url) From c3c5c31517ce121740666440a71becaba11176ac Mon Sep 17 00:00:00 2001 From: Todoroki <todoroki.auone@gmail.com> Date: Wed, 18 Mar 2015 22:19:55 +0900 Subject: [PATCH 0103/2721] fix nm video DL issue when logged in --- youtube_dl/extractor/niconico.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/niconico.py 
b/youtube_dl/extractor/niconico.py index 7fb4e57df..a48df7f62 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -89,7 +89,7 @@ class NiconicoIE(InfoExtractor): if self._AUTHENTICATED: # Get flv info flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, + 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1', video_id, 'Downloading flv info') else: # Get external player info @@ -97,7 +97,6 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) thumb_play_key = self._search_regex( r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') - # Get flv info flv_info_data = compat_urllib_parse.urlencode({ 'k': thumb_play_key, From ee580538fa741fc35faa8210e0245e252bf23e40 Mon Sep 17 00:00:00 2001 From: Todoroki <todoroki.auone@gmail.com> Date: Wed, 18 Mar 2015 22:24:17 +0900 Subject: [PATCH 0104/2721] fix nm video DL issue when logged in --- youtube_dl/extractor/niconico.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index a48df7f62..4d57ac703 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -97,6 +97,7 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) thumb_play_key = self._search_regex( r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') + # Get flv info flv_info_data = compat_urllib_parse.urlencode({ 'k': thumb_play_key, From 2c2c06e359dc3843a86585a9a4b6419f03af510e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 18 Mar 2015 20:28:00 +0600 Subject: [PATCH 0105/2721] [krasview] Fix extraction (Closes #5228) --- youtube_dl/extractor/krasview.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py index e46954b47..96f95979a 
100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor): description = self._og_search_description(webpage, default=None) thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage) duration = int_or_none(flashvars.get('duration')) - width = int_or_none(self._og_search_property('video:width', webpage, 'video width')) - height = int_or_none(self._og_search_property('video:height', webpage, 'video height')) + width = int_or_none(self._og_search_property( + 'video:width', webpage, 'video width', default=None)) + height = int_or_none(self._og_search_property( + 'video:height', webpage, 'video height', default=None)) return { 'id': video_id, From fa8ce2690447fa06a10398efd5b6dfa96c2ed1b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20Le=20N=C3=A9grate?= <roman.lenegrate@gmail.com> Date: Sun, 15 Mar 2015 22:32:06 +0100 Subject: [PATCH 0106/2721] [mixcloud] Fix extraction like-count --- youtube_dl/extractor/mixcloud.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 1831c6749..7c6c885f9 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -99,8 +99,7 @@ class MixcloudIE(InfoExtractor): r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) like_count = str_to_int(self._search_regex( - [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"', - r'/favorites/?">([0-9]+)<'], + r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"', webpage, 'like count', fatal=False)) view_count = str_to_int(self._search_regex( [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', From 7a757b7194b1f1151857503baed3fa51d57397aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 18 Mar 2015 16:50:23 +0100 
Subject: [PATCH 0107/2721] [mixcloud] Fix extraction of some metadata The second test had some wrong info. I couldn't find the timestamp, so I have removed it. --- youtube_dl/extractor/mixcloud.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 7c6c885f9..2fe55179a 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -10,7 +10,6 @@ from ..utils import ( ExtractorError, HEADRequest, str_to_int, - parse_iso8601, ) @@ -27,8 +26,6 @@ class MixcloudIE(InfoExtractor): 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'uploader': 'Daniel Holbach', 'uploader_id': 'dholbach', - 'upload_date': '20111115', - 'timestamp': 1321359578, 'thumbnail': 're:https?://.*\.jpg', 'view_count': int, 'like_count': int, @@ -37,12 +34,12 @@ class MixcloudIE(InfoExtractor): 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/', 'info_dict': { 'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat', - 'ext': 'm4a', - 'title': 'Electric Relaxation vol. 
3', + 'ext': 'mp3', + 'title': 'Caribou 7 inch Vinyl Mix & Chat', 'description': 'md5:2b8aec6adce69f9d41724647c65875e8', - 'uploader': 'Daniel Drumz', + 'uploader': 'Gilles Peterson Worldwide', 'uploader_id': 'gillespeterson', - 'thumbnail': 're:https?://.*\.jpg', + 'thumbnail': 're:https?://.*/images/', 'view_count': int, 'like_count': int, }, @@ -85,7 +82,7 @@ class MixcloudIE(InfoExtractor): raise ExtractorError('Unable to extract track url') PREFIX = ( - r'<span class="play-button[^"]*?"' + r'm-play-on-spacebar[^>]+' r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+') title = self._html_search_regex( PREFIX + r'm-title="([^"]+)"', webpage, 'title') @@ -105,9 +102,6 @@ class MixcloudIE(InfoExtractor): [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', r'/listeners/?">([0-9,.]+)</a>'], webpage, 'play count', fatal=False)) - timestamp = parse_iso8601(self._search_regex( - r'<time itemprop="dateCreated" datetime="([^"]+)">', - webpage, 'upload date', default=None)) return { 'id': track_id, @@ -117,7 +111,6 @@ class MixcloudIE(InfoExtractor): 'thumbnail': thumbnail, 'uploader': uploader, 'uploader_id': uploader_id, - 'timestamp': timestamp, 'view_count': view_count, 'like_count': like_count, } From aae53774f247edcca7671434c16f7931d5c7b9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20Le=20N=C3=A9grate?= <roman.lenegrate@gmail.com> Date: Mon, 16 Mar 2015 00:20:06 +0100 Subject: [PATCH 0108/2721] [mixcloud] Try preview server first, then further numbers --- youtube_dl/extractor/mixcloud.py | 49 ++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 2fe55179a..21aea0c55 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import re +import itertools from .common import InfoExtractor from ..compat import ( @@ -45,20 +46,19 @@ class MixcloudIE(InfoExtractor): }, 
}] - def _get_url(self, track_id, template_url): - server_count = 30 - for i in range(server_count): - url = template_url % i + def _get_url(self, track_id, template_url, server_number): + boundaries = (1, 30) + for nr in server_numbers(server_number, boundaries): + url = template_url % nr try: # We only want to know if the request succeed # don't download the whole file self._request_webpage( HEADRequest(url), track_id, - 'Checking URL %d/%d ...' % (i + 1, server_count + 1)) + 'Checking URL %d/%d ...' % (nr, boundaries[-1])) return url except ExtractorError: pass - return None def _real_extract(self, url): @@ -72,12 +72,13 @@ class MixcloudIE(InfoExtractor): preview_url = self._search_regex( r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') song_url = preview_url.replace('/previews/', '/c/originals/') + server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number')) template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) - final_song_url = self._get_url(track_id, template_url) + final_song_url = self._get_url(track_id, template_url, server_number) if final_song_url is None: self.to_screen('Trying with m4a extension') template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') - final_song_url = self._get_url(track_id, template_url) + final_song_url = self._get_url(track_id, template_url, server_number) if final_song_url is None: raise ExtractorError('Unable to extract track url') @@ -114,3 +115,35 @@ class MixcloudIE(InfoExtractor): 'view_count': view_count, 'like_count': like_count, } + + +def server_numbers(first, boundaries): + """ Server numbers to try in descending order of probable availability. + Starting from first (i.e. the number of the server hosting the preview file) + and going further and further up to the higher boundary and down to the + lower one in an alternating fashion. Namely: + + server_numbers(2, (1, 5)) + + # Where the preview server is 2, min number is 1 and max is 5. 
+ # Yields: 2, 3, 1, 4, 5 + + Why not random numbers or increasing sequences? Since from what I've seen, + full length files seem to be hosted on servers whose number is closer to + that of the preview; to be confirmed. + """ + zip_longest = getattr(itertools, 'zip_longest', None) + if zip_longest is None: + # python 2.x + zip_longest = itertools.izip_longest + + if len(boundaries) != 2: + raise ValueError("boundaries should be a two-element tuple") + min, max = boundaries + highs = range(first + 1, max + 1) + lows = range(first - 1, min - 1, -1) + rest = filter( + None, itertools.chain.from_iterable(zip_longest(highs, lows))) + yield first + for n in rest: + yield n From 3073a6d5e9036b0b613f57bc08099862a2af87f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 18 Mar 2015 23:08:18 +0600 Subject: [PATCH 0109/2721] [ultimedia] Add extractor Sponsored by thankyoumotion.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ultimedia.py | 91 +++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 youtube_dl/extractor/ultimedia.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 73c17aa84..867e7c935 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -534,6 +534,7 @@ from .twitch import ( TwitchStreamIE, ) from .ubu import UbuIE +from .ultimedia import UltimediaIE from .udemy import ( UdemyIE, UdemyCourseIE diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py new file mode 100644 index 000000000..97e4445d4 --- /dev/null +++ b/youtube_dl/extractor/ultimedia.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + qualities, + unified_strdate, + clean_html, +) + + +class UltimediaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)' + _TESTS 
= [{ + # news + 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r', + 'md5': '276a0e49de58c7e85d32b057837952a2', + 'info_dict': { + 'id': 's8uk0r', + 'ext': 'mp4', + 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées', + 'description': 'md5:3e5c8fd65791487333dda5db8aed32af', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20150317', + }, + }, { + # music + 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8', + 'md5': '2ea3513813cf230605c7e2ffe7eca61c', + 'info_dict': { + 'id': 'xvpfp8', + 'ext': 'mp4', + 'title': "Two - C'est la vie (Clip)", + 'description': 'Two', + 'thumbnail': 're:^https?://.*\.jpg', + 'upload_date': '20150224', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + deliver_url = self._search_regex( + r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"', + webpage, 'deliver URL') + + deliver_page = self._download_webpage( + deliver_url, video_id, 'Downloading iframe page') + + player = self._parse_json( + self._search_regex( + r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'), + video_id) + + quality = qualities(['flash', 'html5']) + + formats = [{ + 'url': mode['config']['file'], + 'format_id': mode.get('type'), + 'quality': quality(mode.get('type')), + } for mode in player['modes']] + self._sort_formats(formats) + + thumbnail = player.get('image') + + title = clean_html(( + self._html_search_regex( + r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>', + webpage, 'title', default=None) + or self._search_regex( + r"var\s+nameVideo\s*=\s*'([^']+)'", + deliver_page, 'title'))) + + description = clean_html(self._html_search_regex( + r'(?s)<span>Description</span>(.+?)</p>', webpage, + 'description', fatal=False)) + + upload_date = unified_strdate(self._search_regex( + r'Ajouté le\s*<span>([^<]+)', webpage, + 'upload date', fatal=False)) 
+ + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'formats': formats, + } From d1dc7e39918ddfc3402a8ffd669e6c84ac971803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 18 Mar 2015 23:11:48 +0600 Subject: [PATCH 0110/2721] [ultimedia] Fix alphabetic order --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 867e7c935..7eb9b4fbb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -534,11 +534,11 @@ from .twitch import ( TwitchStreamIE, ) from .ubu import UbuIE -from .ultimedia import UltimediaIE from .udemy import ( UdemyIE, UdemyCourseIE ) +from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE from .ustream import UstreamIE, UstreamChannelIE From 73900846b16d33f769d35ac945a97a66cc17fd5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Mar 2015 00:53:26 +0600 Subject: [PATCH 0111/2721] [ultimedia] Capture and output unavailable video message --- youtube_dl/extractor/ultimedia.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py index 97e4445d4..0c1b08d7d 100644 --- a/youtube_dl/extractor/ultimedia.py +++ b/youtube_dl/extractor/ultimedia.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + ExtractorError, qualities, unified_strdate, clean_html, @@ -49,6 +50,10 @@ class UltimediaIE(InfoExtractor): deliver_page = self._download_webpage( deliver_url, video_id, 'Downloading iframe page') + if '>This video is currently not available' in deliver_page: + raise ExtractorError( + 'Video %s is currently not available' % video_id, expected=True) + player = 
self._parse_json( self._search_regex( r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'), From b30ef07c6ccb982cff623c34e7c5cec5d8eb9bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Mar 2015 01:06:39 +0600 Subject: [PATCH 0112/2721] [ultimedia] Handle youtube embeds --- youtube_dl/extractor/ultimedia.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py index 0c1b08d7d..06554a1be 100644 --- a/youtube_dl/extractor/ultimedia.py +++ b/youtube_dl/extractor/ultimedia.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -60,12 +62,18 @@ class UltimediaIE(InfoExtractor): video_id) quality = qualities(['flash', 'html5']) - - formats = [{ - 'url': mode['config']['file'], - 'format_id': mode.get('type'), - 'quality': quality(mode.get('type')), - } for mode in player['modes']] + formats = [] + for mode in player['modes']: + video_url = mode.get('config', {}).get('file') + if not video_url: + continue + if re.match(r'https?://www\.youtube\.com/.+?', video_url): + return self.url_result(video_url, 'Youtube') + formats.append({ + 'url': video_url, + 'format_id': mode.get('type'), + 'quality': quality(mode.get('type')), + }) self._sort_formats(formats) thumbnail = player.get('image') From cbc3cfcab41599f1b52e328878e19d15be1792d4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 18 Mar 2015 22:02:39 +0100 Subject: [PATCH 0113/2721] release 2015.03.18 --- docs/supportedsites.md | 4 ++++ youtube_dl/version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d6a1e67c6..72b365305 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -112,6 +112,7 @@ - **Discovery** - 
**divxstage**: DivxStage - **Dotsub** + - **DouyuTV** - **DRBonanza** - **Dropbox** - **DrTuber** @@ -342,6 +343,7 @@ - **PornHubPlaylist** - **Pornotube** - **PornoXO** + - **PrimeShareTV** - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital - **Puls4** @@ -367,6 +369,7 @@ - **RTP** - **RTS**: RTS.ch - **rtve.es:alacarta**: RTVE a la carta + - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams - **RUHD** - **rutube**: Rutube videos @@ -487,6 +490,7 @@ - **Ubu** - **udemy** - **udemy:course** + - **Ultimedia** - **Unistra** - **Urort**: NRK P3 Urørt - **ustream** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7ed07c375..51b4260aa 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.03.15' +__version__ = '2015.03.18' From 0ae8bbac2d12a883b8eb7a941c65c7b87987d213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Mar 2015 21:17:04 +0600 Subject: [PATCH 0114/2721] [nytimes] Support embed URL --- youtube_dl/extractor/nytimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 56e1cad3b..5b57e95b6 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -7,7 +7,7 @@ from ..utils import parse_iso8601 class NYTimesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)' _TEST = { 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', From f3c0c667a6d8d1b58ca198b72a353887ea8c4f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Mar 2015 21:23:52 +0600 Subject: 
[PATCH 0115/2721] [nytimes] Modernize --- youtube_dl/extractor/nytimes.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 5b57e95b6..d1cf8f4f3 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -1,9 +1,11 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + float_or_none, + int_or_none, + parse_iso8601, +) class NYTimesIE(InfoExtractor): @@ -25,15 +27,15 @@ class NYTimesIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) video_data = self._download_json( - 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON') + 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, + video_id, 'Downloading video JSON') title = video_data['headline'] - description = video_data['summary'] - duration = video_data['duration'] / 1000.0 + description = video_data.get('summary') + duration = float_or_none(video_data.get('duration'), 1000) uploader = video_data['byline'] timestamp = parse_iso8601(video_data['publication_date'][:-8]) @@ -49,11 +51,11 @@ class NYTimesIE(InfoExtractor): formats = [ { 'url': video['url'], - 'format_id': video['type'], - 'vcodec': video['video_codec'], - 'width': video['width'], - 'height': video['height'], - 'filesize': get_file_size(video['fileSize']), + 'format_id': video.get('type'), + 'vcodec': video.get('video_codec'), + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'filesize': get_file_size(video.get('fileSize')), } for video in video_data['renditions'] ] self._sort_formats(formats) @@ -61,7 +63,8 @@ class NYTimesIE(InfoExtractor): thumbnails = [ { 'url': 'http://www.nytimes.com/%s' % image['url'], - 
'resolution': '%dx%d' % (image['width'], image['height']), + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), } for image in video_data['images'] ] From 3378d67a18d2d06574698168120bf8c7f7b9e172 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Mar 2015 21:26:57 +0600 Subject: [PATCH 0116/2721] [generic] Add support for nytimes embeds (Closes #5234) --- youtube_dl/extractor/generic.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index dc5755d12..8716e4503 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1006,6 +1006,13 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for NYTimes player + mobj = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or From eecc0685c976abc8fdcebe24e5c3a76d89268c54 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Thu, 19 Mar 2015 19:37:39 +0200 Subject: [PATCH 0117/2721] [videomega] Fix extraction and update test (Fixes #5235) --- youtube_dl/extractor/videomega.py | 45 ++++++++++--------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 273030316..eb309a7cd 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -4,28 +4,21 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, - 
compat_urllib_request, -) -from ..utils import ( - ExtractorError, - remove_start, -) +from ..compat import compat_urllib_request class VideoMegaIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?:www\.)?videomega\.tv/ - (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+) + (?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+) ''' _TEST = { - 'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ', + 'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4', 'md5': 'bf5c2f95c4c917536e80936af7bc51e1', 'info_dict': { - 'id': 'QR0HCUHI1661IHUCH0RQ', + 'id': '4GNA688SU99US886ANG4', 'ext': 'mp4', - 'title': 'Big Buck Bunny', + 'title': 'BigBuckBunny_320x180', 'thumbnail': 're:^https?://.*\.jpg$', } } @@ -33,34 +26,24 @@ class VideoMegaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id) + iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id req = compat_urllib_request.Request(iframe_url) req.add_header('Referer', url) webpage = self._download_webpage(req, video_id) - try: - escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1] - except IndexError: - raise ExtractorError('Unable to extract escaped data') - - playlist = compat_urllib_parse.unquote(escaped_data) - + title = self._html_search_regex( + r'<title>(.*?)', webpage, 'title') + title = re.sub( + r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title) thumbnail = self._search_regex( - r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False) - video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL') - title = remove_start(self._html_search_regex( - r'(.*?)', webpage, 'title'), 'VideoMega.tv - ') - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - }] - self._sort_formats(formats) + r']+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False) + video_url = self._search_regex( + r']+?src="([^"]+)"', webpage, 'video URL') return { 'id': video_id, 'title': title, - 'formats': formats, 
+ 'url': video_url, 'thumbnail': thumbnail, 'http_headers': { 'Referer': iframe_url, From ccf3960eeca3ca8b75da709c8c3019531101f1e7 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 19 Mar 2015 20:55:05 +0200 Subject: [PATCH 0118/2721] [nytimes] Improve _VALID_URL (Fixes #5238) --- youtube_dl/extractor/nytimes.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index d1cf8f4f3..03f0a4de6 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -9,9 +9,9 @@ from ..utils import ( class NYTimesIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', 'md5': '18a525a510f942ada2720db5f31644c0', 'info_dict': { @@ -24,7 +24,10 @@ class NYTimesIE(InfoExtractor): 'uploader': 'Brett Weiner', 'duration': 419, } - } + }, { + 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From 2684871bc10fe2b34f58312b6f75d3237a786732 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Fri, 20 Mar 2015 01:50:36 +0200 Subject: [PATCH 0119/2721] [vine] Fix formats extraction (Closes #5239) --- youtube_dl/extractor/vine.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 0b58fe0fe..c3187cfeb 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -33,14 +33,13 @@ class VineIE(InfoExtractor): r'window\.POST_DATA 
= { %s: ({.+?}) }' % video_id, webpage, 'vine data')) formats = [{ - 'url': data['videoLowURL'], - 'ext': 'mp4', - 'format_id': 'low', - }, { - 'url': data['videoUrl'], - 'ext': 'mp4', - 'format_id': 'standard', - }] + 'format_id': '%(format)s-%(rate)s' % f, + 'vcodec': f['format'], + 'quality': f['rate'], + 'url': f['videoUrl'], + } for f in data['videoUrls'] if f.get('rate')] + + self._sort_formats(formats) return { 'id': video_id, From 01218f919b3bb5d85449806ea832b29251dc13ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 20 Mar 2015 14:59:38 +0100 Subject: [PATCH 0120/2721] [test/http] Add test for proxy support --- test/test_http.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/test/test_http.py b/test/test_http.py index bd4d46fef..f2e305b6f 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,7 +8,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_http_server +from youtube_dl.compat import compat_http_server, compat_urllib_request import ssl import threading @@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase): r = ydl.extract_info('https://localhost:%d/video.html' % self.port) self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port) + +def _build_proxy_handler(name): + class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + proxy_name = name + + def log_message(self, format, *args): + pass + + def do_GET(self): + self.send_response(200) + self.send_header('Content-Type', 'text/plain; charset=utf-8') + self.end_headers() + self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8')) + return HTTPTestRequestHandler + + +class TestProxy(unittest.TestCase): + def setUp(self): + self.proxy = compat_http_server.HTTPServer( + ('localhost', 0), 
_build_proxy_handler('normal')) + self.port = self.proxy.socket.getsockname()[1] + self.proxy_thread = threading.Thread(target=self.proxy.serve_forever) + self.proxy_thread.daemon = True + self.proxy_thread.start() + + self.cn_proxy = compat_http_server.HTTPServer( + ('localhost', 0), _build_proxy_handler('cn')) + self.cn_port = self.cn_proxy.socket.getsockname()[1] + self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever) + self.cn_proxy_thread.daemon = True + self.cn_proxy_thread.start() + + def test_proxy(self): + cn_proxy = 'localhost:{0}'.format(self.cn_port) + ydl = YoutubeDL({ + 'proxy': 'localhost:{0}'.format(self.port), + 'cn_verification_proxy': cn_proxy, + }) + url = 'http://foo.com/bar' + response = ydl.urlopen(url).read().decode('utf-8') + self.assertEqual(response, 'normal: {0}'.format(url)) + + req = compat_urllib_request.Request(url) + req.add_header('Ytdl-request-proxy', cn_proxy) + response = ydl.urlopen(req).read().decode('utf-8') + self.assertEqual(response, 'cn: {0}'.format(url)) + if __name__ == '__main__': unittest.main() From f20bf146e217731016b32af598b177952fa1340a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 20 Mar 2015 15:14:25 +0100 Subject: [PATCH 0121/2721] [test/YoutubeDL] split in two classes The name was misleading --- test/test_YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index db8a47d2d..2597a1993 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -339,6 +339,8 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'G') + +class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): def s_formats(lang, autocaption=False): return [{ From 1c9a1457fc154697b327a87d9afe674425740287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Mar 2015 20:53:14 +0600 Subject: [PATCH 
0122/2721] [niconico] Add nm video test --- youtube_dl/extractor/niconico.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 4d57ac703..ddec7b338 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor): IE_NAME = 'niconico' IE_DESC = 'ニコニコ動画' - _TEST = { + _TESTS = [{ 'url': 'http://www.nicovideo.jp/watch/sm22312215', 'md5': 'd1a75c0823e2f629128c43e1212760f9', 'info_dict': { @@ -39,7 +39,24 @@ class NiconicoIE(InfoExtractor): 'username': 'ydl.niconico@gmail.com', 'password': 'youtube-dl', }, - } + }, { + 'url': 'http://www.nicovideo.jp/watch/nm14296458', + 'md5': '8db08e0158457cf852a31519fceea5bc', + 'info_dict': { + 'id': 'nm14296458', + 'ext': 'swf', + 'title': '【鏡音リン】Dance on media【オリジナル】take2!', + 'description': 'md5:', + 'uploader': 'りょうた', + 'uploader_id': '18822557', + 'upload_date': '20110429', + 'duration': 209, + }, + 'params': { + 'username': 'ydl.niconico@gmail.com', + 'password': 'youtube-dl', + }, + }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P(?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' From 1887ecd4d629f44370846d372155512ec29e1f27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 20 Mar 2015 21:45:09 +0600 Subject: [PATCH 0123/2721] [twitch] Fix login --- youtube_dl/extractor/twitch.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index cbdaf9c7a..aad2bf222 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -23,6 +23,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'http://usher.twitch.tv' _LOGIN_URL = 'https://secure.twitch.tv/user/login' + _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login' _NETRC_MACHINE = 'twitch' def 
_handle_error(self, response): @@ -67,14 +68,14 @@ class TwitchBaseIE(InfoExtractor): 'authenticity_token': authenticity_token, 'redirect_on_login': '', 'embed_form': 'false', - 'mp_source_action': '', + 'mp_source_action': 'login-button', 'follow': '', - 'user[login]': username, - 'user[password]': password, + 'login': username, + 'password': password, } request = compat_urllib_request.Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( request, None, 'Logging in as %s' % username) From 531980d89c37427c663152a525fd795dc0b040e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 20 Mar 2015 17:05:28 +0100 Subject: [PATCH 0124/2721] [test/YoutubeDL] test match_filter --- test/test_YoutubeDL.py | 68 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 2597a1993..652519831 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -14,6 +14,7 @@ from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL from youtube_dl.extractor import YoutubeIE from youtube_dl.postprocessor.common import PostProcessor +from youtube_dl.utils import match_filter_func TEST_URL = 'http://localhost/sample.mp4' @@ -463,6 +464,73 @@ class TestYoutubeDL(unittest.TestCase): self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) os.unlink(audiofile) + def test_match_filter(self): + class FilterYDL(YDL): + def __init__(self, *args, **kwargs): + super(FilterYDL, self).__init__(*args, **kwargs) + self.params['simulate'] = True + + def process_info(self, info_dict): + super(YDL, self).process_info(info_dict) + + def _match_entry(self, info_dict, incomplete): + res = super(FilterYDL, self)._match_entry(info_dict, 
incomplete) + if res is None: + self.downloaded_info_dicts.append(info_dict) + return res + + first = { + 'id': '1', + 'url': TEST_URL, + 'title': 'one', + 'extractor': 'TEST', + 'duration': 30, + 'filesize': 10 * 1024, + } + second = { + 'id': '2', + 'url': TEST_URL, + 'title': 'two', + 'extractor': 'TEST', + 'duration': 10, + 'description': 'foo', + 'filesize': 5 * 1024, + } + videos = [first, second] + + def get_videos(filter_=None): + ydl = FilterYDL({'match_filter': filter_}) + for v in videos: + ydl.process_ie_result(v, download=True) + return [v['id'] for v in ydl.downloaded_info_dicts] + + res = get_videos() + self.assertEqual(res, ['1', '2']) + + def f(v): + if v['id'] == '1': + return None + else: + return 'Video id is not 1' + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('duration < 30') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description = foo') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description =? 
foo') + res = get_videos(f) + self.assertEqual(res, ['1', '2']) + + f = match_filter_func('filesize > 5KiB') + res = get_videos(f) + self.assertEqual(res, ['1']) + if __name__ == '__main__': unittest.main() From a7d9ded45dac709fbb9140ab3525a10a2a6d3c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Mar 2015 12:07:23 +0100 Subject: [PATCH 0125/2721] [test] Add tests for aes --- devscripts/generate_aes_testdata.py | 36 ++++++++++++++++++++++ test/test_aes.py | 47 +++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 devscripts/generate_aes_testdata.py create mode 100644 test/test_aes.py diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py new file mode 100644 index 000000000..ff66449eb --- /dev/null +++ b/devscripts/generate_aes_testdata.py @@ -0,0 +1,36 @@ +from __future__ import unicode_literals + +import codecs +import subprocess + +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import intlist_to_bytes +from youtube_dl.aes import aes_encrypt, key_expansion + +secret_msg = b'Secret message goes here' + + +def hex_str(int_list): + return codecs.encode(intlist_to_bytes(int_list), 'hex') + + +def openssl_encode(algo, key, iv): + cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)] + prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + out, _ = prog.communicate(secret_msg) + return out + +iv = key = [0x20, 0x15] + 14 * [0] + +r = openssl_encode('aes-128-cbc', key, iv) +print('aes_cbc_decrypt') +print(repr(r)) + +password = key +new_key = aes_encrypt(password, key_expansion(password)) +r = openssl_encode('aes-128-ctr', new_key, iv) +print('aes_decrypt_text') +print(repr(r)) diff --git a/test/test_aes.py b/test/test_aes.py new file mode 100644 index 000000000..111b902e1 --- /dev/null +++ b/test/test_aes.py @@ -0,0 +1,47 @@ 
+#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text +from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes +import base64 + +# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' + + +class TestAES(unittest.TestCase): + def setUp(self): + self.key = self.iv = [0x20, 0x15] + 14 * [0] + self.secret_msg = b'Secret message goes here' + + def test_encrypt(self): + msg = b'message' + key = list(range(16)) + encrypted = aes_encrypt(bytes_to_intlist(msg), key) + decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) + self.assertEqual(decrypted, msg) + + def test_cbc_decrypt(self): + data = bytes_to_intlist( + b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd" + ) + decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_decrypt_text(self): + password = intlist_to_bytes(self.key).decode('utf-8') + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + ) + decrypted = (aes_decrypt_text(encrypted, password, 16)) + self.assertEqual(decrypted, self.secret_msg) + +if __name__ == '__main__': + unittest.main() From 85698c508651cc047b0ba2181580d99ed057ef0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Mar 2015 12:18:33 +0100 Subject: [PATCH 0126/2721] [crunchyroll] Remove unused class --- youtube_dl/extractor/crunchyroll.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index e64b88fbc..6ded723c9 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ 
b/youtube_dl/extractor/crunchyroll.py @@ -23,7 +23,6 @@ from ..utils import ( ) from ..aes import ( aes_cbc_decrypt, - inc, ) @@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor): key = obfuscate_key(id) - class Counter: - __value = iv - - def next_value(self): - temp = self.__value - self.__value = inc(self.__value) - return temp decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) return zlib.decompress(decrypted_data) From 179d6678b1ce5ecdf22e2ae254f451183d836e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Mar 2015 12:34:44 +0100 Subject: [PATCH 0127/2721] Remove the 'stitle' field A warning has been printed for more than 2 years (since 97cd3afc7525394c46398f1526d412d081c02085) --- youtube_dl/YoutubeDL.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5a83bc956..b5ef5e009 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -328,9 +328,6 @@ class YoutubeDL(object): 'Parameter outtmpl is bytes, but should be a unicode string. ' 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') - if '%(stitle)s' in self.params.get('outtmpl', ''): - self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') - self._setup_opener() if auto_init: @@ -1218,9 +1215,6 @@ class YoutubeDL(object): if len(info_dict['title']) > 200: info_dict['title'] = info_dict['title'][:197] + '...' 
- # Keep for backwards compatibility - info_dict['stitle'] = info_dict['title'] - if 'format' not in info_dict: info_dict['format'] = info_dict['ext'] From 4e6a2286899ab156ad342e3e9003b0c3b239e0ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Mar 2015 18:20:49 +0600 Subject: [PATCH 0128/2721] [nrk] Adapt to new URL format --- youtube_dl/extractor/nrk.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index bff36f9d3..d17b7ed49 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -14,22 +14,22 @@ from ..utils import ( class NRKIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P[\dA-F]{16})' + _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P\d+)' _TESTS = [ { - 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/', - 'md5': 'a6eac35052f3b242bb6bb7f43aed5886', + 'url': 'http://www.nrk.no/video/PS*150533', + 'md5': 'bccd850baebefe23b56d708a113229c2', 'info_dict': { 'id': '150533', 'ext': 'flv', 'title': 'Dompap og andre fugler i Piip-Show', - 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f' + 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', } }, { - 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/', - 'md5': '3471f2a51718195164e88f46bf427668', + 'url': 'http://www.nrk.no/video/PS*154915', + 'md5': '0b1493ba1aae7d9579a5ad5531bc395a', 'info_dict': { 'id': '154915', 'ext': 'flv', @@ -40,20 +40,18 @@ class NRKIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage(url, video_id) - - video_id = self._html_search_regex(r'
', page, 'video id') + video_id = self._match_id(url) data = self._download_json( - 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON') + 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id, + video_id, 'Downloading media JSON') if data['usageRights']['isGeoBlocked']: - raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True) + raise ExtractorError( + 'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', + expected=True) - video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124' + video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81' images = data.get('images') if images: From 393d9fc6d28704ef7b60acb1dfc785493ecbe0eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Mar 2015 18:21:19 +0600 Subject: [PATCH 0129/2721] [nrk] Extract duration --- youtube_dl/extractor/nrk.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index d17b7ed49..117a78aa1 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -25,6 +25,7 @@ class NRKIE(InfoExtractor): 'ext': 'flv', 'title': 'Dompap og andre fugler i Piip-Show', 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', + 'duration': 263, } }, { @@ -35,6 +36,7 @@ class NRKIE(InfoExtractor): 'ext': 'flv', 'title': 'Slik høres internett ut når du er blind', 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', + 'duration': 20, } }, ] @@ -53,6 +55,8 @@ class NRKIE(InfoExtractor): video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81' + duration = parse_duration(data.get('duration')) + images = data.get('images') if images: thumbnails = images['webImages'] @@ -67,6 +71,7 @@ class NRKIE(InfoExtractor): 'ext': 'flv', 'title': data['title'], 'description': data['description'], + 'duration': duration, 'thumbnail': thumbnail, } From faa1b5c2920f1d50f01b83e96b5b0f136edc74f7 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Mar 2015 18:22:08 +0600 Subject: [PATCH 0130/2721] [nrk:playlist] Add extractor (Closes #5245) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nrk.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7eb9b4fbb..e25e4a582 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -346,6 +346,7 @@ from .npo import ( ) from .nrk import ( NRKIE, + NRKPlaylistIE, NRKTVIE, ) from .ntvde import NTVDeIE diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 117a78aa1..c2a8202dd 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -76,6 +76,37 @@ class NRKIE(InfoExtractor): } +class NRKPlaylistIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:[^/]+/)*(?P[^/]+)' + + _TEST = { + 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', + 'info_dict': { + 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763', + 'title': 'Gjenopplev den historiske solformørkelsen', + 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed', + }, + 'playlist_mincount': 2, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('nrk:%s' % video_id, 'NRK') + for video_id in re.findall( + r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="(\d+)"', webpage) + ] + + playlist_title = self._og_search_title(webpage) + playlist_description = self._og_search_description(webpage) + + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) + + class NRKTVIE(InfoExtractor): _VALID_URL = r'(?Phttp://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' 
From c9450c7ab19bbd24a4ff8643370cb4f235b0b380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Mar 2015 14:00:37 +0100 Subject: [PATCH 0131/2721] [nrk:playlist] Restrict _VALID_URL It would also match /videos/PS... urls --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index c2a8202dd..1355ecea2 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -77,7 +77,7 @@ class NRKIE(InfoExtractor): class NRKPlaylistIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:[^/]+/)*(?P[^/]+)' + _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)[^/]+/(?P[^/]+)' _TEST = { 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', From 5379a2d40db3674fead7c4239afdb65ff7b389c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 21 Mar 2015 14:12:43 +0100 Subject: [PATCH 0132/2721] [test/utils] Test xpath_text --- test/test_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 3431ad24e..a8ab87685 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -24,6 +24,7 @@ from youtube_dl.utils import ( encodeFilename, escape_rfc3986, escape_url, + ExtractorError, find_xpath_attr, fix_xml_ampersands, InAdvancePagedList, @@ -54,6 +55,7 @@ from youtube_dl.utils import ( urlencode_postdata, version_tuple, xpath_with_ns, + xpath_text, render_table, match_str, ) @@ -250,6 +252,17 @@ class TestUtil(unittest.TestCase): self.assertEqual(find('media:song/media:author').text, 'The Author') self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') + def test_xpath_text(self): + testxml = ''' +
+

Foo

+
+
''' + doc = xml.etree.ElementTree.fromstring(testxml) + self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') + self.assertTrue(xpath_text(doc, 'div/bar') is None) + self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True) + def test_smuggle_url(self): data = {"ö": "ö", "abc": [3]} url = 'https://foo.bar/baz?x=y#a' From a09141548aa31db7c7d9457b10f5c84e6e32beba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 21 Mar 2015 20:42:48 +0600 Subject: [PATCH 0133/2721] [nrk:playlist] Relax video id regex and improve _VALID_URL --- youtube_dl/extractor/nrk.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 1355ecea2..e91d3a248 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -77,17 +77,25 @@ class NRKIE(InfoExtractor): class NRKPlaylistIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)[^/]+/(?P[^/]+)' + _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P[^/]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', 'info_dict': { 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763', 'title': 'Gjenopplev den historiske solformørkelsen', 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed', }, - 'playlist_mincount': 2, - } + 'playlist_count': 2, + }, { + 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449', + 'info_dict': { + 'id': 'rivertonprisen-til-karin-fossum-1.12266449', + 'title': 'Rivertonprisen til Karin Fossum', + 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.', + }, + 'playlist_count': 5, + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -97,7 +105,8 @@ class NRKPlaylistIE(InfoExtractor): entries = [ self.url_result('nrk:%s' % video_id, 'NRK') for video_id in re.findall( - 
r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="(\d+)"', webpage) + r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', + webpage) ] playlist_title = self._og_search_title(webpage) From 49aeedb8cb4dcf317c970a58c590d42e37904720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Mar 2015 08:11:10 +0600 Subject: [PATCH 0134/2721] [libsyn] Improve and simplify --- youtube_dl/extractor/libsyn.py | 81 +++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index 88379f276..6bf741db8 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -1,50 +1,59 @@ -# encoding: utf-8 +# coding: utf-8 +from __future__ import unicode_literals + +import re + from .common import InfoExtractor -from ..utils import ( - unified_strdate, -) +from ..utils import unified_strdate + class LibsynIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/(?P[0-9]+)(?:/.*)?' - _TESTS = [{ - 'url': "http://html5-player.libsyn.com/embed/episode/id/3377616/", + _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P[0-9]+)' + + _TEST = { + 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/', + 'md5': '443360ee1b58007bc3dcf09b41d093bb', 'info_dict': { - 'id': "3377616", - 'ext': "mp3", - 'title': "Episode 12: Bassem Youssef: Egypt's Jon Stewart", - 'description': "

Bassem Youssef joins executive producer Steve Bodow and senior producer Sara Taksler for a conversation about how The Daily Show inspired Bassem to create Al-Bernameg, his massively popular (and now banned) Egyptian news satire program. Sara discusses her soon-to-be-released documentary, Tickling Giants, which chronicles how Bassem and his staff risked their safety every day to tell jokes.

", + 'id': '3377616', + 'ext': 'mp3', + 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart", + 'description': 'md5:601cb790edd05908957dae8aaa866465', + 'upload_date': '20150220', }, - }] + } def _real_extract(self, url): - if url.startswith('//'): - url = 'https:' + url - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + video_id = self._match_id(url) - podcast_title = self._search_regex(r'

(.*?)

', webpage, 'show title') - podcast_episode_title = self._search_regex(r'

(.*?)

', webpage, 'episode title') - podcast_date = unified_strdate(self._search_regex(r'
Released: (.*?)
', webpage, 'release date')) - podcast_description = self._search_regex(r'
(.*?)
', webpage, 'description') + webpage = self._download_webpage(url, video_id) - url0 = self._search_regex(r'var mediaURLLibsyn = "(?Phttps?://.*)";', webpage, 'first media URL') - url1 = self._search_regex(r'var mediaURL = "(?Phttps?://.*)";', webpage, 'second media URL') + formats = [{ + 'url': media_url, + } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))] - if url0 != url1: - formats = [{ - 'url': url0 - }, { - 'url': url1 - }] - else: - formats = [{ - 'url': url0 - }] + podcast_title = self._search_regex( + r'

([^<]+)

', webpage, 'title') + episode_title = self._search_regex( + r'

([^<]+)

', webpage, 'title', default=None) + + title = '%s - %s' %(podcast_title, episode_title) if podcast_title else episode_title + + description = self._html_search_regex( + r'
(.+?)
', webpage, + 'description', fatal=False) + + thumbnail = self._search_regex( + r']+class="info-show-icon"[^>]+src="([^"]+)"', + webpage, 'thumbnail', fatal=False) + + release_date = unified_strdate(self._search_regex( + r'
Released: ([^<]+)<', webpage, 'release date', fatal=False)) return { - 'id': display_id, - 'title': podcast_episode_title, - 'description': podcast_description, - 'upload_date': podcast_date, + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': release_date, 'formats': formats, } From a1d0aa7b882484685a1a02185d0dafd51c545701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Mar 2015 08:11:47 +0600 Subject: [PATCH 0135/2721] [libsyn] Fix extractor alphabetic order --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a20492fc3..82b75a144 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -250,8 +250,8 @@ from .letv import ( LetvTvIE, LetvPlaylistIE ) -from .lifenews import LifeNewsIE from .libsyn import LibsynIE +from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE from .livestream import ( LivestreamIE, From cefdf970ccd8017cd67e949004e5e4c770aacdb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Mar 2015 08:18:13 +0600 Subject: [PATCH 0136/2721] [extractor/generic] Support Libsyn embeds --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8716e4503..84e8f14b2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1013,6 +1013,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for Libsyn player + mobj = re.search( + r']+src=(["\'])(?P(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or 
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or From 2051acdeb2ed9a0edf3b6b70682699c37d19d851 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Mar 2015 08:20:27 +0600 Subject: [PATCH 0137/2721] [extractor/generic] Add test for Libsyn embed --- youtube_dl/extractor/generic.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 84e8f14b2..8a49b0b54 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -527,6 +527,17 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Viddler'], }, + # Libsyn embed + { + 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve', + 'info_dict': { + 'id': '3377616', + 'ext': 'mp3', + 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart", + 'description': 'md5:601cb790edd05908957dae8aaa866465', + 'upload_date': '20150220', + }, + }, # jwplayer YouTube { 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/', From cf2e2eb1c0b626f2d5f210ffd14642aceb0358e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Mar 2015 08:23:20 +0600 Subject: [PATCH 0138/2721] [comedycentral] Drop thedailyshow podcast extractor Generic extractor is just fine for Libsyn embeds --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/comedycentral.py | 24 ------------------------ 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 82b75a144..d73826d44 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -84,7 +84,7 @@ from .cnn import ( ) from .collegehumor import CollegeHumorIE from .collegerama import CollegeRamaIE -from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE, TheDailyShowPodcastIE +from .comedycentral import 
ComedyCentralIE, ComedyCentralShowsIE from .comcarcoff import ComCarCoffIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .condenast import CondeNastIE diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index bd3817b56..648a6f990 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -273,27 +273,3 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): 'title': show_name + ' ' + title, 'description': description, } - -class TheDailyShowPodcastIE(InfoExtractor): - _VALID_URL = r'(?Phttps?:)?//thedailyshow\.cc\.com/podcast/(?P[a-z\-]+)' - _TESTS = [{ - "url": "http://thedailyshow.cc.com/podcast/episodetwelve", - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - player_url = self._search_regex(r']+)?\s*src="((?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/[0-9]+)', webpage, 'player URL') - if player_url.startswith('//'): - mobj = re.match(self._VALID_URL, url) - scheme = mobj.group('scheme') - if not scheme: - scheme = 'https:' - player_url = scheme + player_url - - return { - '_type': 'url_transparent', - 'url': player_url, - } From 1a4123de04d0168ef4a14e6064148eb248d65dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Mar 2015 08:23:38 +0600 Subject: [PATCH 0139/2721] [comedycentral] Remove unused import --- youtube_dl/extractor/comedycentral.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 648a6f990..e5edcc84b 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor from .mtv import MTVServicesInfoExtractor from ..compat import ( compat_str, From 336d19044c84128cc3bdcb62882372243dfa32c8 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 22 Mar 2015 11:03:52 +0100 Subject: [PATCH 0140/2721] [lybsyn] pep8: add space around operator --- youtube_dl/extractor/libsyn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index 6bf741db8..9ab1416f5 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -36,7 +36,7 @@ class LibsynIE(InfoExtractor): episode_title = self._search_regex( r'

([^<]+)

', webpage, 'title', default=None) - title = '%s - %s' %(podcast_title, episode_title) if podcast_title else episode_title + title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title description = self._html_search_regex( r'
(.+?)
', webpage, From f9544f6e8fe4697a4a48364b6d7a7dc7d93cabf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 22 Mar 2015 12:09:58 +0100 Subject: [PATCH 0141/2721] [test/aes] Test aes_decrypt_text with 256 bit --- devscripts/generate_aes_testdata.py | 8 +++++++- test/test_aes.py | 8 ++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py index ff66449eb..2e389fc8e 100644 --- a/devscripts/generate_aes_testdata.py +++ b/devscripts/generate_aes_testdata.py @@ -32,5 +32,11 @@ print(repr(r)) password = key new_key = aes_encrypt(password, key_expansion(password)) r = openssl_encode('aes-128-ctr', new_key, iv) -print('aes_decrypt_text') +print('aes_decrypt_text 16') +print(repr(r)) + +password = key + 16 * [0] +new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16) +r = openssl_encode('aes-256-ctr', new_key, iv) +print('aes_decrypt_text 32') print(repr(r)) diff --git a/test/test_aes.py b/test/test_aes.py index 111b902e1..4dc7de7b5 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -43,5 +43,13 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) + password = intlist_to_bytes(self.key).decode('utf-8') + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + ) + decrypted = (aes_decrypt_text(encrypted, password, 32)) + self.assertEqual(decrypted, self.secret_msg) + if __name__ == '__main__': unittest.main() From 93f787070ff2de48023c5bfeaebd153b3e29137a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 22 Mar 2015 15:39:35 +0100 Subject: [PATCH 0142/2721] [twitch] Only match digits for the video id Urls can also contain contain a query (for example a timestamp '?t=foo') --- 
youtube_dl/extractor/twitch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index aad2bf222..94bd6345d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -149,7 +149,7 @@ class TwitchItemBaseIE(TwitchBaseIE): class TwitchVideoIE(TwitchItemBaseIE): IE_NAME = 'twitch:video' - _VALID_URL = r'%s/[^/]+/b/(?P[^/]+)' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL = r'%s/[^/]+/b/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE _ITEM_TYPE = 'video' _ITEM_SHORTCUT = 'a' @@ -165,7 +165,7 @@ class TwitchVideoIE(TwitchItemBaseIE): class TwitchChapterIE(TwitchItemBaseIE): IE_NAME = 'twitch:chapter' - _VALID_URL = r'%s/[^/]+/c/(?P[^/]+)' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL = r'%s/[^/]+/c/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE _ITEM_TYPE = 'chapter' _ITEM_SHORTCUT = 'c' @@ -184,7 +184,7 @@ class TwitchChapterIE(TwitchItemBaseIE): class TwitchVodIE(TwitchItemBaseIE): IE_NAME = 'twitch:vod' - _VALID_URL = r'%s/[^/]+/v/(?P[^/]+)' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL = r'%s/[^/]+/v/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE _ITEM_TYPE = 'vod' _ITEM_SHORTCUT = 'v' From 32d687f55e103963a2cb8d8f3f88bb31b9cb8fb6 Mon Sep 17 00:00:00 2001 From: zx8 Date: Sun, 22 Mar 2015 18:03:40 +0000 Subject: [PATCH 0143/2721] [safari] Add safaribooksonline extractor --- youtube_dl/extractor/__init__.py | 4 + youtube_dl/extractor/safari.py | 144 +++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 youtube_dl/extractor/safari.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d73826d44..3a0c42ded 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -420,6 +420,10 @@ from .rutube import ( ) from .rutv import RUTVIE from .sandia import SandiaIE +from .safari import ( + SafariIE, + SafariCourseIE, +) from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE 
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py new file mode 100644 index 000000000..3e494b960 --- /dev/null +++ b/youtube_dl/extractor/safari.py @@ -0,0 +1,144 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from .brightcove import BrightcoveIE + +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, +) +from ..utils import ( + ExtractorError, + smuggle_url, + std_headers, +) + + +class SafariBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' + _SUCCESSFUL_LOGIN_REGEX = r']+>Sign Out' + _ACCOUNT_CREDENTIALS_HINT = ('Use --username and --password options to ' + 'supply credentials for safaribooksonline.com ') + _NETRC_MACHINE = 'safaribooksonline' + + LOGGED_IN = False + + def _real_initialize(self): + # We only need to log in once for courses or individual videos + if not SafariBaseIE.LOGGED_IN: + self._login() + SafariBaseIE.LOGGED_IN = True + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + raise ExtractorError( + self._ACCOUNT_CREDENTIALS_HINT, + expected=True) + + headers = std_headers + if 'Referer' not in headers: + headers['Referer'] = self._LOGIN_URL + + login_page = self._download_webpage( + self._LOGIN_URL, None, + 'Downloading login form') + + csrf = self._html_search_regex( + r"", + login_page, 'csrf token') + + login_form = { + 'csrfmiddlewaretoken': csrf, + 'email': username, + 'password1': password, + 'login': 'Sign In', + 'next': '', + } + + request = compat_urllib_request.Request( + self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers) + login_page = self._download_webpage( + request, None, 'Logging in as %s' % username) + + if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: + raise ExtractorError('Login failed; make sure your credentials are correct and ' + 'try again.', expected=True) + + 
self.to_screen('Login successful') + + +class SafariIE(SafariBaseIE): + IE_NAME = 'safari' + IE_DESC = 'safaribooksonline.com online video' + _VALID_URL = (r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/' + '(?P\d+)/(?Ppart\d+)\.html') + _TEST = { + 'url': ('https://www.safaribooksonline.com/library/view/' + 'hadoop-fundamentals-livelessons/9780133392838/part00.html'), + 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', + 'info_dict': { + 'id': '9780133392838', + 'ext': 'mp4', + 'title': 'Introduction', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + part = mobj.group('part') + + webpage = self._download_webpage(url, part) + bc_url = BrightcoveIE._extract_brightcove_url(webpage) + if not bc_url: + raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) + + return { + '_type': 'url', + 'url': smuggle_url(bc_url, {'Referer': url}), + 'ie_key': 'Brightcove' + } + + +class SafariCourseIE(SafariBaseIE): + IE_NAME = 'safari:course' + IE_DESC = 'safaribooksonline.com online courses' + + _VALID_URL = (r'https?://(?:www\.)?safaribooksonline\.com/library/view/' + '(?P[^/]+)/(?P\d+)/?$') + + _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' + _API_FORMAT = 'json' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + course_path = mobj.group('course_path') + course_id = mobj.group('id') + + webpage = self._download_webpage( + '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), + course_path, 'Downloading course JSON') + + course_json = json.loads(webpage) + + if 'chapters' not in course_json: + raise ExtractorError('No chapters found for course %s' % course_id, expected=True) + + num_parts = len(course_json['chapters']) + parts = ['%02d' % part for part in range(num_parts)] + + entries = [ + self.url_result( + 'https://www.safaribooksonline.com/library/view/%s/%s/part%s.html' % (course_path, + course_id, + part_id), + 'Safari') + for part_id in 
parts] + + course_title = course_json['title'] + + return self.playlist_result(entries, course_id, course_title) From 575dad3c9842f333c4af27563a26bddaf0015fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20Le=20N=C3=A9grate?= Date: Sun, 22 Mar 2015 20:25:44 +0100 Subject: [PATCH 0144/2721] [pornovoisines] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pornovoisines.py | 101 ++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/pornovoisines.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d73826d44..17d075ec8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -382,6 +382,7 @@ from .pornhub import ( PornHubPlaylistIE, ) from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py new file mode 100644 index 000000000..efbb6a818 --- /dev/null +++ b/youtube_dl/extractor/pornovoisines.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import datetime +import random + +from ..compat import compat_urllib_parse +from .common import InfoExtractor + +class PornoVoisinesIE(InfoExtractor): + _VALID_URL = r'^((?:http://)?(?:www\.)?pornovoisines.com)/showvideo/(\d+)/([^/]+)' + + VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ + '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' + + SERVER_NUMBERS = (1, 2) + + _TEST = { + 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', + 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', + 'info_dict': { + 'id': '1285', + 'display_id': 'recherche-appartement', + 'ext': 'mp4', + 'title': "Recherche appartement", + 'upload_date': '20140925', + 'view_count': int, + 'duration': 120, + 'categories': 
["Débutante", "Scénario", "Sodomie"], + 'description': 're:^Pour la .+ original...$', + 'thumbnail': 're:^http://', + 'uploader': "JMTV", + 'average_rating': float, + 'comment_count': int, + 'age_limit': 18, + } + } + + @classmethod + def build_video_url(cls, id): + server_nr = random.choice(cls.SERVER_NUMBERS) + return cls.VIDEO_URL_TEMPLATE % (server_nr, id) + + @staticmethod + def parse_upload_date(str): + return datetime.datetime.strptime(str, "%d-%m-%Y").strftime("%Y%m%d") + + @staticmethod + def parse_categories(str): + return map(lambda s: s.strip(), str.split(',')) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url_prefix = mobj.group(1) + id = mobj.group(2) + display_id = mobj.group(3) + + webpage = self._download_webpage(url, id) + + title = self._html_search_regex(r'
<h1>(.+?)</h1>
', webpage, 'title', + flags=re.DOTALL) + url = self.build_video_url(id) + upload_date = self.parse_upload_date( + self._search_regex(r'Publié le (\d\d-\d\d-\d{4})', webpage, + 'upload date')) + view_count = int(self._search_regex(r'(\d+) vues', webpage, 'view count')) + duration = int(self._search_regex('Durée (\d+)', webpage, 'duration')) + categories = self.parse_categories(self._html_search_regex( + r'
  • (.+?)
  • ', webpage, "categories", + flags=re.DOTALL)) + description = self._html_search_regex( + r'
    (.+?)
    ', webpage, "description", + flags=re.DOTALL) + thumbnail = url_prefix + self._html_search_regex(re.compile( + '
    .*?(.+?)', webpage, + "uploader", flags=re.DOTALL)) + average_rating = float(self._search_regex(r'Note : (\d+,\d+)', + webpage, "average rating").replace(',', '.')) + comment_count = int(self._search_regex(r'\((\d+)\)', webpage, + "comment count")) + + return { + 'id': id, + 'display_id': display_id, + 'url': url, + 'title': title, + 'upload_date': upload_date, + 'view_count': view_count, + 'duration': duration, + 'categories': categories, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'average_rating': average_rating, + 'comment_count': comment_count, + 'age_limit': 18, + } From c41a2ec4af9fa76b04b6d9f50d9a895d124ea14c Mon Sep 17 00:00:00 2001 From: tiktok Date: Mon, 23 Mar 2015 01:42:17 +0100 Subject: [PATCH 0145/2721] [MiomioTv] Add new extractor --- docs/supportedsites.md | 1 + youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/miomio_tv.py | 70 +++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 youtube_dl/extractor/miomio_tv.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 062cb3d62..53d280677 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -244,6 +244,7 @@ - **Mgoon** - **Minhateca** - **MinistryGrid** + - **Miomio.tv** - **mitele.es** - **mixcloud** - **MLB** diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ffcc7d9ab..370154773 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -265,6 +265,7 @@ from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE +from .miomio_tv import MiomioTvIE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .mit import TechTVMITIE, MITIE, OCWMITIE diff --git a/youtube_dl/extractor/miomio_tv.py b/youtube_dl/extractor/miomio_tv.py new file mode 100644 index 000000000..355774f54 --- /dev/null +++ b/youtube_dl/extractor/miomio_tv.py @@ -0,0 +1,70 @@ 
+# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class MiomioTvIE(InfoExtractor): + IE_NAME = 'miomio.tv' + _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P[0-9]+)' + _TEST = { + 'url': 'http://www.miomio.tv/watch/cc179734/', + 'md5': '48de02137d0739c15b440a224ad364b9', + 'info_dict': { + 'id': '179734', + 'title': u'\u624b\u7ed8\u52a8\u6f2b\u9b3c\u6ce3\u4f46\u4e01\u5168\u7a0b\u753b\u6cd5', + 'ext': 'flv' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r' Date: Mon, 23 Mar 2015 21:23:57 +0600 Subject: [PATCH 0146/2721] [mlb] Improve _VALID_URL (Closes #5260) --- youtube_dl/extractor/mlb.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 1a241aca7..e369551c2 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -10,7 +10,7 @@ from ..utils import ( class MLBIE(InfoExtractor): - _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?Pn?\d+)' + _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?Pn?\d+)' _TESTS = [ { 'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea', @@ -80,6 +80,10 @@ class MLBIE(InfoExtractor): 'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553', 'only_matching': True, }, + { + 'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728', + 'only_matching': True, + } ] def _real_extract(self, url): From 
b0872c19ea6fb5dcc20d695d6faead4af42da364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 23 Mar 2015 22:15:01 +0600 Subject: [PATCH 0147/2721] [npo] Skip broken URL links (Closes #5266) --- youtube_dl/extractor/npo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 557dffa46..5d8448571 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -231,7 +231,10 @@ class NPOLiveIE(NPOBaseIE): stream_url = self._download_json( stream_info['stream'], display_id, 'Downloading %s URL' % stream_type, - transform_source=strip_jsonp) + 'Unable to download %s URL' % stream_type, + transform_source=strip_jsonp, fatal=False) + if not stream_url: + continue if stream_type == 'hds': f4m_formats = self._extract_f4m_formats(stream_url, display_id) # f4m downloader downloads only piece of live stream From 5d1f0e607b3dfe7b1adea09a1188011e57acf0fb Mon Sep 17 00:00:00 2001 From: tiktok Date: Mon, 23 Mar 2015 23:16:50 +0100 Subject: [PATCH 0148/2721] [MiomioTv] updated based on feedback to merge request: 1) added comment to explain extra xml link download 2) changed {} entries to {0}, {1} etc 3) removed redundant language header (the others are required) 4) checked out the old version of the supported sites md (the change was not required) --- docs/supportedsites.md | 1 - youtube_dl/extractor/miomio_tv.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 53d280677..062cb3d62 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -244,7 +244,6 @@ - **Mgoon** - **Minhateca** - **MinistryGrid** - - **Miomio.tv** - **mitele.es** - **mixcloud** - **MLB** diff --git a/youtube_dl/extractor/miomio_tv.py b/youtube_dl/extractor/miomio_tv.py index 355774f54..ae20a32fa 100644 --- a/youtube_dl/extractor/miomio_tv.py +++ b/youtube_dl/extractor/miomio_tv.py @@ -23,10 +23,15 @@ 
class MiomioTvIE(InfoExtractor): title = self._html_search_regex(r' Date: Tue, 24 Mar 2015 16:39:46 +0100 Subject: [PATCH 0149/2721] [options] Handle special characters in argv (Fixes #5157) --- test/test_execution.py | 9 +++++++++ youtube_dl/options.py | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/test/test_execution.py b/test/test_execution.py index 60df187de..f31e51558 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +# coding: utf-8 + from __future__ import unicode_literals import unittest @@ -27,5 +29,12 @@ class TestExecution(unittest.TestCase): def test_main_exec(self): subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + def test_cmdline_umlauts(self): + p = subprocess.Popen( + [sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'], + cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) + _, stderr = p.communicate() + self.assertFalse(stderr) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 4e6e47d6f..35c7e5fb3 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -794,6 +794,11 @@ def parseOpts(overrideArguments=None): write_string('[debug] Override config: ' + repr(overrideArguments) + '\n') else: command_line_conf = sys.argv[1:] + # Workaround for Python 2.x, where argv is a byte list + if sys.version_info < (3,): + command_line_conf = [ + a.decode('utf-8', 'replace') for a in command_line_conf] + if '--ignore-config' in command_line_conf: system_conf = [] user_conf = [] From 48c971e07336f1e5d33a760e454c52fac83392d5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 24 Mar 2015 16:39:53 +0100 Subject: [PATCH 0150/2721] release 2015.03.24 --- docs/supportedsites.md | 2 ++ youtube_dl/version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 72b365305..baf7b3880 100644 
--- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -231,6 +231,7 @@ - **Letv** - **LetvPlaylist** - **LetvTv** + - **Libsyn** - **lifenews**: LIFE | NEWS - **LiveLeak** - **livestream** @@ -310,6 +311,7 @@ - **npo.nl:radio** - **npo.nl:radio:fragment** - **NRK** + - **NRKPlaylist** - **NRKTV** - **ntv.ru** - **Nuvid** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 51b4260aa..039ceadf2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.03.18' +__version__ = '2015.03.24' From 17941321ab0b3f9548d1f65e3f9d69e8cd01c0a3 Mon Sep 17 00:00:00 2001 From: testbonn Date: Wed, 25 Mar 2015 11:02:55 +0100 Subject: [PATCH 0151/2721] Clean up of --help output For consistency and readability --- youtube_dl/options.py | 196 +++++++++++++++++++++--------------------- 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 35c7e5fb3..68193a271 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -120,19 +120,19 @@ def parseOpts(overrideArguments=None): general.add_option( '-h', '--help', action='help', - help='print this help text and exit') + help='Print this help text and exit') general.add_option( '-v', '--version', action='version', - help='print program version and exit') + help='Print program version and exit') general.add_option( '-U', '--update', action='store_true', dest='update_self', - help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') + help='Update this program to latest version. 
Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', default=False, - help='continue on download errors, for example to skip unavailable videos in a playlist') + help='Continue on download errors, for example to skip unavailable videos in a playlist') general.add_option( '--abort-on-error', action='store_false', dest='ignoreerrors', @@ -140,7 +140,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--dump-user-agent', action='store_true', dest='dump_user_agent', default=False, - help='display the current browser identification') + help='Display the current browser identification') general.add_option( '--list-extractors', action='store_true', dest='list_extractors', default=False, @@ -152,7 +152,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. 
The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') general.add_option( '--ignore-config', action='store_true', @@ -169,7 +169,7 @@ def parseOpts(overrideArguments=None): '--no-color', '--no-colors', action='store_true', dest='no_color', default=False, - help='Do not emit color codes in output.') + help='Do not emit color codes in output') network = optparse.OptionGroup(parser, 'Network Options') network.add_option( @@ -206,23 +206,23 @@ def parseOpts(overrideArguments=None): selection.add_option( '--playlist-start', dest='playliststart', metavar='NUMBER', default=1, type=int, - help='playlist video to start at (default is %default)') + help='Playlist video to start at (default is %default)') selection.add_option( '--playlist-end', dest='playlistend', metavar='NUMBER', default=None, type=int, - help='playlist video to end at (default is last)') + help='Playlist video to end at (default is last)') selection.add_option( '--playlist-items', dest='playlist_items', metavar='ITEM_SPEC', default=None, - help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') + help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. 
You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', - help='download only matching titles (regex or caseless sub-string)') + help='Download only matching titles (regex or caseless sub-string)') selection.add_option( '--reject-title', dest='rejecttitle', metavar='REGEX', - help='skip download for matching titles (regex or caseless sub-string)') + help='Skip download for matching titles (regex or caseless sub-string)') selection.add_option( '--max-downloads', dest='max_downloads', metavar='NUMBER', type=int, default=None, @@ -238,19 +238,19 @@ def parseOpts(overrideArguments=None): selection.add_option( '--date', metavar='DATE', dest='date', default=None, - help='download only videos uploaded in this date') + help='Download only videos uploaded in this date') selection.add_option( '--datebefore', metavar='DATE', dest='datebefore', default=None, - help='download only videos uploaded on or before this date (i.e. inclusive)') + help='Download only videos uploaded on or before this date (i.e. inclusive)') selection.add_option( '--dateafter', metavar='DATE', dest='dateafter', default=None, - help='download only videos uploaded on or after this date (i.e. inclusive)') + help='Download only videos uploaded on or after this date (i.e. inclusive)') selection.add_option( '--min-views', metavar='COUNT', dest='min_views', default=None, type=int, - help='Do not download any videos with less than COUNT views',) + help='Do not download any videos with less than COUNT views') selection.add_option( '--max-views', metavar='COUNT', dest='max_views', default=None, type=int, @@ -259,7 +259,7 @@ def parseOpts(overrideArguments=None): '--match-filter', metavar='FILTER', dest='match_filter', default=None, help=( - '(Experimental) Generic video filter. ' + 'Generic video filter (experimental). 
' 'Specify any key (see help for -o for a list of available keys) to' ' match if the key is present, ' '!key to check if the key is not present,' @@ -277,15 +277,15 @@ def parseOpts(overrideArguments=None): selection.add_option( '--no-playlist', action='store_true', dest='noplaylist', default=False, - help='If the URL refers to a video and a playlist, download only the video.') + help='Download only the video, if the URL refers to a video and a playlist.') selection.add_option( '--yes-playlist', action='store_false', dest='noplaylist', default=False, - help='If the URL refers to a video and a playlist, download the playlist.') + help='Download the playlist, if the URL refers to a video and a playlist.') selection.add_option( '--age-limit', metavar='YEARS', dest='age_limit', default=None, type=int, - help='download only videos suitable for the given age') + help='Download only videos suitable for the given age') selection.add_option( '--download-archive', metavar='FILE', dest='download_archive', @@ -299,30 +299,30 @@ def parseOpts(overrideArguments=None): authentication.add_option( '-u', '--username', dest='username', metavar='USERNAME', - help='login with this account ID') + help='Login with this account ID') authentication.add_option( '-p', '--password', dest='password', metavar='PASSWORD', - help='account password. If this option is left out, youtube-dl will ask interactively.') + help='Account password. 
If this option is left out, youtube-dl will ask interactively.') authentication.add_option( '-2', '--twofactor', dest='twofactor', metavar='TWOFACTOR', - help='two-factor auth code') + help='Two-factor auth code') authentication.add_option( '-n', '--netrc', action='store_true', dest='usenetrc', default=False, - help='use .netrc authentication data') + help='Use .netrc authentication data') authentication.add_option( '--video-password', dest='videopassword', metavar='PASSWORD', - help='video password (vimeo, smotri)') + help='Video password (vimeo, smotri)') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( '-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, help=( - 'video format code, specify the order of preference using' + 'Video format code, specify the order of preference using' ' slashes, as in -f 22/17/18 . ' ' Instead of format codes, you can select by extension for the ' 'extensions aac, m4a, mp3, mp4, ogg, wav, webm. 
' @@ -350,19 +350,19 @@ def parseOpts(overrideArguments=None): video_format.add_option( '--all-formats', action='store_const', dest='format', const='all', - help='download all available video formats') + help='Download all available video formats') video_format.add_option( '--prefer-free-formats', action='store_true', dest='prefer_free_formats', default=False, - help='prefer free video formats unless a specific one is requested') + help='Prefer free video formats unless a specific one is requested') video_format.add_option( '--max-quality', action='store', dest='format_limit', metavar='FORMAT', - help='highest quality format to download') + help='Specify highest quality format to download') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', - help='list all available formats') + help='List all available formats') video_format.add_option( '--youtube-include-dash-manifest', action='store_true', dest='youtube_include_dash_manifest', default=True, @@ -382,46 +382,46 @@ def parseOpts(overrideArguments=None): subtitles.add_option( '--write-sub', '--write-srt', action='store_true', dest='writesubtitles', default=False, - help='write subtitle file') + help='Write subtitle file') subtitles.add_option( '--write-auto-sub', '--write-automatic-sub', action='store_true', dest='writeautomaticsub', default=False, - help='write automatic subtitle file (youtube only)') + help='Write automatic subtitle file (YouTube only)') subtitles.add_option( '--all-subs', action='store_true', dest='allsubtitles', default=False, - help='downloads all the available subtitles of the video') + help='Download all the available subtitles of the video') subtitles.add_option( '--list-subs', action='store_true', dest='listsubtitles', default=False, - help='lists all available subtitles for the video') + help='List all available subtitles for the video') subtitles.add_option( '--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', default='best', - 
help='subtitle format, accepts formats preference, for example: "ass/srt/best"') + help='Specify subtitle format preference, for example: "srt" or "ass/srt/best"') subtitles.add_option( '--sub-lang', '--sub-langs', '--srt-lang', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', default=[], callback=_comma_separated_values_options_callback, - help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') + help='Languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') downloader = optparse.OptionGroup(parser, 'Download Options') downloader.add_option( '-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', - help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') + help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)') downloader.add_option( '-R', '--retries', dest='retries', metavar='RETRIES', default=10, - help='number of retries (default is %default), or "infinite".') + help='Number of retries (default is %default), or "infinite".') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', - help='size of download buffer (e.g. 1024 or 16K) (default is %default)') + help='Size of download buffer (e.g. 1024 or 16K) (default is %default)') downloader.add_option( '--no-resize-buffer', action='store_true', dest='noresizebuffer', default=False, - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.') + help='Do not automatically adjust the buffer size. 
By default, the buffer size is automatically resized from an initial value of SIZE.') downloader.add_option( '--test', action='store_true', dest='test', default=False, @@ -433,11 +433,11 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--xattr-set-filesize', dest='xattr_set_filesize', action='store_true', - help='(experimental) set file xattribute ytdl.filesize with expected filesize') + help='Set file xattribute ytdl.filesize with expected filesize (experimental)') downloader.add_option( '--hls-prefer-native', dest='hls_prefer_native', action='store_true', - help='(experimental) Use the native HLS downloader instead of ffmpeg.') + help='Use the native HLS downloader instead of ffmpeg (experimental)') downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', @@ -446,7 +446,7 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--external-downloader-args', dest='external_downloader_args', metavar='ARGS', - help='Give these arguments to the external downloader.') + help='Give these arguments to the external downloader') workarounds = optparse.OptionGroup(parser, 'Workarounds') workarounds.add_option( @@ -456,7 +456,7 @@ def parseOpts(overrideArguments=None): workarounds.add_option( '--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, - help='Suppress HTTPS certificate validation.') + help='Suppress HTTPS certificate validation') workarounds.add_option( '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', @@ -464,16 +464,16 @@ def parseOpts(overrideArguments=None): workarounds.add_option( '--user-agent', metavar='UA', dest='user_agent', - help='specify a custom user agent') + help='Specify a custom user agent') workarounds.add_option( '--referer', metavar='URL', dest='referer', default=None, - help='specify a custom referer, use if the video access is restricted to one domain', + help='Specify a custom referer, use if the video 
access is restricted to one domain', ) workarounds.add_option( '--add-header', metavar='FIELD:VALUE', dest='headers', action='append', - help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', + help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', ) workarounds.add_option( '--bidi-workaround', @@ -488,7 +488,7 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-q', '--quiet', action='store_true', dest='quiet', default=False, - help='activates quiet mode') + help='Activate quiet mode') verbosity.add_option( '--no-warnings', dest='no_warnings', action='store_true', default=False, @@ -496,51 +496,51 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-s', '--simulate', action='store_true', dest='simulate', default=False, - help='do not download the video and do not write anything to disk',) + help='Do not download the video and do not write anything to disk') verbosity.add_option( '--skip-download', action='store_true', dest='skip_download', default=False, - help='do not download the video',) + help='Do not download the video') verbosity.add_option( '-g', '--get-url', action='store_true', dest='geturl', default=False, - help='simulate, quiet but print URL') + help='Simulate, quiet but print URL') verbosity.add_option( '-e', '--get-title', action='store_true', dest='gettitle', default=False, - help='simulate, quiet but print title') + help='Simulate, quiet but print title') verbosity.add_option( '--get-id', action='store_true', dest='getid', default=False, - help='simulate, quiet but print id') + help='Simulate, quiet but print id') verbosity.add_option( '--get-thumbnail', action='store_true', dest='getthumbnail', default=False, - help='simulate, quiet but print thumbnail URL') + help='Simulate, quiet but print thumbnail URL') verbosity.add_option( '--get-description', action='store_true', dest='getdescription', 
default=False, - help='simulate, quiet but print video description') + help='Simulate, quiet but print video description') verbosity.add_option( '--get-duration', action='store_true', dest='getduration', default=False, - help='simulate, quiet but print video length') + help='Simulate, quiet but print video length') verbosity.add_option( '--get-filename', action='store_true', dest='getfilename', default=False, - help='simulate, quiet but print output filename') + help='Simulate, quiet but print output filename') verbosity.add_option( '--get-format', action='store_true', dest='getformat', default=False, - help='simulate, quiet but print output format') + help='Simulate, quiet but print output format') verbosity.add_option( '-j', '--dump-json', action='store_true', dest='dumpjson', default=False, - help='simulate, quiet but print JSON information. See --output for a description of available keys.') + help='Simulate, quiet but print JSON information. See --output for a description of available keys.') verbosity.add_option( '-J', '--dump-single-json', action='store_true', dest='dump_single_json', default=False, - help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.') + help='Simulate, quiet but print JSON information for each command-line argument. 
If the URL refers to a playlist, dump the whole playlist information in a single line.') verbosity.add_option( '--print-json', action='store_true', dest='print_json', default=False, @@ -549,23 +549,23 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '--newline', action='store_true', dest='progress_with_newline', default=False, - help='output progress bar as new lines') + help='Output progress bar as new lines') verbosity.add_option( '--no-progress', action='store_true', dest='noprogress', default=False, - help='do not print progress bar') + help='Do not print progress bar') verbosity.add_option( '--console-title', action='store_true', dest='consoletitle', default=False, - help='display progress in console titlebar') + help='Display progress in console titlebar') verbosity.add_option( '-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print various debugging information') + help='Print various debugging information') verbosity.add_option( '--dump-pages', '--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, - help='print downloaded pages to debug problems (very verbose)') + help='Print downloaded pages to debug problems (very verbose)') verbosity.add_option( '--write-pages', action='store_true', dest='write_pages', default=False, @@ -581,31 +581,31 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-C', '--call-home', dest='call_home', action='store_true', default=False, - help='Contact the youtube-dl server for debugging.') + help='Contact the youtube-dl server for debugging') verbosity.add_option( '--no-call-home', dest='call_home', action='store_false', default=False, - help='Do NOT contact the youtube-dl server for debugging.') + help='Do NOT contact the youtube-dl server for debugging') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') filesystem.add_option( '-a', '--batch-file', dest='batchfile', metavar='FILE', - help='file containing URLs to 
download (\'-\' for stdin)') + help='File containing URLs to download (\'-\' for stdin)') filesystem.add_option( '--id', default=False, - action='store_true', dest='useid', help='use only video ID in file name') + action='store_true', dest='useid', help='Use only video ID in file name') filesystem.add_option( '-o', '--output', dest='outtmpl', metavar='TEMPLATE', - help=('output filename template. Use %(title)s to get the title, ' + help=('Output filename template. Use %(title)s to get the title, ' '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' '%(autonumber)s to get an automatically incremented number, ' '%(ext)s for the filename extension, ' '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' - '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' + '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), ' '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (youtube, metacafe, etc), ' + '%(extractor)s for the provider (YouTube, metacafe, etc), ' '%(id)s for the video id, ' '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, ' '%(playlist_index)s for the position in the playlist. 
' @@ -617,7 +617,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--autonumber-size', dest='autonumber_size', metavar='NUMBER', - help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, @@ -625,55 +625,55 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-A', '--auto-number', action='store_true', dest='autonumber', default=False, - help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000') + help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number of downloaded files starting from 00000') filesystem.add_option( '-t', '--title', action='store_true', dest='usetitle', default=False, - help='[deprecated] use title in file name (default)') + help='[deprecated] Use title in file name (default)') filesystem.add_option( '-l', '--literal', default=False, action='store_true', dest='usetitle', - help='[deprecated] alias of --title') + help='[deprecated] Alias of --title') filesystem.add_option( '-w', '--no-overwrites', action='store_true', dest='nooverwrites', default=False, - help='do not overwrite files') + help='Do not overwrite files') filesystem.add_option( '-c', '--continue', action='store_true', dest='continue_dl', default=True, - help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.') + help='Force resume of partially downloaded files. 
By default, youtube-dl will resume downloads if possible.') filesystem.add_option( '--no-continue', action='store_false', dest='continue_dl', - help='do not resume partially downloaded files (restart from beginning)') + help='Do not resume partially downloaded files (restart from beginning)') filesystem.add_option( '--no-part', action='store_true', dest='nopart', default=False, - help='do not use .part files - write directly into output file') + help='Do not use .part files - write directly into output file') filesystem.add_option( '--no-mtime', action='store_false', dest='updatetime', default=True, - help='do not use the Last-modified header to set the file modification time') + help='Do not use the Last-modified header to set the file modification time') filesystem.add_option( '--write-description', action='store_true', dest='writedescription', default=False, - help='write video description to a .description file') + help='Write video description to a .description file') filesystem.add_option( '--write-info-json', action='store_true', dest='writeinfojson', default=False, - help='write video metadata to a .info.json file') + help='Write video metadata to a .info.json file') filesystem.add_option( '--write-annotations', action='store_true', dest='writeannotations', default=False, - help='write video annotations to a .annotation file') + help='Write video annotations to a .annotation file') filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', - help='json file containing the video information (created with the "--write-json" option)') + help='Specify JSON file containing the video information (created with the "--write-json" option)') filesystem.add_option( '--cookies', dest='cookiefile', metavar='FILE', - help='file to read cookies from and dump cookie jar in') + help='File to read cookies from and dump cookie jar in') filesystem.add_option( '--cache-dir', dest='cachedir', default=None, metavar='DIR', help='Location in the filesystem 
where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') @@ -689,11 +689,11 @@ def parseOpts(overrideArguments=None): thumbnail.add_option( '--write-thumbnail', action='store_true', dest='writethumbnail', default=False, - help='write thumbnail image to disk') + help='Write thumbnail image to disk') thumbnail.add_option( '--write-all-thumbnails', action='store_true', dest='write_all_thumbnails', default=False, - help='write all thumbnail image formats to disk') + help='Write all thumbnail image formats to disk') thumbnail.add_option( '--list-thumbnails', action='store_true', dest='list_thumbnails', default=False, @@ -703,14 +703,14 @@ def parseOpts(overrideArguments=None): postproc.add_option( '-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') + help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') + help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') postproc.add_option( '--recode-video', metavar='FORMAT', dest='recodevideo', default=None, @@ -718,27 +718,27 @@ def 
parseOpts(overrideArguments=None): postproc.add_option( '-k', '--keep-video', action='store_true', dest='keepvideo', default=False, - help='keeps the video file on disk after the post-processing; the video is erased by default') + help='Keep the video file on disk after the post-processing; the video is erased by default') postproc.add_option( '--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, - help='do not overwrite post-processed files; the post-processed files are overwritten by default') + help='Do not overwrite post-processed files; the post-processed files are overwritten by default') postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='embed subtitles in the video (only for mp4 videos)') + help='Embed subtitles in the video (only for mp4 videos)') postproc.add_option( '--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, - help='embed thumbnail in the audio as cover art') + help='Embed thumbnail in the audio as cover art') postproc.add_option( '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='write metadata to the video file') + help='Write metadata to the video file') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', - help='parse additional metadata like song title / artist from the video title. ' + help='Parse additional metadata like song title / artist from the video title. ' 'The format syntax is the same as --output, ' 'the parsed parameters replace existing values. ' 'Additional templates: %(album), %(artist). 
' @@ -747,7 +747,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--xattrs', action='store_true', dest='xattrs', default=False, - help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') + help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)') postproc.add_option( '--fixup', metavar='POLICY', dest='fixup', default='detect_or_warn', From 2676caf344c1e22f5b01588716471edeeebf0ea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Mar 2015 20:08:35 +0600 Subject: [PATCH 0152/2721] [redtube] Capture and output removed video message (#5281) --- youtube_dl/extractor/redtube.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 846b76c81..2e3db6887 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ExtractorError class RedTubeIE(InfoExtractor): @@ -19,6 +20,9 @@ class RedTubeIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): + raise ExtractorError('Video %s has been removed' % video_id, expected=True) + video_url = self._html_search_regex( r'', webpage, 'video URL') video_title = self._html_search_regex( From 838b93405bc31654b07e120795115aec6e883855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Mar 2015 20:09:01 +0600 Subject: [PATCH 0153/2721] [redtube] Fix test --- youtube_dl/extractor/redtube.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 2e3db6887..d6054d717 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -8,11 +8,12 @@ class RedTubeIE(InfoExtractor): 
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P[0-9]+)' _TEST = { 'url': 'http://www.redtube.com/66418', + 'md5': '7b8c22b5e7098a3e1c09709df1126d2d', 'info_dict': { 'id': '66418', 'ext': 'mp4', - "title": "Sucked on a toilet", - "age_limit": 18, + 'title': 'Sucked on a toilet', + 'age_limit': 18, } } From 5c19d18cbfb6a0271a9d0b7ff516e1e2e0eeccfb Mon Sep 17 00:00:00 2001 From: Amish Bhadeshia Date: Mon, 23 Mar 2015 13:02:44 +0000 Subject: [PATCH 0154/2721] [22Tracks] Add new extractor Conflicts: youtube_dl/extractor/__init__.py --- youtube_dl/extractor/twentytwotracks.py | 111 ++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 youtube_dl/extractor/twentytwotracks.py diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py new file mode 100644 index 000000000..b655a7503 --- /dev/null +++ b/youtube_dl/extractor/twentytwotracks.py @@ -0,0 +1,111 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +# 22Tracks regularly replace the audio tracks that can be streamed on their +# site. The tracks usually expire after 1 months, so we can't add tests. 
+ + +class TwentyTwoTracksIE(InfoExtractor): + _VALID_URL = r'http://22tracks\.com/([a-z]+)/([a-z]+[2]*)/(\d+)' + IE_NAME = 'TwentyTwoTracks:Tracks' + + def _extract_info(self, city, genre, track=''): + self._base_url = "http://22tracks.com/api/" + + if track == '': + itemid = genre + else: + itemid = track + + cities = self._download_json( + self._base_url + 'cities', itemid, + 'Downloading city info', 'Cannot download city info') + city_id = [x['id'] for x in cities if x['slug'] == city] + + genres = self._download_json( + self._base_url + 'genres/' + str(city_id[0]), itemid, + 'Downloading genre info', 'Cannot download genre info') + genre_id = [x['id'] for x in genres if x['slug'] == genre] + + tracks = self._download_json( + self._base_url + 'tracks/' + str(genre_id[0]), + itemid, 'Downloading track info', 'Cannot download track info') + + if track == '': + return [[x['title'] for x in genres if x['slug'] == genre][0], + tracks] + else: + return [x for x in tracks if x['id'] == itemid][0] + + def _get_token(self, filename, track_id): + token = self._download_json( + 'http://22tracks.com/token.php?desktop=true&u=%2F128%2f{0}'.format( + filename), track_id, 'Finding download link...') + + down_url = 'http://audio.22tracks.com{0}?st={1}&e={2}'.format( + token['filename'], + token['st'], + token['e']) + + return down_url + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + city_id = mobj.group(1) + genre_id = mobj.group(2) + track_id = mobj.group(3) + + self.to_screen(':: Track ID found! 
- Downloading single track') + + track_info = self._extract_info(city_id, genre_id, track_id) + + download_url = self._get_token(track_info['filename'], track_id) + title = '{0}-{1}'.format( + track_info['artist'].strip(), track_info['title'].strip()) + + return { + 'id': track_id, + 'url': download_url, + 'ext': 'mp3', + 'title': title, + 'duration': track_info['duration'] + } + + +class TwentyTwoTracksGenreIE(TwentyTwoTracksIE): + _VALID_URL = r'http://22tracks\.com/([a-z]+)/([a-z]+[2]*)/?' + IE_NAME = 'TwentyTwoTracks:Genre' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + city_id = mobj.group(1) + genre_id = mobj.group(2) + + self.to_screen(':: Track ID not found! - Downloading entire genre') + + playlist_info = self._extract_info(city_id, genre_id) + + entries = [] + for track in playlist_info[1]: + title = '{0}-{1}'.format( + track['artist'].strip(), track['title'].strip()) + entries.append({ + 'id': track['id'], + 'url': self._get_token(track['filename'], track['id']), + 'ext': 'mp3', + 'title': title + }) + + self.to_screen(':: Links found - Downloading Playlist') + + return { + '_type': 'playlist', + 'id': genre_id, + 'title': playlist_info[0], + 'entries': entries + } From 8f76df7f3791bb085199eb90c3b4125931a44e55 Mon Sep 17 00:00:00 2001 From: Amish Bhadeshia Date: Tue, 24 Mar 2015 18:34:33 +0000 Subject: [PATCH 0155/2721] Updated init to add 22tracks --- youtube_dl/extractor/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d73826d44..b0df52afd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -526,6 +526,10 @@ from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE +from .twentytwotracks import ( + TwentyTwoTracksIE, + TwentyTwoTracksGenreIE +) from .twitch import ( TwitchVideoIE, TwitchChapterIE, From 
ae67d082fe0469109033714360b2b1ae92d7d86e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Mar 2015 22:26:02 +0600 Subject: [PATCH 0156/2721] [22tracks] Improve and simplify --- youtube_dl/extractor/twentytwotracks.py | 121 ++++++++++-------------- 1 file changed, 48 insertions(+), 73 deletions(-) diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py index b655a7503..d6c0ab184 100644 --- a/youtube_dl/extractor/twentytwotracks.py +++ b/youtube_dl/extractor/twentytwotracks.py @@ -3,109 +3,84 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import int_or_none # 22Tracks regularly replace the audio tracks that can be streamed on their # site. The tracks usually expire after 1 months, so we can't add tests. class TwentyTwoTracksIE(InfoExtractor): - _VALID_URL = r'http://22tracks\.com/([a-z]+)/([a-z]+[2]*)/(\d+)' - IE_NAME = 'TwentyTwoTracks:Tracks' + _VALID_URL = r'https?://22tracks\.com/(?P[a-z]+)/(?P[\da-z]+)/(?P\d+)' + IE_NAME = '22tracks:track' - def _extract_info(self, city, genre, track=''): - self._base_url = "http://22tracks.com/api/" + _API_BASE = 'http://22tracks.com/api' - if track == '': - itemid = genre - else: - itemid = track + def _extract_info(self, city, genre_name, track_id=None): + item_id = track_id if track_id else genre_name cities = self._download_json( - self._base_url + 'cities', itemid, - 'Downloading city info', 'Cannot download city info') - city_id = [x['id'] for x in cities if x['slug'] == city] + '%s/cities' % self._API_BASE, item_id, + 'Downloading cities info', + 'Unable to download cities info') + city_id = [x['id'] for x in cities if x['slug'] == city][0] genres = self._download_json( - self._base_url + 'genres/' + str(city_id[0]), itemid, - 'Downloading genre info', 'Cannot download genre info') - genre_id = [x['id'] for x in genres if x['slug'] == genre] + '%s/genres/%s' % (self._API_BASE, city_id), item_id, + 
'Downloading %s genres info' % city, + 'Unable to download %s genres info' % city) + genre = [x for x in genres if x['slug'] == genre_name][0] + genre_id = genre['id'] tracks = self._download_json( - self._base_url + 'tracks/' + str(genre_id[0]), - itemid, 'Downloading track info', 'Cannot download track info') + '%s/tracks/%s' % (self._API_BASE, genre_id), item_id, + 'Downloading %s genre tracks info' % genre_name, + 'Unable to download track info') - if track == '': - return [[x['title'] for x in genres if x['slug'] == genre][0], - tracks] - else: - return [x for x in tracks if x['id'] == itemid][0] + return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks] - def _get_token(self, filename, track_id): + def _get_track_url(self, filename, track_id): token = self._download_json( - 'http://22tracks.com/token.php?desktop=true&u=%2F128%2f{0}'.format( - filename), track_id, 'Finding download link...') - - down_url = 'http://audio.22tracks.com{0}?st={1}&e={2}'.format( - token['filename'], - token['st'], - token['e']) - - return down_url - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - city_id = mobj.group(1) - genre_id = mobj.group(2) - track_id = mobj.group(3) - - self.to_screen(':: Track ID found! 
- Downloading single track') - - track_info = self._extract_info(city_id, genre_id, track_id) - - download_url = self._get_token(track_info['filename'], track_id) - title = '{0}-{1}'.format( - track_info['artist'].strip(), track_info['title'].strip()) + 'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename, + track_id, 'Downloading token', 'Unable to download token') + return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e']) + def _extract_track_info(self, track_info, track_id): + download_url = self._get_track_url(track_info['filename'], track_id) + title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip()) return { 'id': track_id, 'url': download_url, 'ext': 'mp3', 'title': title, - 'duration': track_info['duration'] + 'duration': int_or_none(track_info.get('duration')), + 'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created')) } - -class TwentyTwoTracksGenreIE(TwentyTwoTracksIE): - _VALID_URL = r'http://22tracks\.com/([a-z]+)/([a-z]+[2]*)/?' - IE_NAME = 'TwentyTwoTracks:Genre' - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - city_id = mobj.group(1) - genre_id = mobj.group(2) + city = mobj.group('city') + genre = mobj.group('genre') + track_id = mobj.group('id') - self.to_screen(':: Track ID not found! 
- Downloading entire genre') + track_info = self._extract_info(city, genre, track_id) + return self._extract_track_info(track_info, track_id) - playlist_info = self._extract_info(city_id, genre_id) - entries = [] - for track in playlist_info[1]: - title = '{0}-{1}'.format( - track['artist'].strip(), track['title'].strip()) - entries.append({ - 'id': track['id'], - 'url': self._get_token(track['filename'], track['id']), - 'ext': 'mp3', - 'title': title - }) +class TwentyTwoTracksGenreIE(TwentyTwoTracksIE): + _VALID_URL = r'https?://22tracks\.com/(?P[a-z]+)/(?P[\da-z]+)/?$' + IE_NAME = '22tracks:genre' - self.to_screen(':: Links found - Downloading Playlist') + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) - return { - '_type': 'playlist', - 'id': genre_id, - 'title': playlist_info[0], - 'entries': entries - } + city = mobj.group('city') + genre = mobj.group('genre') + + genre_title, tracks = self._extract_info(city, genre) + + entries = [ + self._extract_track_info(track_info, track_info['id']) + for track_info in tracks] + + return self.playlist_result(entries, genre, genre_title) From 094ce39c45422d4675228337f115a110c718d68a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 Mar 2015 22:27:20 +0600 Subject: [PATCH 0157/2721] Credit @amishb for 22tracks (#5276) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 512469f4c..59f1b5f21 100644 --- a/AUTHORS +++ b/AUTHORS @@ -117,3 +117,4 @@ Alexander Mamay Devin J. 
Pohly Eduardo Ferro Aldama Jeff Buchbinder +Amish Bhadeshia From db40364b8767839cc1abf5ece53511167d70f792 Mon Sep 17 00:00:00 2001 From: Mohammad Teimori Pabandi Date: Thu, 26 Mar 2015 18:17:21 +0430 Subject: [PATCH 0158/2721] [Varzesh3] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/varzesh3.py | 63 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 youtube_dl/extractor/varzesh3.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b0df52afd..3011b784d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -548,6 +548,7 @@ from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE from .ustream import UstreamIE, UstreamChannelIE +from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py new file mode 100644 index 000000000..85dcd8734 --- /dev/null +++ b/youtube_dl/extractor/varzesh3.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor +from ..utils import ( + ExtractorError, +) +import re + + +class Varzesh3IE(InfoExtractor): + _VALID_URL = r'(?P(https?://(?:www\.)?video\.varzesh3\.com)/(?P.+))' + _TESTS = [ + { + 'url': 'http://video.varzesh3.com/video/%D8%AF%D8%A7%D9%86%D9%85%D8%A7%D8%B1%DA%A9-3-2-%D8%A2%D9%85%D8%B1%DB%8C%DA%A9%D8%A7/', + 'md5': 'c4b850780df9374b078463c5cf6b3318', + 'info_dict': { + 'url': 'http://dl1.video.varzesh3.com/video/clip94/1/international/friendly/usa_2_3_danmark.mp4', + 'id': '76464', + 'ext': 'mp4', + 'title': u'دانمارک ۳-۲ آمریکا (گلهای بازی)', + 'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/250315_usa_danmark_site.jpg', + 'description': u'بازی دوستانه ۲۰۱۵', + + } + }, + { + 'url': 
'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/', + 'md5': '2a933874cb7dce4366075281eb49e855', + 'info_dict': { + 'url': 'http://dl1.video.varzesh3.com/video/clip94/1/video/namayeshi/saves_week26.mp4', + 'id': '76337', + 'ext': 'mp4', + 'title': u'۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا', + 'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/230315_saves_week26.jpg', + 'description': u'فصل ۲۰۱۵-۲۰۱۴', + } + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + if not 'shortlink' in webpage: + raise ExtractorError('URL has no videos or there is a problem.') + + title = self._html_search_regex(r'meta[^>]+property="og:title"[^>]+content="([^"]+)"', webpage, 'title') + video_link = self._html_search_regex(r'source[^>]+src="([^"]+)"', webpage, 'video_link') + vid_id = self._html_search_regex(r"link[^>]+rel='canonical'[^>]+href='\/\?p=([^']+)'\/>", webpage, 'vid_id') + try: + description = self._html_search_regex(r'
    (.*?)
    ', webpage, 'description', flags=re.DOTALL) + except: + description = title + thumbnail = self._html_search_regex(r'link[^>]+rel="image_src"[^>]+href="([^"]+)"', webpage, 'thumbnail') + + return { + 'url': video_link, + 'id': vid_id, + 'title': title, + 'ext': video_link.split(".")[-1], + 'description': description, + 'thumbnail': thumbnail, + } From fbfcc2972b3b24bda092eaed92b81113154c4327 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 26 Mar 2015 16:13:53 +0200 Subject: [PATCH 0159/2721] [teamcoco] Fix extraction --- youtube_dl/extractor/teamcoco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 7cb06f351..a46a7ecba 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -54,7 +54,7 @@ class TeamcocoIE(InfoExtractor): embed_url, video_id, 'Downloading embed page') player_data = self._parse_json(self._search_regex( - r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id) + r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id) data = self._parse_json( base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id) From 91757b0f373ec3201f95066eeb0e09ebdcc1a067 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 26 Mar 2015 17:15:27 +0200 Subject: [PATCH 0160/2721] [utils] Escape all HTML entities written in hexadecimal form --- test/test_utils.py | 2 ++ youtube_dl/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index a8ab87685..abaf1ab73 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -200,6 +200,8 @@ class TestUtil(unittest.TestCase): def test_unescape_html(self): self.assertEqual(unescapeHTML('%20;'), '%20;') + self.assertEqual(unescapeHTML('/'), '/') + self.assertEqual(unescapeHTML('/'), '/') self.assertEqual( unescapeHTML('é'), 'é') diff --git 
a/youtube_dl/utils.py b/youtube_dl/utils.py index 472d4df41..245d623d8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -348,7 +348,7 @@ def _htmlentity_transform(entity): if entity in compat_html_entities.name2codepoint: return compat_chr(compat_html_entities.name2codepoint[entity]) - mobj = re.match(r'#(x?[0-9]+)', entity) + mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) if numstr.startswith('x'): From a7fce980adeb12afd94e4414cc6a6b37021f6aa4 Mon Sep 17 00:00:00 2001 From: Mohammad Teimori Pabandi Date: Thu, 26 Mar 2015 19:47:34 +0430 Subject: [PATCH 0161/2721] removed one of tests that made problem with testing server --- youtube_dl/extractor/varzesh3.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py index 85dcd8734..cb3fdacff 100644 --- a/youtube_dl/extractor/varzesh3.py +++ b/youtube_dl/extractor/varzesh3.py @@ -9,21 +9,7 @@ import re class Varzesh3IE(InfoExtractor): _VALID_URL = r'(?P(https?://(?:www\.)?video\.varzesh3\.com)/(?P.+))' - _TESTS = [ - { - 'url': 'http://video.varzesh3.com/video/%D8%AF%D8%A7%D9%86%D9%85%D8%A7%D8%B1%DA%A9-3-2-%D8%A2%D9%85%D8%B1%DB%8C%DA%A9%D8%A7/', - 'md5': 'c4b850780df9374b078463c5cf6b3318', - 'info_dict': { - 'url': 'http://dl1.video.varzesh3.com/video/clip94/1/international/friendly/usa_2_3_danmark.mp4', - 'id': '76464', - 'ext': 'mp4', - 'title': u'دانمارک ۳-۲ آمریکا (گلهای بازی)', - 'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/250315_usa_danmark_site.jpg', - 'description': u'بازی دوستانه ۲۰۱۵', - - } - }, - { + _TEST ={ 'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/', 'md5': '2a933874cb7dce4366075281eb49e855', 'info_dict': { @@ -34,8 +20,7 @@ 
class Varzesh3IE(InfoExtractor): 'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/230315_saves_week26.jpg', 'description': u'فصل ۲۰۱۵-۲۰۱۴', } - }, - ] + } def _real_extract(self, url): video_id = self._match_id(url) From 8896b614a9257ba33346f49a135371d460bfb311 Mon Sep 17 00:00:00 2001 From: Mohammad Teimori Pabandi Date: Thu, 26 Mar 2015 20:06:50 +0430 Subject: [PATCH 0162/2721] removing unicode literal because it is imported :)) --- youtube_dl/extractor/varzesh3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py index cb3fdacff..62bcb9118 100644 --- a/youtube_dl/extractor/varzesh3.py +++ b/youtube_dl/extractor/varzesh3.py @@ -16,7 +16,7 @@ class Varzesh3IE(InfoExtractor): 'url': 'http://dl1.video.varzesh3.com/video/clip94/1/video/namayeshi/saves_week26.mp4', 'id': '76337', 'ext': 'mp4', - 'title': u'۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا', + 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا', 'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/230315_saves_week26.jpg', 'description': u'فصل ۲۰۱۵-۲۰۱۴', } From 448830ce7b180250a9f2cbafc1625a20584e59d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Mar 2015 21:41:09 +0600 Subject: [PATCH 0163/2721] [youtube:watchlater] Extract watchlater as playlist (Closes #5280) --- youtube_dl/extractor/youtube.py | 52 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 27c8c4453..e0f9228a4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1263,27 +1263,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self.playlist_result(url_results, playlist_id, title) - def _real_extract(self, url): - # Extract playlist id - mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError('Invalid URL: %s' % url) - 
playlist_id = mobj.group(1) or mobj.group(2) - - # Check if it's a video-specific URL - query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - if 'v' in query_dict: - video_id = query_dict['v'][0] - if self._downloader.params.get('noplaylist'): - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - return self.url_result(video_id, 'Youtube', video_id=video_id) - else: - self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) - - if playlist_id.startswith('RD') or playlist_id.startswith('UL'): - # Mixes require a custom extraction process - return self._extract_mix(playlist_id) - + def _extract_playlist(self, playlist_id): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) more_widget_html = content_html = page @@ -1327,6 +1307,29 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, playlist_title) + def _real_extract(self, url): + # Extract playlist id + mobj = re.match(self._VALID_URL, url) + if mobj is None: + raise ExtractorError('Invalid URL: %s' % url) + playlist_id = mobj.group(1) or mobj.group(2) + + # Check if it's a video-specific URL + query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + if 'v' in query_dict: + video_id = query_dict['v'][0] + if self._downloader.params.get('noplaylist'): + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + return self.url_result(video_id, 'Youtube', video_id=video_id) + else: + self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + + if playlist_id.startswith('RD') or playlist_id.startswith('UL'): + # Mixes require a custom extraction process + return self._extract_mix(playlist_id) + + return self._extract_playlist(playlist_id) + class YoutubeChannelIE(InfoExtractor): 
IE_DESC = 'YouTube.com channels' @@ -1649,13 +1652,16 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): _PLAYLIST_TITLE = 'Youtube Recommended videos' -class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): +class YoutubeWatchLaterIE(YoutubePlaylistIE): IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' + _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' _FEED_NAME = 'watch_later' _PLAYLIST_TITLE = 'Youtube Watch Later' _PERSONAL_FEED = True + def _real_extract(self, url): + return self._extract_playlist('WL') + class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' From 7e17ec8c717796ec304b205059c741e7b47474dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Mar 2015 21:42:28 +0600 Subject: [PATCH 0164/2721] [youtube] Clarify some IE_NAMEs --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e0f9228a4..eba699c3a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1646,6 +1646,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): + IE_NAME = 'youtube:recommended' IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' 
_FEED_NAME = 'recommended' @@ -1653,6 +1654,7 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): class YoutubeWatchLaterIE(YoutubePlaylistIE): + IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' _FEED_NAME = 'watch_later' @@ -1664,6 +1666,7 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): + IE_NAME = 'youtube:history' IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' _FEED_NAME = 'history' From 425142be601adfb4218f78c10aedbed14ad1facd Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 26 Mar 2015 17:46:20 +0200 Subject: [PATCH 0165/2721] [slideshare] Fix extraction (#5279) --- youtube_dl/extractor/slideshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 9f79ff5c1..0b717a1e4 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor): page_title = mobj.group('title') webpage = self._download_webpage(url, page_title) slideshare_obj = self._search_regex( - r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=', + r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);', webpage, 'slideshare object') info = json.loads(slideshare_obj) if info['slideshow']['type'] != 'video': From 223b27f46cdf3cde9c9556dac19055ad5d8bff5f Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 26 Mar 2015 19:48:22 +0200 Subject: [PATCH 0166/2721] [vessel] Add new extractor (Closes #5275) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vessel.py | 127 +++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 youtube_dl/extractor/vessel.py 
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b0df52afd..053eb32ae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -551,6 +551,7 @@ from .ustream import UstreamIE, UstreamChannelIE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veoh import VeohIE +from .vessel import VesselIE from .vesti import VestiIE from .vevo import VevoIE from .vgtv import VGTVIE diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py new file mode 100644 index 000000000..123d9470e --- /dev/null +++ b/youtube_dl/extractor/vessel.py @@ -0,0 +1,127 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..compat import compat_urllib_request +from ..utils import ( + ExtractorError, + parse_iso8601, +) + + +class VesselIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P[0-9a-zA-Z]+)' + _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s' + _LOGIN_URL = 'https://www.vessel.com/api/account/login' + _NETRC_MACHINE = 'vessel' + _TEST = { + 'url': 'https://www.vessel.com/videos/HDN7G5UMs', + 'md5': '455cdf8beb71c6dd797fd2f3818d05c4', + 'info_dict': { + 'id': 'HDN7G5UMs', + 'ext': 'mp4', + 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20150317', + 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?', + 'timestamp': int, + }, + } + + @staticmethod + def make_json_request(url, data): + payload = json.dumps(data).encode('utf-8') + req = compat_urllib_request.Request(url, payload) + req.add_header('Content-Type', 'application/json; charset=utf-8') + return req + + @staticmethod + def find_assets(data, asset_type): + for asset in data.get('assets', []): + if asset.get('type') == asset_type: + yield asset + + def _check_access_rights(self, 
data): + access_info = data.get('__view', {}) + if access_info.get('allow_access') == False: + err_code = access_info.get('error_code') or '' + if err_code == 'ITEM_PAID_ONLY': + raise ExtractorError( + 'This video requires subscription.', expected=True) + else: + raise ExtractorError( + 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True) + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + self.report_login() + data = { + 'client_id': 'web', + 'type': 'password', + 'user_key': username, + 'password': password, + } + login_request = VesselIE.make_json_request(self._LOGIN_URL, data) + self._download_webpage(login_request, None, False, 'Wrong login info') + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + data = self._parse_json(self._search_regex( + r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id) + asset_id = data['model']['data']['id'] + + req = VesselIE.make_json_request( + self._API_URL_TEMPLATE % asset_id, {'client': 'web'}) + data = self._download_json(req, video_id) + + self._check_access_rights(data) + + try: + video_asset = next(VesselIE.find_assets(data, 'video')) + except StopIteration: + raise ExtractorError('No video assets found') + + formats = [] + for f in video_asset.get('sources', []): + if f['name'] == 'hls-index': + formats.extend(self._extract_m3u8_formats( + f['location'], video_id, ext='mp4', m3u8_id='m3u8')) + else: + formats.append({ + 'format_id': f['name'], + 'tbr': f.get('bitrate'), + 'height': f.get('height'), + 'width': f.get('width'), + 'url': f['location'], + }) + self._sort_formats(formats) + + thumbnails = [] + for im_asset in VesselIE.find_assets(data, 'image'): + thumbnails.append({ + 'url': im_asset['location'], + 'width': im_asset.get('width', 0), + 'height': im_asset.get('height', 0), + }) + + return { + 
'id': video_id, + 'title': data['title'], + 'formats': formats, + 'thumbnails': thumbnails, + 'description': data.get('short_description'), + 'duration': data.get('duration'), + 'comment_count': data.get('comment_count'), + 'like_count': data.get('like_count'), + 'view_count': data.get('view_count'), + 'timestamp': parse_iso8601(data.get('released_at')), + } From 31c48098276100a3fa2529b1443687e56838f707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 26 Mar 2015 23:57:46 +0600 Subject: [PATCH 0167/2721] [safari] Improve and simplify --- youtube_dl/extractor/safari.py | 101 +++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 3e494b960..10251f29e 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from .brightcove import BrightcoveIE @@ -20,16 +19,18 @@ from ..utils import ( class SafariBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' - _SUCCESSFUL_LOGIN_REGEX = r']+>Sign Out' - _ACCOUNT_CREDENTIALS_HINT = ('Use --username and --password options to ' - 'supply credentials for safaribooksonline.com ') - _NETRC_MACHINE = 'safaribooksonline' + _SUCCESSFUL_LOGIN_REGEX = r']*>Sign Out' + _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com' + _NETRC_MACHINE = 'safari' + + _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' + _API_FORMAT = 'json' LOGGED_IN = False def _real_initialize(self): # We only need to log in once for courses or individual videos - if not SafariBaseIE.LOGGED_IN: + if not self.LOGGED_IN: self._login() SafariBaseIE.LOGGED_IN = True @@ -49,7 +50,7 @@ class SafariBaseIE(InfoExtractor): 'Downloading login form') csrf = self._html_search_regex( - r"", + 
r"name='csrfmiddlewaretoken'\s+value='([^']+)'", login_page, 'csrf token') login_form = { @@ -66,8 +67,9 @@ class SafariBaseIE(InfoExtractor): request, None, 'Logging in as %s' % username) if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: - raise ExtractorError('Login failed; make sure your credentials are correct and ' - 'try again.', expected=True) + raise ExtractorError( + 'Login failed; make sure your credentials are correct and try again.', + expected=True) self.to_screen('Login successful') @@ -75,69 +77,80 @@ class SafariBaseIE(InfoExtractor): class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' - _VALID_URL = (r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/' - '(?P\d+)/(?Ppart\d+)\.html') - _TEST = { - 'url': ('https://www.safaribooksonline.com/library/view/' - 'hadoop-fundamentals-livelessons/9780133392838/part00.html'), + _VALID_URL = r'''(?x)https?:// + (?:www\.)?safaribooksonline\.com/ + (?: + library/view/[^/]+| + api/v1/book + )/ + (?P\d+)/ + (?:chapter(?:-content)?/)? 
+ (?Ppart\d+)\.html + ''' + + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', 'info_dict': { - 'id': '9780133392838', + 'id': '2842601850001', 'ext': 'mp4', 'title': 'Introduction', - } - } + }, + 'skip': 'Requires safaribooksonline account credentials', + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + course_id = mobj.group('course_id') part = mobj.group('part') - webpage = self._download_webpage(url, part) + webpage = self._download_webpage( + '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part), + part) + bc_url = BrightcoveIE._extract_brightcove_url(webpage) if not bc_url: raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) - return { - '_type': 'url', - 'url': smuggle_url(bc_url, {'Referer': url}), - 'ie_key': 'Brightcove' - } + return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove') class SafariCourseIE(SafariBaseIE): IE_NAME = 'safari:course' IE_DESC = 'safaribooksonline.com online courses' - _VALID_URL = (r'https?://(?:www\.)?safaribooksonline\.com/library/view/' - '(?P[^/]+)/(?P\d+)/?$') + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P\d+)/?(?:[#?]|$)' - _API_BASE = 'https://www.safaribooksonline.com/api/v1/book' - _API_FORMAT = 'json' + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', + 'info_dict': { + 'id': '9780133392838', + 'title': 'Hadoop Fundamentals LiveLessons', + }, + 'playlist_count': 22, + 'skip': 'Requires safaribooksonline account credentials', + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', + 'only_matching': True, + }] def 
_real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - course_path = mobj.group('course_path') - course_id = mobj.group('id') + course_id = self._match_id(url) - webpage = self._download_webpage( + course_json = self._download_json( '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), - course_path, 'Downloading course JSON') - - course_json = json.loads(webpage) + course_id, 'Downloading course JSON') if 'chapters' not in course_json: - raise ExtractorError('No chapters found for course %s' % course_id, expected=True) - - num_parts = len(course_json['chapters']) - parts = ['%02d' % part for part in range(num_parts)] + raise ExtractorError( + 'No chapters found for course %s' % course_id, expected=True) entries = [ - self.url_result( - 'https://www.safaribooksonline.com/library/view/%s/%s/part%s.html' % (course_path, - course_id, - part_id), - 'Safari') - for part_id in parts] + self.url_result(chapter, 'Safari') + for chapter in course_json['chapters']] course_title = course_json['title'] From c496ec084823436fa27eda0a12887c028f572ed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 26 Mar 2015 19:51:40 +0100 Subject: [PATCH 0168/2721] [vessel] Fix pep8 issue --- youtube_dl/extractor/vessel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 123d9470e..6215f0642 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -45,7 +45,7 @@ class VesselIE(InfoExtractor): def _check_access_rights(self, data): access_info = data.get('__view', {}) - if access_info.get('allow_access') == False: + if not access_info.get('allow_access', True): err_code = access_info.get('error_code') or '' if err_code == 'ITEM_PAID_ONLY': raise ExtractorError( From 157e9e5aa5e2e461ec0e4f1e0875125a440d6d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 
26 Mar 2015 20:03:31 +0100 Subject: [PATCH 0169/2721] [youtube:watchlater] Remove unused properties and fix tests --- test/test_all_urls.py | 2 +- youtube_dl/extractor/youtube.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 6ae168b7f..a9db42b30 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user']) def test_youtube_feeds(self): - self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later']) + self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index eba699c3a..5488101e1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1657,9 +1657,8 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' - _FEED_NAME = 'watch_later' - _PLAYLIST_TITLE = 'Youtube Watch Later' - _PERSONAL_FEED = True + + _TESTS = [] # override PlaylistIE tests def _real_extract(self, url): return self._extract_playlist('WL') From 2315fb5e5fb65271519fc018bb1df5a7269f0a6a Mon Sep 17 00:00:00 2001 From: Mohammad Teimori Pabandi Date: Thu, 26 Mar 2015 23:53:57 +0430 Subject: [PATCH 0170/2721] unicde :( --- youtube_dl/extractor/varzesh3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/varzesh3.py 
b/youtube_dl/extractor/varzesh3.py index 62bcb9118..eb49586cc 100644 --- a/youtube_dl/extractor/varzesh3.py +++ b/youtube_dl/extractor/varzesh3.py @@ -18,7 +18,7 @@ class Varzesh3IE(InfoExtractor): 'ext': 'mp4', 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا', 'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/230315_saves_week26.jpg', - 'description': u'فصل ۲۰۱۵-۲۰۱۴', + 'description': 'فصل ۲۰۱۵-۲۰۱۴', } } From 998e6cdba01ff54e2009d50c3026e7af0bb1c11a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 27 Mar 2015 03:05:08 +0600 Subject: [PATCH 0171/2721] [vimeo] Capture and output error message (#5294) --- youtube_dl/extractor/vimeo.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index bd09652cd..28bcc89cd 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -244,6 +244,16 @@ class VimeoIE(VimeoBaseInfoExtractor): # and latter we extract those that are Vimeo specific. 
self.report_extraction(video_id) + vimeo_config = self._search_regex( + r'vimeo\.config\s*=\s*({.+?});', webpage, + 'vimeo config', default=None) + if vimeo_config: + seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) + if seed_status.get('state') == 'failed': + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, seed_status['title']), + expected=True) + # Extract the config JSON try: try: From af140002158c1079b1365392c6a48ea06ad23c82 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Thu, 26 Mar 2015 23:24:15 +0200 Subject: [PATCH 0172/2721] [eroprofile] Add login support (#5269) --- youtube_dl/extractor/eroprofile.py | 52 ++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py index 79e2fbd39..0cbca90b0 100644 --- a/youtube_dl/extractor/eroprofile.py +++ b/youtube_dl/extractor/eroprofile.py @@ -1,11 +1,17 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ExtractorError class EroProfileIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P[^/]+)' - _TEST = { + _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?' 
+ _NETRC_MACHINE = 'eroprofile' + _TESTS = [{ 'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore', 'md5': 'c26f351332edf23e1ea28ce9ec9de32f', 'info_dict': { @@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor): 'thumbnail': 're:https?://.*\.jpg', 'age_limit': 18, } - } + }, { + 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file', + 'md5': '1baa9602ede46ce904c431f5418d8916', + 'info_dict': { + 'id': '1133519', + 'ext': 'm4v', + 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file', + 'thumbnail': 're:https?://.*\.jpg', + 'age_limit': 18, + }, + 'skip': 'Requires login', + }] + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + query = compat_urllib_parse.urlencode({ + 'username': username, + 'password': password, + 'url': 'http://www.eroprofile.com/', + }) + login_url = self._LOGIN_URL + query + login_page = self._download_webpage(login_url, None, False) + + m = re.search(r'Your username or password was incorrect\.', login_page) + if m: + raise ExtractorError( + 'Wrong username and/or password.', expected=True) + + self.report_login() + redirect_url = self._search_regex( + r']+?src="([^"]+)"', login_page, 'login redirect url') + self._download_webpage(redirect_url, None, False) + + def _real_initialize(self): + self._login() def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + m = re.search(r'You must be logged in to view this video\.', webpage) + if m: + raise ExtractorError( + 'This video requires login. 
Please specify a username and password and try again.', expected=True) + video_id = self._search_regex( [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], webpage, 'video id', default=None) From 70a1165b32acf253905109e9b4f245295d67af1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 27 Mar 2015 13:02:20 +0100 Subject: [PATCH 0173/2721] Don't use bare 'except:' They catch any exception, including KeyboardInterrupt, we don't want to catch it. --- devscripts/check-porn.py | 2 +- youtube_dl/YoutubeDL.py | 4 ++-- youtube_dl/compat.py | 2 +- youtube_dl/downloader/common.py | 2 +- youtube_dl/extractor/youporn.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 15 ++++++--------- youtube_dl/update.py | 4 ++-- youtube_dl/utils.py | 4 ++-- 8 files changed, 16 insertions(+), 19 deletions(-) diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 6a5bd9eda..7a219ebe9 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -28,7 +28,7 @@ for test in get_testcases(): if METHOD == 'EURISTIC': try: webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() - except: + except Exception: print('\nFail: {0}'.format(test['name'])) continue diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b5ef5e009..640b8c99d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1701,10 +1701,10 @@ class YoutubeDL(object): out = out.decode().strip() if re.match('[0-9a-f]+', out): self._write_string('[debug] Git HEAD: ' + out + '\n') - except: + except Exception: try: sys.exc_clear() - except: + except Exception: pass self._write_string('[debug] Python version %s - %s\n' % ( platform.python_version(), platform_name())) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b2bf149ef..973bcd320 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -389,7 +389,7 @@ else: stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = sp.communicate() lines, 
columns = map(int, out.split()) - except: + except Exception: pass return _terminal_size(columns, lines) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 8ed5c19a6..ca14d64bc 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -204,7 +204,7 @@ class FileDownloader(object): return try: os.utime(filename, (time.time(), filetime)) - except: + except Exception: pass return filetime diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index e4c855ee0..6abe72f73 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -52,7 +52,7 @@ class YouPornIE(InfoExtractor): webpage, 'JSON parameters') try: params = json.loads(json_params) - except: + except ValueError: raise ExtractorError('Invalid JSON') self.report_extraction(video_id) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index b6f51cfd5..55adf9685 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import io import os import subprocess -import sys import time @@ -269,19 +268,17 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): else: self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) self.run_ffmpeg(path, new_path, acodec, more_opts) - except: - etype, e, tb = sys.exc_info() - if isinstance(e, AudioConversionError): - msg = 'audio conversion failed: ' + e.msg - else: - msg = 'error running ' + self.basename - raise PostProcessingError(msg) + except AudioConversionError as e: + raise PostProcessingError( + 'audio conversion failed: ' + e.msg) + except Exception: + raise PostProcessingError('error running ' + self.basename) # Try to update the date time for extracted audio file. 
if information.get('filetime') is not None: try: os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) - except: + except Exception: self._downloader.report_warning('Cannot update utime of audio file') information['filepath'] = new_path diff --git a/youtube_dl/update.py b/youtube_dl/update.py index d8be4049f..de3169eef 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -65,7 +65,7 @@ def update_self(to_screen, verbose): # Check if there is a new version try: newversion = opener.open(VERSION_URL).read().decode('utf-8').strip() - except: + except Exception: if verbose: to_screen(compat_str(traceback.format_exc())) to_screen('ERROR: can\'t find the current version. Please try again later.') @@ -78,7 +78,7 @@ def update_self(to_screen, verbose): try: versions_info = opener.open(JSON_URL).read().decode('utf-8') versions_info = json.loads(versions_info) - except: + except Exception: if verbose: to_screen(compat_str(traceback.format_exc())) to_screen('ERROR: can\'t obtain versions info. Please try again later.') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 245d623d8..90e0ed9ab 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -75,7 +75,7 @@ def preferredencoding(): try: pref = locale.getpreferredencoding() 'TEST'.encode(pref) - except: + except Exception: pref = 'UTF-8' return pref @@ -127,7 +127,7 @@ def write_json_file(obj, fn): except OSError: pass os.rename(tf.name, fn) - except: + except Exception: try: os.remove(tf.name) except OSError: From 8e678af4ba873c348ec2634ab22f84c61546c093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 27 Mar 2015 14:21:53 +0100 Subject: [PATCH 0174/2721] Makefile: fix 'find' command It worked with the GNU version, but not with the BSD version. 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c6c76274f..fdb1abb60 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas clean: rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe - find -name "*.pyc" -delete + find . -name "*.pyc" -delete PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin From c59e701e35dedb40da9d1e88c051141e63ded550 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 28 Mar 2015 08:11:32 +0100 Subject: [PATCH 0175/2721] Default to continuedl=True We already do this in the CLI interface, so it should be just fine. --- youtube_dl/downloader/common.py | 2 +- youtube_dl/downloader/http.py | 2 +- youtube_dl/downloader/rtmp.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index ca14d64bc..a0fc5ead0 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -318,7 +318,7 @@ class FileDownloader(object): ) continuedl_and_exists = ( - self.params.get('continuedl', False) and + self.params.get('continuedl', True) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False) ) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 4047d7167..d136bebd1 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -49,7 +49,7 @@ class HttpFD(FileDownloader): open_mode = 'wb' if resume_len != 0: - if self.params.get('continuedl', False): + if self.params.get('continuedl', True): self.report_resuming_byte(resume_len) request.add_header('Range', 'bytes=%d-' % resume_len) open_mode = 'ab' diff --git a/youtube_dl/downloader/rtmp.py 
b/youtube_dl/downloader/rtmp.py index 89e98ae61..ddf5724ae 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -105,7 +105,7 @@ class RtmpFD(FileDownloader): protocol = info_dict.get('rtmp_protocol', None) real_time = info_dict.get('rtmp_real_time', False) no_resume = info_dict.get('no_resume', False) - continue_dl = info_dict.get('continuedl', False) + continue_dl = info_dict.get('continuedl', True) self.report_destination(filename) tmpfilename = self.temp_name(filename) From 4747e2183acbddea61a3f10fb4ba1ed5b70c8bd1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 28 Mar 2015 08:12:05 +0100 Subject: [PATCH 0176/2721] release 2015.03.28 --- docs/supportedsites.md | 7 ++++++- youtube_dl/version.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index baf7b3880..fd59cc2be 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -2,6 +2,8 @@ - **1tv**: Первый канал - **1up.com** - **220.ro** + - **22tracks:genre** + - **22tracks:track** - **24video** - **3sat** - **4tube** @@ -380,6 +382,8 @@ - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos - **RUTV**: RUTV.RU + - **safari**: safaribooksonline.com online video + - **safari:course**: safaribooksonline.com online courses - **Sandia**: Sandia National Laboratories - **Sapo**: SAPO Vídeos - **savefrom.net** @@ -500,6 +504,7 @@ - **Vbox7** - **VeeHD** - **Veoh** + - **Vessel** - **Vesti**: Вести.Ru - **Vevo** - **VGTV** @@ -588,7 +593,7 @@ - **youtube:show**: YouTube.com (multi-season) shows - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) - - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) + - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) - 
**Zapiks** - **ZDF** - **ZDFChannel** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 039ceadf2..dd93e295a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.03.24' +__version__ = '2015.03.28' From 643fe72717e6b9c45af4528e46dfc181cad7aebb Mon Sep 17 00:00:00 2001 From: Oskar Jauch Date: Sat, 28 Mar 2015 10:38:52 +0100 Subject: [PATCH 0177/2721] [DHM] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dhm.py | 52 ++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/dhm.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d56eb6448..a65c0c25b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -106,6 +106,7 @@ from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE +from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE from .dreisat import DreiSatIE diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py new file mode 100644 index 000000000..d379c9d53 --- /dev/null +++ b/youtube_dl/extractor/dhm.py @@ -0,0 +1,52 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import urllib2 +import xml.etree.ElementTree as ET +import re + + +class DHMIE(InfoExtractor): + _VALID_URL = r'http://www\.dhm\.de/filmarchiv/(?P.*?)' + + _TEST = { + 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', + 'md5': '11c475f670209bf6acca0b2b7ef51827', + 'info_dict': { + 'id': 'marshallwg', + 'ext': 'flv', + 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', + 'thumbnail': 'http://www.dhm.de/filmarchiv/video/mpworkwg.jpg', + } + } + + def _real_extract(self, url): + video_id = '' + webpage = self._download_webpage(url, video_id) + + title = 
self._html_search_regex( + r'dc:title=\"(.*?)\"', webpage, 'title') + + playlist_url = self._html_search_regex( + r'file: \'(.*?)\'', webpage, 'playlist URL') + + xml_file = urllib2.urlopen(playlist_url) + data = xml_file.read() + xml_file.close() + + root = ET.fromstring(data) + video_url = root[0][0][0].text + thumbnail = root[0][0][2].text + + m = re.search('video/(.+?).flv', video_url) + if m: + video_id = m.group(1) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': thumbnail, + } From ff79552f13efbfa87a814dd0a950aa797b08a5cd Mon Sep 17 00:00:00 2001 From: Oskar Jauch Date: Sat, 28 Mar 2015 10:42:35 +0100 Subject: [PATCH 0178/2721] [DHM] Add extractor description --- youtube_dl/extractor/dhm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py index d379c9d53..a0a584f6a 100644 --- a/youtube_dl/extractor/dhm.py +++ b/youtube_dl/extractor/dhm.py @@ -9,6 +9,7 @@ import re class DHMIE(InfoExtractor): + IE_DESC = 'Deutsches Historisches Museum' _VALID_URL = r'http://www\.dhm\.de/filmarchiv/(?P.*?)' _TEST = { From cb88671e37a9fbb964ee0b2ab46031195f2aab1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 28 Mar 2015 14:18:11 +0100 Subject: [PATCH 0179/2721] [nbc] Recognize https urls (fixes #5300) --- youtube_dl/extractor/nbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3645d3033..80a01c778 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -14,7 +14,7 @@ from ..utils import ( class NBCIE(InfoExtractor): - _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?Pn?\d+)' + _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?Pn?\d+)' _TESTS = [ { From af8c93086c45cf61cadc8571644713927659a65e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Mar 2015 22:30:13 +0600 Subject: [PATCH 0180/2721] [dhm] 
Simplify --- youtube_dl/extractor/dhm.py | 59 ++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py index a0a584f6a..80ee40018 100644 --- a/youtube_dl/extractor/dhm.py +++ b/youtube_dl/extractor/dhm.py @@ -1,53 +1,64 @@ -# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor - -import urllib2 -import xml.etree.ElementTree as ET -import re +from ..utils import ( + xpath_text, + parse_duration, +) class DHMIE(InfoExtractor): - IE_DESC = 'Deutsches Historisches Museum' - _VALID_URL = r'http://www\.dhm\.de/filmarchiv/(?P.*?)' + IE_DESC = 'Filmarchiv - Deutsches Historisches Museum' + _VALID_URL = r'http://www\.dhm\.de/filmarchiv/die-filme/(?P[^/]+)' _TEST = { 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', 'md5': '11c475f670209bf6acca0b2b7ef51827', 'info_dict': { - 'id': 'marshallwg', + 'id': 'the-marshallplan-at-work-in-west-germany', 'ext': 'flv', 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE', - 'thumbnail': 'http://www.dhm.de/filmarchiv/video/mpworkwg.jpg', + 'description': 'md5:1fabd480c153f97b07add61c44407c82', + 'duration': 660, + 'thumbnail': 're:^https?://.*\.jpg$', } } def _real_extract(self, url): - video_id = '' + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex( - r'dc:title=\"(.*?)\"', webpage, 'title') + playlist_url = self._search_regex( + r"file\s*:\s*'([^']+)'", webpage, 'playlist url') - playlist_url = self._html_search_regex( - r'file: \'(.*?)\'', webpage, 'playlist URL') + playlist = self._download_xml(playlist_url, video_id) - xml_file = urllib2.urlopen(playlist_url) - data = xml_file.read() - xml_file.close() + track = playlist.find( + './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track') - root = ET.fromstring(data) - video_url = root[0][0][0].text - thumbnail = 
root[0][0][2].text + video_url = xpath_text( + track, './{http://xspf.org/ns/0/}location', + 'video url', fatal=True) + thumbnail = xpath_text( + track, './{http://xspf.org/ns/0/}image', + 'thumbnail') - m = re.search('video/(.+?).flv', video_url) - if m: - video_id = m.group(1) + title = self._search_regex( + [r'dc:title="([^"]+)"', r' »([^<]+)'], + webpage, 'title').strip() + description = self._html_search_regex( + r'

    Description:(.+?)

    ', + webpage, 'description', fatal=False) + duration = parse_duration(self._search_regex( + r'Length\s*\s*:\s*([^<]+)', + webpage, 'duration', fatal=False)) return { 'id': video_id, - 'title': title, 'url': video_url, + 'title': title, + 'description': description, + 'duration': duration, 'thumbnail': thumbnail, } From b7a2268e7b52fbedd1630ad101460d76cca9dcdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Mar 2015 23:43:15 +0600 Subject: [PATCH 0181/2721] Credit @ossi96 for dhm (#5305) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 59f1b5f21..b36086448 100644 --- a/AUTHORS +++ b/AUTHORS @@ -118,3 +118,4 @@ Devin J. Pohly Eduardo Ferro Aldama Jeff Buchbinder Amish Bhadeshia +Oskar Jauch From 5a3b315b5fcc1b1e153baeaed669e64d137d4047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Mar 2015 23:55:15 +0600 Subject: [PATCH 0182/2721] [dhm] Improve _VALID_URL and add test --- youtube_dl/extractor/dhm.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py index 80ee40018..3ed1f1663 100644 --- a/youtube_dl/extractor/dhm.py +++ b/youtube_dl/extractor/dhm.py @@ -9,9 +9,9 @@ from ..utils import ( class DHMIE(InfoExtractor): IE_DESC = 'Filmarchiv - Deutsches Historisches Museum' - _VALID_URL = r'http://www\.dhm\.de/filmarchiv/die-filme/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P[^/]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/', 'md5': '11c475f670209bf6acca0b2b7ef51827', 'info_dict': { @@ -21,8 +21,17 @@ class DHMIE(InfoExtractor): 'description': 'md5:1fabd480c153f97b07add61c44407c82', 'duration': 660, 'thumbnail': 're:^https?://.*\.jpg$', - } - } + }, + }, { + 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/', + 'md5': 
'09890226332476a3e3f6f2cb74734aa5', + 'info_dict': { + 'id': 'rolle-1', + 'ext': 'flv', + 'title': 'ROLLE 1', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -49,10 +58,10 @@ class DHMIE(InfoExtractor): webpage, 'title').strip() description = self._html_search_regex( r'

    Description:(.+?)

    ', - webpage, 'description', fatal=False) + webpage, 'description', default=None) duration = parse_duration(self._search_regex( r'Length\s*\s*:\s*([^<]+)', - webpage, 'duration', fatal=False)) + webpage, 'duration', default=None)) return { 'id': video_id, From 616af2f4b9458dd15152ddf6a7905f7818ae583a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Mar 2015 00:03:59 +0600 Subject: [PATCH 0183/2721] Unduplicate @ossi96 --- AUTHORS | 1 - 1 file changed, 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index b36086448..59f1b5f21 100644 --- a/AUTHORS +++ b/AUTHORS @@ -118,4 +118,3 @@ Devin J. Pohly Eduardo Ferro Aldama Jeff Buchbinder Amish Bhadeshia -Oskar Jauch From 5f88e028183896f67e35d2d82d9596213cd2c73e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 28 Mar 2015 23:35:55 +0100 Subject: [PATCH 0184/2721] [ultimedia] PEP8 --- youtube_dl/extractor/ultimedia.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py index 06554a1be..96c809eaf 100644 --- a/youtube_dl/extractor/ultimedia.py +++ b/youtube_dl/extractor/ultimedia.py @@ -42,7 +42,6 @@ class UltimediaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) deliver_url = self._search_regex( @@ -81,8 +80,8 @@ class UltimediaIE(InfoExtractor): title = clean_html(( self._html_search_regex( r'(?s).+?
    (.+?)', - webpage, 'title', default=None) - or self._search_regex( + webpage, 'title', default=None) or + self._search_regex( r"var\s+nameVideo\s*=\s*'([^']+)'", deliver_page, 'title'))) From ecb750a446dd4805467904864b99eab705e866a8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 28 Mar 2015 23:39:41 +0100 Subject: [PATCH 0185/2721] [cnn] Match more URLs --- youtube_dl/extractor/cnn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 90ea07438..0a77e951c 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 @@ from ..utils import ( class CNNIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ - (?P.+?/(?P[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))''' + (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))''' _TESTS = [{ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', @@ -45,6 +45,9 @@ class CNNIE(InfoExtractor): 'description': 'md5:e7223a503315c9f150acac52e76de086', 'upload_date': '20141222', } + }, { + 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', + 'only_matching': True, }] def _real_extract(self, url): From ad320e9b8376221a3eda935a358886b6e7ab7bf6 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sun, 29 Mar 2015 04:57:37 +0300 Subject: [PATCH 0186/2721] [generic] Add support for 5min embeds (#5310) --- youtube_dl/extractor/generic.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8a49b0b54..042d23a13 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -620,6 +620,16 @@ class GenericIE(InfoExtractor): 'age_limit': 0, }, }, + # 5min embed + { + 'url': 
'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/', + 'md5': '4c6f127a30736b59b3e2c19234ee2bf7', + 'info_dict': { + 'id': '518726732', + 'ext': 'mp4', + 'title': 'Facebook Creates "On This Day" | Crunch Report', + }, + }, # RSS feed with enclosure { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', @@ -1236,6 +1246,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'Pladform') + # Look for 5min embeds + mobj = re.search( + r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage) + if mobj is not None: + return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True From 8343a03357ec4edb49b29bc841f2410ee4b610d1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 Mar 2015 14:26:28 +0800 Subject: [PATCH 0187/2721] [douyutv] Fix extractor and improve error handling --- youtube_dl/extractor/douyutv.py | 57 ++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index d7956e6e4..3a1665ab4 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -1,19 +1,23 @@ # coding: utf-8 from __future__ import unicode_literals +import hashlib +import time from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import (ExtractorError, unescapeHTML) +from ..compat import (compat_str, compat_basestring) class DouyuTVIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { - 'id': 'iseven', + 'id': '17732', + 'display_id': 'iseven', 'ext': 'flv', 'title': 're:^清晨醒脑!T-ara根本停不下来! 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:9e525642c25a0a24302869937cf69d17', + 'description': 'md5:c93d6692dde6fe33809a46edcbecca44', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', 'uploader_id': '431925', @@ -22,22 +26,52 @@ class DouyuTVIE(InfoExtractor): 'params': { 'skip_download': True, } - } + }, { + 'url': 'http://www.douyutv.com/85982', + 'info_dict': { + 'id': '85982', + 'display_id': '85982', + 'ext': 'flv', + 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'douyu小漠', + 'uploader_id': '3769985', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }] def _real_extract(self, url): video_id = self._match_id(url) + if video_id.isdigit(): + room_id = video_id + else: + page = self._download_webpage(url, video_id) + room_id = self._html_search_regex( + r'"room_id"\s*:\s*(\d+),', page, 'room id') + + prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( + room_id, int(time.time())) + + auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() config = self._download_json( - 'http://www.douyutv.com/api/client/room/%s' % video_id, video_id) + 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), + video_id) data = config['data'] error_code = config.get('error', 0) - show_status = data.get('show_status') if error_code is not 0: - raise ExtractorError( - 'Server reported error %i' % error_code, expected=True) + error_desc = 'Server reported error %i' % error_code + if isinstance(data, compat_str) or isinstance(data, compat_basestring): + error_desc += ': ' + data + raise ExtractorError(error_desc, expected=True) + show_status = data.get('show_status') # 1 = live, 2 = offline if show_status == '2': raise ExtractorError( @@ -46,7 +80,7 @@ class DouyuTVIE(InfoExtractor): base_url = data['rtmp_url'] live_path = data['rtmp_live'] - title = 
self._live_title(data['room_name']) + title = self._live_title(unescapeHTML(data['room_name'])) description = data.get('show_details') thumbnail = data.get('room_src') @@ -66,7 +100,8 @@ class DouyuTVIE(InfoExtractor): self._sort_formats(formats) return { - 'id': video_id, + 'id': room_id, + 'display_id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, From 2ddf08358861a3c0f8724020c4eaf119e6be21a2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 Mar 2015 18:17:48 +0800 Subject: [PATCH 0188/2721] [douyutv] Simplify usage of isinstance --- youtube_dl/extractor/douyutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index 3a1665ab4..479430c51 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -67,7 +67,7 @@ class DouyuTVIE(InfoExtractor): error_code = config.get('error', 0) if error_code is not 0: error_desc = 'Server reported error %i' % error_code - if isinstance(data, compat_str) or isinstance(data, compat_basestring): + if isinstance(data, (compat_str, compat_basestring)): error_desc += ': ' + data raise ExtractorError(error_desc, expected=True) From 9b4774b21bb26eea9eb8042b2ff248a176595c5a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 Mar 2015 20:40:05 +0800 Subject: [PATCH 0189/2721] [Xuite] Fix extraction on python 3.2 base64.b64decode() accept only binary types in Python 3.2 --- youtube_dl/extractor/xuite.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py index 4971965f9..81d885fdc 100644 --- a/youtube_dl/extractor/xuite.py +++ b/youtube_dl/extractor/xuite.py @@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def base64_decode_utf8(data): + return base64.b64decode(data.encode('utf-8')).decode('utf-8') + + 
@staticmethod + def base64_encode_utf8(data): + return base64.b64encode(data.encode('utf-8')).decode('utf-8') + def _extract_flv_config(self, media_id): - base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8') + base64_media_id = self.base64_encode_utf8(media_id) flv_config = self._download_xml( 'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id, 'flv config') prop_dict = {} for prop in flv_config.findall('./property'): - prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8') + prop_id = self.base64_decode_utf8(prop.attrib['id']) # CDATA may be empty in flv config if not prop.text: continue - encoded_content = base64.b64decode(prop.text).decode('utf-8') + encoded_content = self.base64_decode_utf8(prop.text) prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content) return prop_dict From 4d5d14f5cf096f4bd90ad373cad687dc82bf8a8f Mon Sep 17 00:00:00 2001 From: Joram Schrijver <i@joram.io> Date: Sun, 29 Mar 2015 23:41:06 +0200 Subject: [PATCH 0190/2721] [Dumpert] Add new extractor Add support for the Dutch video site Dumpert. 
http://www.dumpert.nl/ --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dumpert.py | 47 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/dumpert.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a65c0c25b..43bac0252 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -115,6 +115,7 @@ from .drtuber import DrTuberIE from .drtv import DRTVIE from .dvtv import DVTVIE from .dump import DumpIE +from .dumpert import DumpertIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .divxstage import DivxStageIE diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py new file mode 100644 index 000000000..52d07deac --- /dev/null +++ b/youtube_dl/extractor/dumpert.py @@ -0,0 +1,47 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 + +from .common import InfoExtractor + + +class DumpertIE(InfoExtractor): + _VALID_URL = (r'https?://(?:www\.)?dumpert\.nl/mediabase/' + r'(?P<id>[0-9]+/[0-9a-zA-Z]+)/?.*') + _TEST = { + 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', + 'md5': '1b9318d7d5054e7dcb9dc7654f21d643', + 'info_dict': { + 'id': '6646981/951bc60f', + 'ext': 'mp4', + 'title': 'Ik heb nieuws voor je', + 'description': 'Niet schrikken hoor' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_meta('title', webpage) + description = self._html_search_meta('description', webpage) + + files_base64 = self._html_search_regex(r'data-files="(.*?)"', + webpage, + 'files') + files_json = base64.b64decode(files_base64).decode('iso-8859-1') + files = self._parse_json(files_json, video_id) + + format_names = ['flv', 'mobile', 'tablet', '720p'] + formats = [{'format_id': name, + 'url': files[name].replace(r'\/', '/')} + for name in format_names + if name in files] 
+ + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats + } From 7700207ec7d39e1594d9963a5014ddcb30c7301a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 Mar 2015 19:41:04 +0600 Subject: [PATCH 0191/2721] [pornhub] Fix comment count extraction (Closes #5320) --- youtube_dl/extractor/pornhub.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3a27e3789..0c8b731cf 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor): } def _extract_count(self, pattern, webpage, name): - count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False) - if count: - count = str_to_int(count) - return count + return str_to_int(self._search_regex( + pattern, webpage, '%s count' % name, fatal=False)) def _real_extract(self, url): video_id = self._match_id(url) @@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor): if thumbnail: thumbnail = compat_urllib_parse.unquote(thumbnail) - view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') - like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') - dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike') + view_count = self._extract_count( + r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') + like_count = self._extract_count( + r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') + dislike_count = self._extract_count( + r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike') comment_count = self._extract_count( - r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment') + r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') video_urls = 
list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) if webpage.find('"encrypted":true') != -1: From 87270c84166bb014ba1043a6eeee7330694b0649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 Mar 2015 20:11:51 +0600 Subject: [PATCH 0192/2721] [dumpert] Simplify and fix python 3.2 --- youtube_dl/extractor/dumpert.py | 39 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index 52d07deac..e43bc81b2 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor +from ..utils import qualities class DumpertIE(InfoExtractor): - _VALID_URL = (r'https?://(?:www\.)?dumpert\.nl/mediabase/' - r'(?P<id>[0-9]+/[0-9a-zA-Z]+)/?.*') + _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)' _TEST = { 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/', 'md5': '1b9318d7d5054e7dcb9dc7654f21d643', @@ -16,7 +16,8 @@ class DumpertIE(InfoExtractor): 'id': '6646981/951bc60f', 'ext': 'mp4', 'title': 'Ik heb nieuws voor je', - 'description': 'Niet schrikken hoor' + 'description': 'Niet schrikken hoor', + 'thumbnail': 're:^https?://.*\.jpg$', } } @@ -24,24 +25,32 @@ class DumpertIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('title', webpage) - description = self._html_search_meta('description', webpage) + files_base64 = self._search_regex( + r'data-files="([^"]+)"', webpage, 'data files') - files_base64 = self._html_search_regex(r'data-files="(.*?)"', - webpage, - 'files') - files_json = base64.b64decode(files_base64).decode('iso-8859-1') - files = self._parse_json(files_json, video_id) + files = self._parse_json( + 
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'), + video_id) - format_names = ['flv', 'mobile', 'tablet', '720p'] - formats = [{'format_id': name, - 'url': files[name].replace(r'\/', '/')} - for name in format_names - if name in files] + quality = qualities(['flv', 'mobile', 'tablet', '720p']) + + formats = [{ + 'url': video_url, + 'format_id': format_id, + 'quality': quality(format_id), + } for format_id, video_url in files.items() if format_id != 'still'] + self._sort_formats(formats) + + title = self._html_search_meta( + 'title', webpage) or self._og_search_title(webpage) + description = self._html_search_meta( + 'description', webpage) or self._og_search_description(webpage) + thumbnail = files.get('still') or self._og_search_thumbnail(webpage) return { 'id': video_id, 'title': title, 'description': description, + 'thumbnail': thumbnail, 'formats': formats } From fd203fe35774f6e8a5ce17ad78650089d2536c97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 Mar 2015 20:12:55 +0600 Subject: [PATCH 0193/2721] Credit @jorams or dumpert.nl (#5319) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 59f1b5f21..48be31e29 100644 --- a/AUTHORS +++ b/AUTHORS @@ -118,3 +118,4 @@ Devin J. 
Pohly Eduardo Ferro Aldama Jeff Buchbinder Amish Bhadeshia +Joram Schrijver From c808ef81bb67f737b89671ce882abfca666e0139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 Mar 2015 21:03:38 +0600 Subject: [PATCH 0194/2721] [soundcloud:set:user] Support mobile URLs (Closes #5323) --- youtube_dl/extractor/soundcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 9d4505972..316b2c90f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -242,7 +242,7 @@ class SoundcloudIE(InfoExtractor): class SoundcloudSetIE(SoundcloudIE): - _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' IE_NAME = 'soundcloud:set' _TESTS = [{ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', @@ -287,7 +287,7 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$' + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$' IE_NAME = 'soundcloud:user' _TESTS = [{ 'url': 'https://soundcloud.com/the-concept-band', From edd7344820303e550c9daae1ef591be8068ac47e Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 30 Mar 2015 18:15:08 +0300 Subject: [PATCH 0195/2721] [phoenix] Extend _VALID_URL (#5322) --- youtube_dl/extractor/phoenix.py | 40 ++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py index a20672c0c..46cebc0d7 100644 --- a/youtube_dl/extractor/phoenix.py +++ 
b/youtube_dl/extractor/phoenix.py @@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url class PhoenixIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.phoenix.de/content/884301', - 'md5': 'ed249f045256150c92e72dbb70eadec6', - 'info_dict': { - 'id': '884301', - 'ext': 'mp4', - 'title': 'Michael Krons mit Hans-Werner Sinn', - 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', - 'upload_date': '20141025', - 'uploader': 'Im Dialog', - } - } + _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ + (?: + phoenix/die_sendungen/(?:[^/]+/)? + )? + (?P<id>[0-9]+)''' + _TESTS = [ + { + 'url': 'http://www.phoenix.de/content/884301', + 'md5': 'ed249f045256150c92e72dbb70eadec6', + 'info_dict': { + 'id': '884301', + 'ext': 'mp4', + 'title': 'Michael Krons mit Hans-Werner Sinn', + 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', + 'upload_date': '20141025', + 'uploader': 'Im Dialog', + } + }, + { + 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815', + 'only_matching': True, + }, + { + 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234', + 'only_matching': True, + }, + ] def _real_extract(self, url): video_id = self._match_id(url) From a28ccbabc60c81016c851ae46365be377ea83795 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 31 Mar 2015 02:21:27 +0800 Subject: [PATCH 0196/2721] [Yahoo/NBCSports] Fix #5226 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nbc.py | 21 +++++++++++++++++++++ youtube_dl/extractor/yahoo.py | 14 ++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 43bac0252..5d0d2a9bc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -310,6 +310,7 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import ( NBCIE, + NBCSportsIE, NBCNewsIE, ) from .ndr 
import NDRIE diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 80a01c778..033bf71f0 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -50,6 +50,27 @@ class NBCIE(InfoExtractor): return self.url_result(theplatform_url) +class NBCSportsIE(InfoExtractor): + _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', + 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', + 'info_dict': { + 'id': '9CsDKds0kvHI', + 'ext': 'flv', + 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', + 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + theplatform_url = self._og_search_video_url(webpage) + return self.url_result(theplatform_url, 'ThePlatform') + + class NBCNewsIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P<id>\d+)| diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 97dbac4cc..6e72f1e55 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -129,6 +129,15 @@ class YahooIE(InfoExtractor): }, { 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', 'only_matching': True, + }, { + 'note': 'NBC Sports embeds', + 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', + 'info_dict': { + 'id': '9CsDKds0kvHI', + 'ext': 'flv', + 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', + 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', + } } ] @@ -151,6 +160,11 @@ class YahooIE(InfoExtractor): items = json.loads(items_json) video_id = items[0]['id'] return self._get_info(video_id, display_id, webpage) + # Look 
for NBCSports iframes + iframe_m = re.search( + r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) + if iframe_m: + return self.url_result(iframe_m.group('url'), 'NBCSports') items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, From a2a4d5fa313d5244d24fa70d5db91971a7583d79 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 31 Mar 2015 02:47:18 +0800 Subject: [PATCH 0197/2721] [Yahoo/NBCSports] Generalize NBC sports info extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nbc.py | 31 ++++++++++++++++++++++++++++++- youtube_dl/extractor/yahoo.py | 10 ++++++---- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5d0d2a9bc..b113aaec6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -310,6 +310,7 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import ( NBCIE, + NBCSportsVPlayerIE, NBCSportsIE, NBCNewsIE, ) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 033bf71f0..c8dd72ab4 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -50,7 +50,7 @@ class NBCIE(InfoExtractor): return self.url_result(theplatform_url) -class NBCSportsIE(InfoExtractor): +class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' _TEST = { @@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor): } } + @staticmethod + def _extract_url(webpage): + iframe_m = re.search( + r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) + if iframe_m: + return iframe_m.group('url') + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor): return self.url_result(theplatform_url, 'ThePlatform') +class 
NBCSportsIE(InfoExtractor): + # Does not include https becuase its certificate is invalid + _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + + _TEST = { + 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', + 'md5': 'ba6c93f96b67bf05344f78bd523dac0f', + 'info_dict': { + 'id': 'PHJSaFWbrTY9', + 'ext': 'flv', + 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke', + 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + return self.url_result( + NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer') + + class NBCNewsIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P<id>\d+)| diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 6e72f1e55..43776d1e6 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,6 +17,8 @@ from ..utils import ( int_or_none, ) +from .nbc import NBCSportsVPlayerIE + class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' @@ -132,6 +134,7 @@ class YahooIE(InfoExtractor): }, { 'note': 'NBC Sports embeds', 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', + 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { 'id': '9CsDKds0kvHI', 'ext': 'flv', @@ -161,10 +164,9 @@ class YahooIE(InfoExtractor): video_id = items[0]['id'] return self._get_info(video_id, display_id, webpage) # Look for NBCSports iframes - iframe_m = re.search( - r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage) - if iframe_m: - return self.url_result(iframe_m.group('url'), 'NBCSports') + nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) + if nbc_sports_url: + return 
self.url_result(nbc_sports_url, 'NBCSportsVPlayer') items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, From 1d31e7a2fc2fb78c792754578a8a58b056811b84 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 31 Mar 2015 02:51:11 +0800 Subject: [PATCH 0198/2721] [NBCSports] Move imports alphabetically --- youtube_dl/extractor/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b113aaec6..9fddb8e32 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -310,9 +310,9 @@ from .naver import NaverIE from .nba import NBAIE from .nbc import ( NBCIE, - NBCSportsVPlayerIE, - NBCSportsIE, NBCNewsIE, + NBCSportsIE, + NBCSportsVPlayerIE, ) from .ndr import NDRIE from .ndtv import NDTVIE From a2edf2e7ff314eaa3124c1da1b962d054b6d9fff Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 31 Mar 2015 03:36:09 +0800 Subject: [PATCH 0199/2721] [NBC/ThePlatform/Generic] Add a generic detector for NBCSportsVPlayer and enhance error detection in ThePlatformIE --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ youtube_dl/extractor/nbc.py | 2 +- youtube_dl/extractor/theplatform.py | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 042d23a13..9ddf36f6b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -29,6 +29,7 @@ from ..utils import ( xpath_text, ) from .brightcove import BrightcoveIE +from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE from .smotri import SmotriIE @@ -639,6 +640,15 @@ class GenericIE(InfoExtractor): 'upload_date': '20150228', 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } + }, + # NBC Sports vplayer embeds + { + 'url': 'http://bbs.clutchfans.net/showthread.php?t=244180', + 
'info_dict': { + 'id': '_hqLjQ95yx8Z', + 'ext': 'flv' + }, + 'skip': 'This content expired on 9/17/14 12:23 PM', } ] @@ -1252,6 +1262,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') + # Look for NBC Sports VPlayer embeds + nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) + if nbc_sports_url: + return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index c8dd72ab4..be9969d12 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -51,7 +51,7 @@ class NBCIE(InfoExtractor): class NBCSportsVPlayerIE(InfoExtractor): - _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' _TEST = { 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index feac666f7..0e3e627f4 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -92,7 +92,7 @@ class ThePlatformIE(InfoExtractor): error_msg = next( n.attrib['abstract'] for n in meta.findall(_x('.//smil:ref')) - if n.attrib.get('title') == 'Geographic Restriction') + if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired') except StopIteration: pass else: From 5cbb2699ee04535449e37a07dd9cac9bfd224fe3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 31 Mar 2015 03:38:45 +0800 Subject: [PATCH 0200/2721] [NBCSports] Add a test case for extended _VALID_URL --- youtube_dl/extractor/nbc.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index be9969d12..395f53df3 100644 --- 
a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -53,7 +53,7 @@ class NBCIE(InfoExtractor): class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { @@ -62,7 +62,11 @@ class NBCSportsVPlayerIE(InfoExtractor): 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', } - } + }, { + 'note': 'This video is already expired. It\'s for testing _VALID_URL', + 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', + 'only_matching': True, + }] @staticmethod def _extract_url(webpage): From e15307a612ea588b504f1f03ba0201612df66b35 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 31 Mar 2015 13:13:29 +0800 Subject: [PATCH 0201/2721] [NBCSports/Yahoo] Comment out some MD5 checksums They seems to change constantly --- youtube_dl/extractor/nbc.py | 2 -- youtube_dl/extractor/yahoo.py | 1 - 2 files changed, 3 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 395f53df3..b7f6a5366 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -55,7 +55,6 @@ class NBCSportsVPlayerIE(InfoExtractor): _TESTS = [{ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', - 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { 'id': '9CsDKds0kvHI', 'ext': 'flv', @@ -88,7 +87,6 @@ class NBCSportsIE(InfoExtractor): _TEST = { 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', - 'md5': 'ba6c93f96b67bf05344f78bd523dac0f', 'info_dict': { 'id': 'PHJSaFWbrTY9', 'ext': 'flv', diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 43776d1e6..b777159c5 100644 --- 
a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -134,7 +134,6 @@ class YahooIE(InfoExtractor): }, { 'note': 'NBC Sports embeds', 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313', - 'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5', 'info_dict': { 'id': '9CsDKds0kvHI', 'ext': 'flv', From facecb84a11ba59a67d01c659ec3944179774418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Mar 2015 20:11:14 +0600 Subject: [PATCH 0202/2721] [generic] Add working NBC Sports vplayer test --- youtube_dl/extractor/generic.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9ddf36f6b..2ff002643 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -641,14 +641,15 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, - # NBC Sports vplayer embeds + # NBC Sports vplayer embed { - 'url': 'http://bbs.clutchfans.net/showthread.php?t=244180', + 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a', 'info_dict': { - 'id': '_hqLjQ95yx8Z', - 'ext': 'flv' + 'id': 'ln7x1qSThw4k', + 'ext': 'flv', + 'title': "PFT Live: New leader in the 'new-look' defense", + 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', }, - 'skip': 'This content expired on 9/17/14 12:23 PM', } ] From c89fbfb385c4989af3bf2eb45e300e01c385cfc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Mar 2015 20:14:37 +0600 Subject: [PATCH 0203/2721] [nbc] Remove redundant note This is already supposed by `only_matching` --- youtube_dl/extractor/nbc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index b7f6a5366..ecd0ac8b1 
100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -62,7 +62,6 @@ class NBCSportsVPlayerIE(InfoExtractor): 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', } }, { - 'note': 'This video is already expired. It\'s for testing _VALID_URL', 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', 'only_matching': True, }] From 2a0c2ca2b8be753f13f9b9dfdcce55560e4953e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 31 Mar 2015 20:55:21 +0600 Subject: [PATCH 0204/2721] [dailymotion] Fix ff cookie and use it for embed page (Closes #5330) --- youtube_dl/extractor/dailymotion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 4f67c3aac..47d58330b 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): def _build_request(url): """Build a request with the family filter disabled""" request = compat_urllib_request.Request(url) - request.add_header('Cookie', 'family_filter=off') - request.add_header('Cookie', 'ff=off') + request.add_header('Cookie', 'family_filter=off; ff=off') return request @@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id - embed_page = self._download_webpage(embed_url, video_id, - 'Downloading embed page') + embed_request = self._build_request(embed_url) + embed_page = self._download_webpage( + embed_request, video_id, 'Downloading embed page') info = self._search_regex(r'var info = ({.*?}),$', embed_page, 'video info', flags=re.MULTILINE) info = json.loads(info) From 55cde6ef3c046dfd5f6ef84b908d38ee4375cf08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> 
Date: Wed, 1 Apr 2015 22:02:55 +0600 Subject: [PATCH 0205/2721] [varzesh3] Simplify --- youtube_dl/extractor/varzesh3.py | 43 +++++++++++++++----------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py index eb49586cc..9369abaf8 100644 --- a/youtube_dl/extractor/varzesh3.py +++ b/youtube_dl/extractor/varzesh3.py @@ -1,48 +1,45 @@ # coding: utf-8 from __future__ import unicode_literals + from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) -import re class Varzesh3IE(InfoExtractor): - _VALID_URL = r'(?P<url>(https?://(?:www\.)?video\.varzesh3\.com)/(?P<id>.+))' - _TEST ={ + _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?' + _TEST = { 'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/', 'md5': '2a933874cb7dce4366075281eb49e855', 'info_dict': { - 'url': 'http://dl1.video.varzesh3.com/video/clip94/1/video/namayeshi/saves_week26.mp4', 'id': '76337', 'ext': 'mp4', 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا', - 'thumbnail': 'http://video.varzesh3.com/wp-content/uploads/230315_saves_week26.jpg', 'description': 'فصل ۲۰۱۵-۲۰۱۴', + 'thumbnail': 're:^https?://.*\.jpg$', } } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + display_id = self._match_id(url) - if not 'shortlink' in webpage: - raise ExtractorError('URL has no videos or there is a problem.') + webpage = self._download_webpage(url, display_id) - title = self._html_search_regex(r'meta[^>]+property="og:title"[^>]+content="([^"]+)"', webpage, 'title') - video_link = self._html_search_regex(r'source[^>]+src="([^"]+)"', webpage, 'video_link') - vid_id = 
self._html_search_regex(r"link[^>]+rel='canonical'[^>]+href='\/\?p=([^']+)'\/>", webpage, 'vid_id') - try: - description = self._html_search_regex(r'<div class="matn">(.*?)</div>', webpage, 'description', flags=re.DOTALL) - except: - description = title - thumbnail = self._html_search_regex(r'link[^>]+rel="image_src"[^>]+href="([^"]+)"', webpage, 'thumbnail') + video_url = self._search_regex( + r'<source[^>]+src="([^"]+)"', webpage, 'video url') + + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'(?s)<div class="matn">(.+?)</div>', + webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) + + video_id = self._search_regex( + r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'", + webpage, display_id, default=display_id) return { - 'url': video_link, - 'id': vid_id, + 'url': video_url, + 'id': video_id, 'title': title, - 'ext': video_link.split(".")[-1], 'description': description, 'thumbnail': thumbnail, } From 1a944d8a2ae8756b78a709862cccae58720445ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 2 Apr 2015 14:09:55 +0200 Subject: [PATCH 0206/2721] Print a warning if no ssl certificates are loaded --- youtube_dl/YoutubeDL.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 640b8c99d..4fa2223ad 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1768,6 +1768,12 @@ class YoutubeDL(object): debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) + # The ssl context is only available in python 2.7.9 and 3.x + if hasattr(https_handler, '_context'): + if len(https_handler._context.get_ca_certs()) == 0: + self.report_warning( + 'No ssl certificates were loaded, urls that use https ' + 'won\'t work') ydlh = YoutubeDLHandler(self.params, 
debuglevel=debuglevel) opener = compat_urllib_request.build_opener( proxy_handler, https_handler, cookie_processor, ydlh) From 8075d4f99dbbf330c4a44d58b535055853b99aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Apr 2015 20:26:05 +0600 Subject: [PATCH 0207/2721] [playfm] Adapt to v2api (Closes #5344) --- youtube_dl/extractor/playfm.py | 87 +++++++++++++++------------------- 1 file changed, 37 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py index 9576aed0e..e766ccca3 100644 --- a/youtube_dl/extractor/playfm.py +++ b/youtube_dl/extractor/playfm.py @@ -4,85 +4,72 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, - compat_urllib_request, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - float_or_none, int_or_none, - str_to_int, + parse_iso8601, ) class PlayFMIE(InfoExtractor): IE_NAME = 'play.fm' - _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])' _TEST = { - 'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220', + 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12', 'md5': 'c505f8307825a245d0c7ad1850001f22', 'info_dict': { - 'id': '137220', + 'id': '71276', 'ext': 'mp3', - 'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', - 'uploader': 'Sven Tasnadi', - 'uploader_id': 'sventasnadi', - 'duration': 5627.428, - 'upload_date': '20140712', + 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', + 'description': '', + 'duration': 5627, + 'timestamp': 1406033781, + 'upload_date': '20140722', + 'uploader': 'Dan Drastic', + 'uploader_id': '71170', 'view_count': int, 
'comment_count': int, - 'thumbnail': 're:^https?://.*\.jpg$', }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - upload_date = mobj.group('upload_date') + slug = mobj.group('slug') - rec_data = compat_urllib_parse.urlencode({'rec_id': video_id}) - req = compat_urllib_request.Request( - 'http://www.play.fm/flexRead/recording', data=rec_data) - req.add_header('Content-Type', 'application/x-www-form-urlencoded') - rec_doc = self._download_xml(req, video_id) + recordings = self._download_json( + 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id) - error_node = rec_doc.find('./error') - if error_node is not None: - raise ExtractorError('An error occured: %s (code %s)' % ( - error_node.text, rec_doc.find('./status').text)) + error = recordings.get('error') + if isinstance(error, dict): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error.get('message')), + expected=True) - recording = rec_doc.find('./recording') - title = recording.find('./title').text - view_count = str_to_int(recording.find('./stats/playcount').text) - comment_count = str_to_int(recording.find('./stats/comments').text) - duration = float_or_none(recording.find('./duration').text, scale=1000) - thumbnail = recording.find('./image').text - - artist = recording.find('./artists/artist') - uploader = artist.find('./name').text - uploader_id = artist.find('./slug').text - - video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % ( - 'http:', recording.find('./url').text, - recording.find('./_class').text, recording.find('./file_id').text, - rec_doc.find('./uuid').text, video_id, - rec_doc.find('./jingle/file_id').text, - 'http%3A%2F%2Fwww.play.fm%2Fplayer', - ) + audio_url = recordings['audio'] + video_id = compat_str(recordings.get('id') or video_id) + title = recordings['title'] + description = recordings.get('description') + duration = int_or_none(recordings.get('recordingDuration')) + timestamp = 
parse_iso8601(recordings.get('created_at')) + uploader = recordings.get('page', {}).get('title') + uploader_id = compat_str(recordings.get('page', {}).get('id')) + view_count = int_or_none(recordings.get('playCount')) + comment_count = int_or_none(recordings.get('commentCount')) + categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')] return { 'id': video_id, - 'url': video_url, - 'ext': 'mp3', - 'filesize': int_or_none(recording.find('./size').text), + 'url': audio_url, 'title': title, - 'upload_date': upload_date, - 'view_count': view_count, - 'comment_count': comment_count, + 'description': description, 'duration': duration, - 'thumbnail': thumbnail, + 'timestamp': timestamp, 'uploader': uploader, 'uploader_id': uploader_id, + 'view_count': view_count, + 'comment_count': comment_count, + 'categories': categories, } From f5b669113fc530922280987ff9e19afa780b8844 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Apr 2015 22:32:16 +0600 Subject: [PATCH 0208/2721] [miomio] Simplify and fix python 2.6 issue --- youtube_dl/extractor/miomio_tv.py | 101 ++++++++++++++++++------------ 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/miomio_tv.py b/youtube_dl/extractor/miomio_tv.py index ae20a32fa..dc2ba7cb4 100644 --- a/youtube_dl/extractor/miomio_tv.py +++ b/youtube_dl/extractor/miomio_tv.py @@ -1,74 +1,93 @@ # coding: utf-8 from __future__ import unicode_literals +import random + from .common import InfoExtractor +from ..utils import ( + xpath_text, + int_or_none, +) class MiomioTvIE(InfoExtractor): IE_NAME = 'miomio.tv' _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.miomio.tv/watch/cc179734/', 'md5': '48de02137d0739c15b440a224ad364b9', 'info_dict': { 'id': '179734', - 'title': u'\u624b\u7ed8\u52a8\u6f2b\u9b3c\u6ce3\u4f46\u4e01\u5168\u7a0b\u753b\u6cd5', - 'ext': 'flv' - } - } + 'ext': 'flv', + 
'title': '手绘动漫鬼泣但丁全程画法', + 'duration': 354, + }, + }, { + 'url': 'http://www.miomio.tv/watch/cc184024/', + 'info_dict': { + 'id': '43729', + 'title': '《动漫同人插画绘制》', + }, + 'playlist_mincount': 86, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<meta\s+name="description"\s+content="\s*([^"]*)\s*"', webpage, 'title') - ref_path = self._search_regex(r'src="(/mioplayer/.*?)"', webpage, 'ref_path') - referer = 'http://www.miomio.tv{0}'.format(ref_path) - xml_config = self._search_regex(r'flashvars="type=sina&(.*?)&cid=', webpage, 'xml config') + title = self._html_search_meta( + 'description', webpage, 'title', fatal=True) + + mioplayer_path = self._search_regex( + r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path') + + xml_config = self._search_regex( + r'flashvars="type=sina&(.+?)&', + webpage, 'xml config') # skipping the following page causes lags and eventually connection drop-outs - # id is normally a rotating three digit value but a fixed value always appears to work - self._request_webpage("http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id={0}&r=cc{1}".format(id, 945), video_id) + self._request_webpage( + 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), + video_id) # the following xml contains the actual configuration information on the video file(s) - xml_url = 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config) - vidconfig = self._download_xml(xml_url, video_id) - - file_els = vidconfig.findall('.//durl') - - entries = [] - - for file_el in file_els: - segment_id = file_el.find('order').text.strip() - segment_title = '_'.join([title, segment_id]) - segment_duration = file_el.find('length').text.strip() - segment_url = file_el.find('url').text.strip() - - entries.append({ - 'id': segment_id, - 'title': segment_title, - 'duration': segment_duration, - 'url': 
segment_url - }) + vid_config = self._download_xml( + 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config), + video_id) http_headers = { - 'Referer': referer, - 'Accept-Encoding': 'gzip, deflate', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' + 'Referer': 'http://www.miomio.tv%s' % mioplayer_path, } + entries = [] + for f in vid_config.findall('./durl'): + segment_url = xpath_text(f, 'url', 'video url') + if not segment_url: + continue + order = xpath_text(f, 'order', 'order') + segment_id = video_id + segment_title = title + if order: + segment_id += '-%s' % order + segment_title += ' part %s' % order + entries.append({ + 'id': segment_id, + 'url': segment_url, + 'title': segment_title, + 'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000), + 'http_headers': http_headers, + }) + if len(entries) == 1: - return { - 'id': video_id, - 'title': title, - 'url': entries[0]['url'], - 'http_headers': http_headers - } + segment = entries[0] + segment['id'] = video_id + segment['title'] = title + return segment return { '_type': 'multi_video', 'id': video_id, - 'title': title, 'entries': entries, - 'http_headers': http_headers + 'title': title, + 'http_headers': http_headers, } From e03bfb30ce0062c048759ed01f5a2fe4190d66fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Apr 2015 22:33:30 +0600 Subject: [PATCH 0209/2721] [miomio] Rename extractor --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/{miomio_tv.py => miomio.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename youtube_dl/extractor/{miomio_tv.py => miomio.py} (98%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0b9736f2d..9700d81f5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -274,7 +274,7 @@ from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import 
MetacriticIE from .mgoon import MgoonIE -from .miomio_tv import MiomioTvIE +from .miomio import MioMioIE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .mit import TechTVMITIE, MITIE, OCWMITIE diff --git a/youtube_dl/extractor/miomio_tv.py b/youtube_dl/extractor/miomio.py similarity index 98% rename from youtube_dl/extractor/miomio_tv.py rename to youtube_dl/extractor/miomio.py index dc2ba7cb4..11608f730 100644 --- a/youtube_dl/extractor/miomio_tv.py +++ b/youtube_dl/extractor/miomio.py @@ -10,7 +10,7 @@ from ..utils import ( ) -class MiomioTvIE(InfoExtractor): +class MioMioIE(InfoExtractor): IE_NAME = 'miomio.tv' _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)' _TESTS = [{ From 2ec8e04cac895121a71f11a44b855b1bf8a0195e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Apr 2015 22:34:08 +0600 Subject: [PATCH 0210/2721] [miomio] Fix alphabetic order --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9700d81f5..aae4aae4c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -274,9 +274,9 @@ from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE -from .miomio import MioMioIE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE +from .miomio import MioMioIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mitele import MiTeleIE from .mixcloud import MixcloudIE From a3c7019e065298103026960e00fff6b80b6bcf62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 2 Apr 2015 22:50:10 +0600 Subject: [PATCH 0211/2721] [YoutubeDL] Check for `get_ca_certs` availability `get_ca_certs` is not available in python <3.4 --- youtube_dl/YoutubeDL.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) 
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4fa2223ad..ce4b72fd3 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1770,7 +1770,9 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) # The ssl context is only available in python 2.7.9 and 3.x if hasattr(https_handler, '_context'): - if len(https_handler._context.get_ca_certs()) == 0: + ctx = https_handler._context + # get_ca_certs is unavailable prior to python 3.4 + if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0: self.report_warning( 'No ssl certificates were loaded, urls that use https ' 'won\'t work') From 6b7556a554a490a21307fec65cdaefc3331027c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Apr 2015 01:47:18 +0600 Subject: [PATCH 0212/2721] Credit @tiktok7 for miomio.tv (#5265) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 48be31e29..48769320a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -119,3 +119,4 @@ Eduardo Ferro Aldama Jeff Buchbinder Amish Bhadeshia Joram Schrijver +Will W. 
From a9cbab173584c716219b348ff36ccd5274f75249 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 3 Apr 2015 10:22:25 +0200 Subject: [PATCH 0213/2721] release 2015.04.03 --- docs/supportedsites.md | 6 ++++++ youtube_dl/version.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index fd59cc2be..2785b9587 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -111,6 +111,7 @@ - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** + - **DHM**: Filmarchiv - Deutsches Historisches Museum - **Discovery** - **divxstage**: DivxStage - **Dotsub** @@ -120,6 +121,7 @@ - **DrTuber** - **DRTV** - **Dump** + - **Dumpert** - **dvtv**: http://video.aktualne.cz/ - **EaglePlatform** - **EbaumsWorld** @@ -253,6 +255,7 @@ - **Mgoon** - **Minhateca** - **MinistryGrid** + - **miomio.tv** - **mitele.es** - **mixcloud** - **MLB** @@ -286,6 +289,8 @@ - **NBA** - **NBC** - **NBCNews** + - **NBCSports** + - **NBCSportsVPlayer** - **ndr**: NDR.de - Mediathek - **NDTV** - **NerdCubedFeed** @@ -501,6 +506,7 @@ - **Urort**: NRK P3 Urørt - **ustream** - **ustream:channel** + - **Varzesh3** - **Vbox7** - **VeeHD** - **Veoh** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dd93e295a..e1c385bec 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.03.28' +__version__ = '2015.04.03' From 4bbeb19fc77a49af763ce3443293b29b8450d686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 3 Apr 2015 14:09:07 +0200 Subject: [PATCH 0214/2721] [miomio] pep8: remove whitespaces in empty line --- youtube_dl/extractor/miomio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 11608f730..cc3f27194 100644 --- a/youtube_dl/extractor/miomio.py 
+++ b/youtube_dl/extractor/miomio.py @@ -44,7 +44,7 @@ class MioMioIE(InfoExtractor): xml_config = self._search_regex( r'flashvars="type=sina&(.+?)&', webpage, 'xml config') - + # skipping the following page causes lags and eventually connection drop-outs self._request_webpage( 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), From 3da4b31359c8dbbad3477fbe8341e6fc293cda82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 3 Apr 2015 14:09:50 +0200 Subject: [PATCH 0215/2721] [postprocessor/ffmpeg] Fix crash when ffprobe/avprobe are not installed (closes #5349) 'self.probe_basename' was None, so 'probe_executable' raised a KeyError exception --- youtube_dl/postprocessor/ffmpeg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 55adf9685..0b60ac7e7 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -116,6 +116,10 @@ class FFmpegPostProcessor(PostProcessor): def executable(self): return self._paths[self.basename] + @property + def probe_available(self): + return self.probe_basename is not None + @property def probe_executable(self): return self._paths[self.probe_basename] @@ -168,7 +172,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): def get_audio_codec(self, path): - if not self.probe_executable: + if not self.probe_available: raise PostProcessingError('ffprobe or avprobe not found. 
Please install one.') try: cmd = [ From ff2be6e180f1af471dd6d533719d9c595c756557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 3 Apr 2015 15:01:17 +0200 Subject: [PATCH 0216/2721] [bloomberg] Adapt to website changes (fixes #5347) --- youtube_dl/extractor/bloomberg.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 4a88ccd13..0dca29b71 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -6,32 +6,39 @@ from .common import InfoExtractor class BloombergIE(InfoExtractor): - _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html' + _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)' _TEST = { - 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', # The md5 checksum changes 'info_dict': { 'id': 'qurhIVlJSB6hzkVi229d8g', 'ext': 'flv', 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', - 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88', + 'description': 'md5:a8ba0302912d03d246979735c17d2761', }, } def _real_extract(self, url): name = self._match_id(url) webpage = self._download_webpage(url, name) - - f4m_url = self._search_regex( - r'<source src="(https?://[^"]+\.f4m.*?)"', webpage, - 'f4m url') + video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id') title = re.sub(': Video$', '', self._og_search_title(webpage)) + embed_info = self._download_json( + 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) + formats = [] + for stream in embed_info['streams']: + if stream["muxing_format"] == "TS": + formats.extend(self._extract_m3u8_formats(stream['url'], video_id)) + else: + 
formats.extend(self._extract_f4m_formats(stream['url'], video_id)) + self._sort_formats(formats) + return { - 'id': name.split('-')[-1], + 'id': video_id, 'title': title, - 'formats': self._extract_f4m_formats(f4m_url, name), + 'formats': formats, 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } From e81a47460365738a0add4d4da52a712c0091704f Mon Sep 17 00:00:00 2001 From: snipem <mail@matthias-kuech.de> Date: Fri, 3 Apr 2015 15:34:49 +0200 Subject: [PATCH 0217/2721] [Gamersyde] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gamersyde.py | 64 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/gamersyde.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index aae4aae4c..2935d5b33 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -177,6 +177,7 @@ from .gameone import ( GameOneIE, GameOnePlaylistIE, ) +from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gametrailers import GametrailersIE diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py new file mode 100644 index 000000000..c40106216 --- /dev/null +++ b/youtube_dl/extractor/gamersyde.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re +import json +import time +from .common import InfoExtractor + + +class GamersydeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' + _TEST = { + 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', + 'md5': 'f38d400d32f19724570040d5ce3a505f', + 'info_dict': { + 'id': '34371', + 'ext': 'mp4', + 'title': 'Bloodborne - Birth of a hero', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _calculateDuration(self, durationString): + duration = time.strptime(durationString, "%M minutes %S 
seconds") + return duration.tm_min * 60 + duration.tm_sec + + def _fixJsonSyntax(self, json): + + json = re.sub(r"{\s*(\w)", r'{"\1', json) + json = re.sub(r",\s*(\w)", r',"\1', json) + json = re.sub(r"(\w): ", r'\1":', json) + json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) + json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) + + return json + + def _real_extract(self, url): + + video_id = self._search_regex(r'-(.*?)_[a-z]{2}.html$', url, 'video_id') + webpage = self._download_webpage(url, video_id) + + filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) + filesJson = self._fixJsonSyntax(filesJson) + + data = json.loads(filesJson) + playlist = data[0] + + formats = [] + + title = re.sub(r"[0-9]+ - ", "", playlist['title']) + + for playlistEntry in playlist['sources']: + format = { + 'url': playlistEntry['file'], + 'format_id': playlistEntry['label'] + } + + formats.append(format) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': playlist['image'] + } From 185a7e25e7c18b0dff17bdb9ae828616a5ac17d4 Mon Sep 17 00:00:00 2001 From: Mohammad Teimori Pabandi <mtp1376@gmail.com> Date: Fri, 3 Apr 2015 20:55:39 +0430 Subject: [PATCH 0218/2721] [RadioJavan] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/radiojavan.py | 71 ++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 youtube_dl/extractor/radiojavan.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3011b784d..df4a7419a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -391,6 +391,7 @@ from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .r7 import R7IE from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE diff --git a/youtube_dl/extractor/radiojavan.py 
b/youtube_dl/extractor/radiojavan.py new file mode 100644 index 000000000..de90f9270 --- /dev/null +++ b/youtube_dl/extractor/radiojavan.py @@ -0,0 +1,71 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import( + parse_duration, + str_to_int +) + +class RadioJavanIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' + _TEST = { + 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', + 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', + 'info_dict': { + 'id': 'chaartaar-ashoobam', + 'ext': 'mp4', + 'title': 'Chaartaar - Ashoobam', + 'description': 'Chaartaar - Ashoobam', + 'thumbnail': 're:^https?://.*\.jpe?g$', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + urls = list() + prefix = 'https://media.rdjavan.com/media/music_video/' + + video_url_480 = self._search_regex( + r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False) + video_url_720 = self._search_regex( + r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False) + video_url_1080 = self._search_regex( + r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False) + + if video_url_480: + urls.append({'url': prefix + video_url_480, 'format': '480p'}) + if video_url_720: + urls.append({'url': prefix + video_url_720, 'format': '720p'}) + if video_url_1080: + urls.append({'url': prefix + video_url_1080, 'format': '1080p'}) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + formats = [{ + 'url': url['url'], + 'format': url['format'] + } for url in urls] + + likes = self._search_regex( + r'<span class="rating">([\d,]+)\s*likes</span>', webpage, 'Likes Count', fatal=False ) + likes = likes.replace(',', '') + dislikes = self._search_regex( + r'<span class="rating">([\d,]+)\s*dislikes</span>', webpage, 'Dislikes Count', fatal=False ) 
+ dislikes = dislikes.replace(',', '') + + plays = self._search_regex( + r'views_publish[">\s]*<span[^>]+class="views">Plays: ([\d,]+)</span>', webpage, 'Play Count', fatal=False ) + plays = plays.replace(',', '') + + return { + 'formats': formats, + 'id': display_id, + 'title': title, + 'description': title, # no description provided in RadioJavan + 'thumbnail': thumbnail, + 'like_count': str_to_int(likes), + 'dislike_count': str_to_int(dislikes), + 'viewCount': str_to_int(plays) + } \ No newline at end of file From cd341b6e0679c11b3698191615b18dbaaf2b0a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 3 Apr 2015 19:37:35 +0200 Subject: [PATCH 0219/2721] [mixcloud] Fix extraction of like count (reported in #5231) --- youtube_dl/extractor/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 21aea0c55..84f291558 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor): r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) like_count = str_to_int(self._search_regex( - r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"', + r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"', webpage, 'like count', fatal=False)) view_count = str_to_int(self._search_regex( [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', From 01534bf54f7eb80e4414cba86f1ac6fa608fc6e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Apr 2015 23:42:53 +0600 Subject: [PATCH 0220/2721] [prosiebensat1] Fix bitrate (Closes #5350 closes #5351) --- youtube_dl/extractor/prosiebensat1.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 
385681d06..c46aaada6 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -10,6 +10,7 @@ from ..compat import ( ) from ..utils import ( unified_strdate, + int_or_none, ) @@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor): urls_sources = urls_sources.values() def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate for source in urls_sources: From 16fa01291bd94703e2258a68bef1491d57f0dabc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 3 Apr 2015 23:44:13 +0600 Subject: [PATCH 0221/2721] [prosiebensat1] Fix test --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index c46aaada6..7cc799664 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -25,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor): 'info_dict': { 'id': '2104602', 'ext': 'mp4', - 'title': 'Staffel 2, Episode 18 - Jahresrückblick', + 'title': 'Episode 18 - Staffel 2', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', 'duration': 5845.04, From ff556f5c09ec8700bb012a58a5e39505b887b774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 00:30:37 +0600 Subject: [PATCH 0222/2721] Do not encode outtmpl twice (Closes #5288) --- youtube_dl/__init__.py | 4 ---- youtube_dl/options.py | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 852b2fc3d..1c8b411b7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -189,10 +189,6 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True - if sys.version_info < (3,): - # In Python 2, sys.argv 
is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) - if opts.outtmpl is not None: - opts.outtmpl = opts.outtmpl.decode(preferredencoding()) outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 35c7e5fb3..8e80e3759 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -13,6 +13,7 @@ from .compat import ( compat_kwargs, ) from .utils import ( + preferredencoding, write_string, ) from .version import __version__ @@ -797,7 +798,7 @@ def parseOpts(overrideArguments=None): # Workaround for Python 2.x, where argv is a byte list if sys.version_info < (3,): command_line_conf = [ - a.decode('utf-8', 'replace') for a in command_line_conf] + a.decode(preferredencoding(), 'replace') for a in command_line_conf] if '--ignore-config' in command_line_conf: system_conf = [] From 115c281672bd7479f87c48249f6a0186ac7d19cc Mon Sep 17 00:00:00 2001 From: snipem <mail@matthias-kuech.de> Date: Sat, 4 Apr 2015 12:31:48 +0200 Subject: [PATCH 0223/2721] [Gamersyde] Improved robustness, added duration and tests Fix for Json syntax is now less error prone for Json syntax inside of values. Extractor is now also using native Json handling. Added tests for several videos that were producing errors in the first place. 
--- youtube_dl/extractor/gamersyde.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index c40106216..5c68a6891 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -8,7 +8,6 @@ from .common import InfoExtractor class GamersydeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' - _TEST = { 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { @@ -17,6 +16,11 @@ class GamersydeIE(InfoExtractor): 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } + }, + { + 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', + 'info_dict': { + 'ext': 'mp4', } def _calculateDuration(self, durationString): @@ -27,7 +31,6 @@ class GamersydeIE(InfoExtractor): json = re.sub(r"{\s*(\w)", r'{"\1', json) json = re.sub(r",\s*(\w)", r',"\1', json) - json = re.sub(r"(\w): ", r'\1":', json) json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) @@ -40,7 +43,6 @@ class GamersydeIE(InfoExtractor): filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) filesJson = self._fixJsonSyntax(filesJson) - data = json.loads(filesJson) playlist = data[0] From 3d24d997ae1f92686aa7edd0bfeed28353fbfb2e Mon Sep 17 00:00:00 2001 From: snipem <mail@matthias-kuech.de> Date: Sat, 4 Apr 2015 12:42:14 +0200 Subject: [PATCH 0224/2721] Fixed intendation of test cases Leaded to error on Linux machine --- youtube_dl/extractor/gamersyde.py | 45 ++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index 5c68a6891..cc6fa4037 100644 --- a/youtube_dl/extractor/gamersyde.py +++ 
b/youtube_dl/extractor/gamersyde.py @@ -1,39 +1,62 @@ # coding: utf-8 from __future__ import unicode_literals import re -import json import time + from .common import InfoExtractor class GamersydeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' + _TESTS = [{ 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { 'id': '34371', 'ext': 'mp4', + 'duration': 372, 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } - }, - { + }, { 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', + 'md5': '94bd7c3feff3275576cf5cb6c8a3a720', 'info_dict': { + 'id': '34417', 'ext': 'mp4', + 'duration': 270, + 'title': 'Dark Souls II: Scholar of the First Sin - Gameplay - Part 1', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'http://www.gamersyde.com/hqstream_grand_theft_auto_v_heists_trailer-33786_en.html', + 'md5': '65e442f5f340d571ece8c80d50700369', + 'info_dict': { + 'id': '33786', + 'ext': 'mp4', + 'duration': 59, + 'title': 'Grand Theft Auto V - Heists Trailer', + 'thumbnail': 're:^https?://.*\.jpg$', + } } + ] def _calculateDuration(self, durationString): - duration = time.strptime(durationString, "%M minutes %S seconds") + if (durationString.find("minutes") > -1): + duration = time.strptime(durationString, "%M minutes %S seconds") + else: + duration = time.strptime(durationString, "%S seconds") return duration.tm_min * 60 + duration.tm_sec def _fixJsonSyntax(self, json): - json = re.sub(r"{\s*(\w)", r'{"\1', json) - json = re.sub(r",\s*(\w)", r',"\1', json) json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) - + json = json.replace('file: "', '"file": "') + json = json.replace('title: "', '"title": "') + json = json.replace('label: "', '"label": "') + json = json.replace('image: "', '"image": "') + json = 
json.replace('sources: [', '"sources": [') return json def _real_extract(self, url): @@ -42,13 +65,16 @@ class GamersydeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) - filesJson = self._fixJsonSyntax(filesJson) - data = json.loads(filesJson) + data = self._parse_json(filesJson,video_id, transform_source=self._fixJsonSyntax) + playlist = data[0] formats = [] title = re.sub(r"[0-9]+ - ", "", playlist['title']) + + length = self._search_regex(r'(([0-9]{1,2} minutes ){0,1}[0-9]{1,2} seconds)', webpage, 'length') + duration = self._calculateDuration(length) for playlistEntry in playlist['sources']: format = { @@ -62,5 +88,6 @@ class GamersydeIE(InfoExtractor): 'id': video_id, 'title': title, 'formats': formats, + 'duration': duration, 'thumbnail': playlist['image'] } From 7cf97daf77f6419f2b965a199a3fb1e63b8771b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 16:45:41 +0600 Subject: [PATCH 0225/2721] [radiojavan] Simplify and extract upload date --- youtube_dl/extractor/radiojavan.py | 75 ++++++++++++++---------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index de90f9270..73ab78d6d 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -1,12 +1,14 @@ -# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import( - parse_duration, - str_to_int + unified_strdate, + str_to_int, ) + class RadioJavanIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' 
_TEST = { @@ -16,56 +18,49 @@ class RadioJavanIE(InfoExtractor): 'id': 'chaartaar-ashoobam', 'ext': 'mp4', 'title': 'Chaartaar - Ashoobam', - 'description': 'Chaartaar - Ashoobam', 'thumbnail': 're:^https?://.*\.jpe?g$', + 'upload_date': '20150215', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, } } def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - urls = list() - prefix = 'https://media.rdjavan.com/media/music_video/' + video_id = self._match_id(url) - video_url_480 = self._search_regex( - r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False) - video_url_720 = self._search_regex( - r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False) - video_url_1080 = self._search_regex( - r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False) + webpage = self._download_webpage(url, video_id) - if video_url_480: - urls.append({'url': prefix + video_url_480, 'format': '480p'}) - if video_url_720: - urls.append({'url': prefix + video_url_720, 'format': '720p'}) - if video_url_1080: - urls.append({'url': prefix + video_url_1080, 'format': '1080p'}) + formats = [{ + 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path, + 'format_id': '%sp' % height, + 'height': height, + } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) - formats = [{ - 'url': url['url'], - 'format': url['format'] - } for url in urls] - likes = self._search_regex( - r'<span class="rating">([\d,]+)\s*likes</span>', webpage, 'Likes Count', fatal=False ) - likes = likes.replace(',', '') - dislikes = self._search_regex( - r'<span class="rating">([\d,]+)\s*dislikes</span>', webpage, 'Dislikes Count', fatal=False ) - dislikes = dislikes.replace(',', '') + upload_date = unified_strdate(self._search_regex( + r'class="date_added">Date added: 
([^<]+)<', + webpage, 'upload date', fatal=False)) - plays = self._search_regex( - r'views_publish[">\s]*<span[^>]+class="views">Plays: ([\d,]+)</span>', webpage, 'Play Count', fatal=False ) - plays = plays.replace(',', '') + view_count = str_to_int(self._search_regex( + r'class="views">Plays: ([\d,]+)', + webpage, 'view count', fatal=False)) + like_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) likes', + webpage, 'like count', fatal=False)) + dislike_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) dislikes', + webpage, 'dislike count', fatal=False)) return { - 'formats': formats, - 'id': display_id, + 'id': video_id, 'title': title, - 'description': title, # no description provided in RadioJavan 'thumbnail': thumbnail, - 'like_count': str_to_int(likes), - 'dislike_count': str_to_int(dislikes), - 'viewCount': str_to_int(plays) - } \ No newline at end of file + 'upload_date': upload_date, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'formats': formats, + } From 6e617ed0b6b5bb932f928f63c2bda36f5317468d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 16:47:09 +0600 Subject: [PATCH 0226/2721] Credit @mtp1376 for varzesh3 and radiojavan --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 48769320a..cf238176b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -120,3 +120,4 @@ Jeff Buchbinder Amish Bhadeshia Joram Schrijver Will W. 
+Mohammad Teimori Pabandi From e9f65f87496d740fbb61e036c710bf2c174f1cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 4 Apr 2015 13:11:55 +0200 Subject: [PATCH 0227/2721] [rtve] Extract a better quality video --- youtube_dl/extractor/rtve.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 13f071077..8d9be1b98 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -10,6 +10,7 @@ from ..compat import compat_urlparse from ..utils import ( float_or_none, remove_end, + std_headers, struct_unpack, ) @@ -84,13 +85,20 @@ class RTVEALaCartaIE(InfoExtractor): 'only_matching': True, }] + def _real_initialize(self): + user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8') + manager_info = self._download_json( + 'http://www.rtve.es/odin/loki/' + user_agent_b64, + None, 'Fetching manager info') + self._manager = manager_info['manager'] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') info = self._download_json( 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0] - png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) png = self._download_webpage(png_url, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): From ba9e68f40261355ceae5bb87c5707adc7f7beb2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 17:48:55 +0600 Subject: [PATCH 0228/2721] [utils] Drop trailing comma before closing brace --- test/test_utils.py | 6 ++++++ youtube_dl/utils.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff 
--git a/test/test_utils.py b/test/test_utils.py index abaf1ab73..4e524aca3 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -470,6 +470,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(d['x'], 1) self.assertEqual(d['y'], 'a') + on = js_to_json('["abc", "def",]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('{"abc": "def",}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 90e0ed9ab..e1761265c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1577,7 +1577,7 @@ def js_to_json(code): '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| [a-zA-Z_][.a-zA-Z_0-9]* ''', fix_kv, code) - res = re.sub(r',(\s*\])', lambda m: m.group(1), res) + res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res) return res From 5c29dbd0c76083eaf596f623fabb612575f71861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 17:53:22 +0600 Subject: [PATCH 0229/2721] [gamersyde] Simplify --- youtube_dl/extractor/gamersyde.py | 103 ++++++++++++------------------ 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index cc6fa4037..d545e01bb 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -1,14 +1,18 @@ -# coding: utf-8 from __future__ import unicode_literals + import re -import time from .common import InfoExtractor +from ..utils import ( + js_to_json, + parse_duration, + remove_start, +) class GamersydeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' - _TESTS = [{ + _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html' + _TEST = { 'url': 
'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { @@ -18,76 +22,49 @@ class GamersydeIE(InfoExtractor): 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } - }, { - 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', - 'md5': '94bd7c3feff3275576cf5cb6c8a3a720', - 'info_dict': { - 'id': '34417', - 'ext': 'mp4', - 'duration': 270, - 'title': 'Dark Souls II: Scholar of the First Sin - Gameplay - Part 1', - 'thumbnail': 're:^https?://.*\.jpg$', - } - }, { - 'url': 'http://www.gamersyde.com/hqstream_grand_theft_auto_v_heists_trailer-33786_en.html', - 'md5': '65e442f5f340d571ece8c80d50700369', - 'info_dict': { - 'id': '33786', - 'ext': 'mp4', - 'duration': 59, - 'title': 'Grand Theft Auto V - Heists Trailer', - 'thumbnail': 're:^https?://.*\.jpg$', - } } - ] - - def _calculateDuration(self, durationString): - if (durationString.find("minutes") > -1): - duration = time.strptime(durationString, "%M minutes %S seconds") - else: - duration = time.strptime(durationString, "%S seconds") - return duration.tm_min * 60 + duration.tm_sec - - def _fixJsonSyntax(self, json): - - json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) - json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) - json = json.replace('file: "', '"file": "') - json = json.replace('title: "', '"title": "') - json = json.replace('label: "', '"label": "') - json = json.replace('image: "', '"image": "') - json = json.replace('sources: [', '"sources": [') - return json def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - video_id = self._search_regex(r'-(.*?)_[a-z]{2}.html$', url, 'video_id') - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 
'files', flags=re.DOTALL) - data = self._parse_json(filesJson,video_id, transform_source=self._fixJsonSyntax) - - playlist = data[0] + playlist = self._parse_json( + self._search_regex( + r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'), + display_id, transform_source=js_to_json) formats = [] - - title = re.sub(r"[0-9]+ - ", "", playlist['title']) - - length = self._search_regex(r'(([0-9]{1,2} minutes ){0,1}[0-9]{1,2} seconds)', webpage, 'length') - duration = self._calculateDuration(length) - - for playlistEntry in playlist['sources']: - format = { - 'url': playlistEntry['file'], - 'format_id': playlistEntry['label'] + for source in playlist['sources']: + video_url = source.get('file') + if not video_url: + continue + format_id = source.get('label') + f = { + 'url': video_url, + 'format_id': format_id, } + m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id) + if m: + f.update({ + 'height': int(m.group('height')), + 'fps': int(m.group('fps')), + }) + formats.append(f) + self._sort_formats(formats) - formats.append(format) + title = remove_start(playlist['title'], '%s - ' % video_id) + thumbnail = playlist.get('image') + duration = parse_duration(self._search_regex( + r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False)) return { 'id': video_id, + 'display_id': display_id, 'title': title, - 'formats': formats, + 'thumbnail': thumbnail, 'duration': duration, - 'thumbnail': playlist['image'] - } + 'formats': formats, + } From 79c21abba7c9902f00ddac83a2af29c36fe0e122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 18:45:46 +0600 Subject: [PATCH 0230/2721] [utils] Add one more template to unified_strdate --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e1761265c..be3f62da7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -759,6 +759,7 @@ def unified_strdate(date_str, day_first=True): ] if day_first: 
format_expressions.extend([ + '%d-%m-%Y', '%d.%m.%Y', '%d/%m/%Y', '%d/%m/%y', @@ -766,6 +767,7 @@ def unified_strdate(date_str, day_first=True): ]) else: format_expressions.extend([ + '%m-%d-%Y', '%m.%d.%Y', '%m/%d/%Y', '%m/%d/%y', From 15ac8413c78b991f2e99b6bdc538bc8c5ae8e8a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 19:08:48 +0600 Subject: [PATCH 0231/2721] [utils] Avoid treating `*-%Y` date template as UTC offset --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index be3f62da7..52f0dd09a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True): # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 - date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) + if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): + date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) # Remove AM/PM + timezone date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) From 8cf70de428c3fef910ba966fb56d39478226acc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 19:11:01 +0600 Subject: [PATCH 0232/2721] [test_utils] Add test for unified_strdate --- test/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_utils.py b/test/test_utils.py index 4e524aca3..2e3a6480c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -227,6 +227,7 @@ class TestUtil(unittest.TestCase): self.assertEqual( unified_strdate('2/2/2015 6:47:40 PM', day_first=False), '20150202') + self.assertEqual(unified_strdate('25-09-2014'), '20140925') def test_find_xpath_attr(self): testxml = '''<root> From 7c39a65543b809b681434246b84710349f5837aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 
2015 19:13:37 +0600 Subject: [PATCH 0233/2721] [pornovoisines] Simplify --- youtube_dl/extractor/pornovoisines.py | 111 ++++++++++++-------------- 1 file changed, 53 insertions(+), 58 deletions(-) diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index efbb6a818..9688ed948 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -2,19 +2,23 @@ from __future__ import unicode_literals import re -import datetime import random -from ..compat import compat_urllib_parse from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + unified_strdate, +) + class PornoVoisinesIE(InfoExtractor): - _VALID_URL = r'^((?:http://)?(?:www\.)?pornovoisines.com)/showvideo/(\d+)/([^/]+)' + _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' - VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ + _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' - SERVER_NUMBERS = (1, 2) + _SERVER_NUMBERS = (1, 2) _TEST = { 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', @@ -23,79 +27,70 @@ class PornoVoisinesIE(InfoExtractor): 'id': '1285', 'display_id': 'recherche-appartement', 'ext': 'mp4', - 'title': "Recherche appartement", + 'title': 'Recherche appartement', + 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', + 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140925', - 'view_count': int, 'duration': 120, - 'categories': ["Débutante", "Scénario", "Sodomie"], - 'description': 're:^Pour la .+ original...$', - 'thumbnail': 're:^http://', - 'uploader': "JMTV", + 'view_count': int, 'average_rating': float, - 'comment_count': int, + 'categories': ['Débutante', 'Scénario', 'Sodomie'], 'age_limit': 18, } } @classmethod - def build_video_url(cls, id): - server_nr = random.choice(cls.SERVER_NUMBERS) - return cls.VIDEO_URL_TEMPLATE % 
(server_nr, id) - - @staticmethod - def parse_upload_date(str): - return datetime.datetime.strptime(str, "%d-%m-%Y").strftime("%Y%m%d") - - @staticmethod - def parse_categories(str): - return map(lambda s: s.strip(), str.split(',')) + def build_video_url(cls, num): + return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - url_prefix = mobj.group(1) - id = mobj.group(2) - display_id = mobj.group(3) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title', - flags=re.DOTALL) - url = self.build_video_url(id) - upload_date = self.parse_upload_date( - self._search_regex(r'Publié le (\d\d-\d\d-\d{4})', webpage, - 'upload date')) - view_count = int(self._search_regex(r'(\d+) vues', webpage, 'view count')) - duration = int(self._search_regex('Durée (\d+)', webpage, 'duration')) - categories = self.parse_categories(self._html_search_regex( - r'<li class="categorie">(.+?)</li>', webpage, "categories", - flags=re.DOTALL)) + video_url = self.build_video_url(video_id) + + title = self._html_search_regex( + r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL) description = self._html_search_regex( - r'<article id="descriptif">(.+?)</article>', webpage, "description", - flags=re.DOTALL) - thumbnail = url_prefix + self._html_search_regex(re.compile( - '<div id="mediaspace' + id + '">.*?<img src="(.+?)"', re.DOTALL), - webpage, "thumbnail") - uploader = re.sub(r' *\| *$', '', - self._html_search_regex(r'<li class="auteur">(.+?)</li>', webpage, - "uploader", flags=re.DOTALL)) - average_rating = float(self._search_regex(r'Note : (\d+,\d+)', - webpage, "average rating").replace(',', '.')) - comment_count = int(self._search_regex(r'\((\d+)\)', webpage, - "comment count")) + r'<article id="descriptif">(.+?)</article>', + 
webpage, "description", fatal=False, flags=re.DOTALL) + + thumbnail = self._search_regex( + r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id, + webpage, 'thumbnail', fatal=False) + if thumbnail: + thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail + + upload_date = unified_strdate(self._search_regex( + r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) + duration = int_or_none(self._search_regex( + 'Durée (\d+)', webpage, 'duration', fatal=False)) + view_count = int_or_none(self._search_regex( + r'(\d+) vues', webpage, 'view count', fatal=False)) + average_rating = self._search_regex( + r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False) + if average_rating: + average_rating = float_or_none(average_rating.replace(',', '.')) + + categories = self._html_search_meta( + 'keywords', webpage, 'categories', fatal=False) + if categories: + categories = [category.strip() for category in categories.split(',')] return { - 'id': id, + 'id': video_id, 'display_id': display_id, - 'url': url, + 'url': video_url, 'title': title, - 'upload_date': upload_date, - 'view_count': view_count, - 'duration': duration, - 'categories': categories, 'description': description, 'thumbnail': thumbnail, - 'uploader': uploader, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, 'average_rating': average_rating, - 'comment_count': comment_count, + 'categories': categories, 'age_limit': 18, } From 424266abb10eafe6c57c5d391a947fa190a365e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 19:16:18 +0600 Subject: [PATCH 0234/2721] Credit @Roman2K for pornovoisines (#5264) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index cf238176b..9c65dc1d4 100644 --- a/AUTHORS +++ b/AUTHORS @@ -121,3 +121,4 @@ Amish Bhadeshia Joram Schrijver Will W. 
Mohammad Teimori Pabandi +Roman Le Négrate From ff02a228e35ab11c9cfa6e0d000b7fd6de52a0c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 19:21:50 +0600 Subject: [PATCH 0235/2721] [test_execution] Fix test under python 2 @ windows --- test/test_execution.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_execution.py b/test/test_execution.py index f31e51558..620db080e 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -8,6 +8,9 @@ import unittest import sys import os import subprocess +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -31,7 +34,7 @@ class TestExecution(unittest.TestCase): def test_cmdline_umlauts(self): p = subprocess.Popen( - [sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'], + [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) From 4e8cc1e973da2656c46c5df84d4e85c5d78836ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 19:24:37 +0600 Subject: [PATCH 0236/2721] [radiojavan] Fix height --- youtube_dl/extractor/radiojavan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index 73ab78d6d..c9eda9b53 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -34,7 +34,7 @@ class RadioJavanIE(InfoExtractor): formats = [{ 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path, 'format_id': '%sp' % height, - 'height': height, + 'height': int(height), } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] title = 
self._og_search_title(webpage) From 8fb2e5a4f5b9604f93964f9b6ae7062830e3bab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 19:25:08 +0600 Subject: [PATCH 0237/2721] [radiojavan] Sort formats --- youtube_dl/extractor/radiojavan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index c9eda9b53..884c28420 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -36,6 +36,7 @@ class RadioJavanIE(InfoExtractor): 'format_id': '%sp' % height, 'height': int(height), } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] + self._sort_formats(formats) title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) From 4a3cdf81af9c22c45912b0b4c5845531d52d3a0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 20:00:23 +0600 Subject: [PATCH 0238/2721] [options] Restore some strings --- youtube_dl/options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 9bded4521..d861ac458 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -359,7 +359,7 @@ def parseOpts(overrideArguments=None): video_format.add_option( '--max-quality', action='store', dest='format_limit', metavar='FORMAT', - help='Specify highest quality format to download') + help='Highest quality format to download') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', @@ -399,7 +399,7 @@ def parseOpts(overrideArguments=None): subtitles.add_option( '--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', default='best', - help='Specify subtitle format preference, for example: "srt" or "ass/srt/best"') + help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"') 
subtitles.add_option( '--sub-lang', '--sub-langs', '--srt-lang', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', From f01855813b364dbd3e0c7fecacda84410d2780bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 20:01:24 +0600 Subject: [PATCH 0239/2721] [options] extractor is lowercase --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d861ac458..ed2216d40 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -606,7 +606,7 @@ def parseOpts(overrideArguments=None): '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), ' '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (YouTube, metacafe, etc), ' + '%(extractor)s for the provider (youtube, metacafe, etc), ' '%(id)s for the video id, ' '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, ' '%(playlist_index)s for the position in the playlist. 
' From 6b70a4eb7d4bcbe6812f78876b4aa9aa44a58fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 20:02:29 +0600 Subject: [PATCH 0240/2721] [options] `Number` is a verb here --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index ed2216d40..2097a9436 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -626,7 +626,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-A', '--auto-number', action='store_true', dest='autonumber', default=False, - help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number of downloaded files starting from 00000') + help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000') filesystem.add_option( '-t', '--title', action='store_true', dest='usetitle', default=False, From 1a48181a9ff872e4b8428603f70851c386a2790d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 20:09:11 +0600 Subject: [PATCH 0241/2721] [options] Fix load info help string --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2097a9436..5720fb424 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -670,7 +670,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', - help='Specify JSON file containing the video information (created with the "--write-json" option)') + help='JSON file containing the video information (created with the "--write-info-json" option)') filesystem.add_option( '--cookies', dest='cookiefile', metavar='FILE', From 1a68d39211cab61994c8717cce296b0baae8095a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 22:15:59 +0600 Subject: [PATCH 0242/2721] 
[aftonbladet] Fix extraction --- youtube_dl/extractor/aftonbladet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index 8442019ea..4675585ca 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class AftonbladetIE(InfoExtractor): - _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])' + _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])' _TEST = { 'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', 'info_dict': { From 8e1f93747338d64f6855c0f7f9467714bf56db93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 22:19:34 +0600 Subject: [PATCH 0243/2721] [aftonbladet] Modernize --- youtube_dl/extractor/aftonbladet.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index 4675585ca..a117502bc 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class AftonbladetIE(InfoExtractor): @@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor): formats.append({ 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), 'ext': 'mp4', - 'width': fmt['width'], - 'height': fmt['height'], - 'tbr': fmt['bitrate'], + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'tbr': int_or_none(fmt.get('bitrate')), 'protocol': 'http', }) self._sort_formats(formats) @@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor): 'id': video_id, 'title': internal_meta_json['title'], 'formats': formats, - 'thumbnail': internal_meta_json['imageUrl'], - 
'description': internal_meta_json['shortPreamble'], - 'timestamp': internal_meta_json['timePublished'], - 'duration': internal_meta_json['duration'], - 'view_count': internal_meta_json['views'], + 'thumbnail': internal_meta_json.get('imageUrl'), + 'description': internal_meta_json.get('shortPreamble'), + 'timestamp': int_or_none(internal_meta_json.get('timePublished')), + 'duration': int_or_none(internal_meta_json.get('duration')), + 'view_count': int_or_none(internal_meta_json.get('views')), } From ed676e8c0ab087acb8e5e26a2a8d94a47fe10c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 22:27:25 +0600 Subject: [PATCH 0244/2721] [bliptv] Check format URLs Some formats are now 404 --- youtube_dl/extractor/bliptv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 8c7ba4b91..b632ce967 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -172,6 +172,7 @@ class BlipTVIE(InfoExtractor): 'width': int_or_none(media_content.get('width')), 'height': int_or_none(media_content.get('height')), }) + self._check_formats(formats, video_id) self._sort_formats(formats) subtitles = self.extract_subtitles(video_id, subtitles_urls) From 184a1974414bc91c5804251a33b8dd5cba1f75d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 22:43:34 +0600 Subject: [PATCH 0245/2721] [culturebox] Check for unavailable videos --- youtube_dl/extractor/francetv.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 170d68075..20acc96bd 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -275,7 +275,12 @@ class CultureboxIE(FranceTVBaseInfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') + webpage = self._download_webpage(url, 
name) + + if ">Ce live n'est plus disponible en replay<" in webpage: + raise ExtractorError('Video %s is not available' % name, expected=True) + video_id, catalogue = self._search_regex( r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') From aed2d4b31e331422fefa304ab3fa49c050ea13e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 22:50:13 +0600 Subject: [PATCH 0246/2721] [culturebox] Replace test --- youtube_dl/extractor/francetv.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 20acc96bd..55b8e9d9e 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -260,15 +260,18 @@ class CultureboxIE(FranceTVBaseInfoExtractor): _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' _TEST = { - 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553', - 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6', + 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511', 'info_dict': { - 'id': 'EV_22853', - 'ext': 'flv', - 'title': 'Dans les jardins de William Christie - Le Camus', - 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299', - 'upload_date': '20140829', - 'timestamp': 1409317200, + 'id': 'EV_50111', + 'ext': 'mp4', + 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne", + 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9', + 'upload_date': '20150320', + 'timestamp': 1426892400, + 'duration': 2760.9, + }, + 'params': { + 'skip_download': True, }, } From f05d0e73c6d38d86393f8f552fbfcdda80f37607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 22:52:25 +0600 Subject: [PATCH 0247/2721] 
[francetv] Fix duration --- youtube_dl/extractor/francetv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 55b8e9d9e..0d92ef9c4 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -14,6 +14,7 @@ from ..utils import ( clean_html, ExtractorError, int_or_none, + float_or_none, parse_duration, ) @@ -86,7 +87,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor): 'title': info['titre'], 'description': clean_html(info['synopsis']), 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), - 'duration': parse_duration(info['duree']), + 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']), 'timestamp': int_or_none(info['diffusion']['timestamp']), 'formats': formats, } From bc03228ab52672666b79c9fadfbf886f8d8bf5d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:02:04 +0600 Subject: [PATCH 0248/2721] [francetv] Improve formats extraction --- youtube_dl/extractor/francetv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 0d92ef9c4..fd3e7aa7b 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -16,6 +16,7 @@ from ..utils import ( int_or_none, float_or_none, parse_duration, + determine_ext, ) @@ -51,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor): if not video_url: continue format_id = video['format'] - if video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': if georestricted: # See https://github.com/rg3/youtube-dl/issues/3963 # m3u8 urls work fine @@ -61,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor): 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, video_id, 'Downloading f4m manifest token', fatal=False) if 
f4m_url: - f4m_formats = self._extract_f4m_formats(f4m_url, video_id) - for f4m_format in f4m_formats: - f4m_format['preference'] = 1 - formats.extend(f4m_formats) - elif video_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) + formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, From e21a55abcc502abac559027551751ff84d215077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:05:25 +0600 Subject: [PATCH 0249/2721] [extractor/common] Remove f4m section It's now provided by `f4m_id` --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e5245ec3f..530c449c1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -822,7 +822,7 @@ class InfoExtractor(object): (media_el.attrib.get('href') or media_el.attrib.get('url'))) tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), + 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'url': manifest_url, 'ext': 'flv', 'tbr': tbr, From ac651e974ef8da74df35e5ac5464cc03e35bb2d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:06:16 +0600 Subject: [PATCH 0250/2721] [culturebox] Fix test --- youtube_dl/extractor/francetv.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index fd3e7aa7b..edf555b29 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -261,18 +261,16 @@ class 
CultureboxIE(FranceTVBaseInfoExtractor): _TEST = { 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511', + 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6', 'info_dict': { 'id': 'EV_50111', - 'ext': 'mp4', + 'ext': 'flv', 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne", 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9', 'upload_date': '20150320', 'timestamp': 1426892400, 'duration': 2760.9, }, - 'params': { - 'skip_download': True, - }, } def _real_extract(self, url): From aff84bec07fc1919591827543845790074b0194f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:17:09 +0600 Subject: [PATCH 0251/2721] [drtv] Check for unavailable videos --- youtube_dl/extractor/drtv.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 8257e35a4..bd1109549 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -26,6 +26,10 @@ class DRTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + if '>Programmet er ikke længere tilgængeligt' in webpage: + raise ExtractorError( + 'Video %s is not available' % video_id, expected=True) + video_id = self._search_regex( r'data-(?:material-identifier|episode-slug)="([^"]+)"', webpage, 'video id') From 7d2546397209deab14a0ebad6c933ed97e73fe41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:19:28 +0600 Subject: [PATCH 0252/2721] [drtv] Update test --- youtube_dl/extractor/drtv.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index bd1109549..f25ab319e 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor, 
ExtractorError @@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' _TEST = { - 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', - 'md5': '4a7e1dd65cdb2643500a3f753c942f25', + 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', + 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', 'info_dict': { - 'id': 'partiets-mand-7-8', + 'id': 'panisk-paske-5', 'ext': 'mp4', - 'title': 'Partiets mand (7:8)', - 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', - 'timestamp': 1403047940, - 'upload_date': '20140617', - 'duration': 1299.040, + 'title': 'Panisk Påske (5)', + 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', + 'timestamp': 1426984612, + 'upload_date': '20150322', + 'duration': 1455, }, } From 218d6bcc05bd84d8f69a7b764702dc24acb2f761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:28:47 +0600 Subject: [PATCH 0253/2721] [dreisat] Capture status errors --- youtube_dl/extractor/dreisat.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 69ca75423..b88460a23 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + ExtractorError, + unified_strdate, +) class DreiSatIE(InfoExtractor): @@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor): details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_doc = self._download_xml(details_url, video_id, 'Downloading video details') + status_code = details_doc.find('./status/statuscode') + if status_code is not None and status_code.text != 'ok': + code = status_code.text + if code == 'notVisibleAnymore': + message = 'Video %s 
is not available' % video_id + else: + message = '%s returned error: %s' % (self.IE_NAME, code) + raise ExtractorError(message, expected=True) + thumbnail_els = details_doc.findall('.//teaserimage') thumbnails = [{ 'width': int(te.attrib['key'].partition('x')[0]), From a319c33d8b8c4a7ac1d2f8dd739508b041d960b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:30:38 +0600 Subject: [PATCH 0254/2721] [dreisat] Update test --- youtube_dl/extractor/dreisat.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index b88460a23..05bb22ddf 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -13,15 +13,15 @@ class DreiSatIE(InfoExtractor): IE_NAME = '3sat' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TEST = { - 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983', - 'md5': '9dcfe344732808dbfcc901537973c922', + 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', + 'md5': 'be37228896d30a88f315b638900a026e', 'info_dict': { - 'id': '36983', + 'id': '45918', 'ext': 'mp4', - 'title': 'Kaffeeland Schweiz', - 'description': 'md5:cc4424b18b75ae9948b13929a0814033', + 'title': 'Waidmannsheil', + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 'uploader': '3sat', - 'upload_date': '20130622' + 'upload_date': '20140913' } } From fefc9d121d32321d3609e131e488c443d7af962a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:33:07 +0600 Subject: [PATCH 0255/2721] [dump] Fix title extraction --- youtube_dl/extractor/dump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py index 6b651778a..ff78d4fd2 100644 --- a/youtube_dl/extractor/dump.py +++ b/youtube_dl/extractor/dump.py 
@@ -28,12 +28,12 @@ class DumpIE(InfoExtractor): video_url = self._search_regex( r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL') - thumb = self._og_search_thumbnail(webpage) - title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title') + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, 'title': title, 'url': video_url, - 'thumbnail': thumb, + 'thumbnail': thumbnail, } From f67dcc09f5e2f68032e379133e53b07e58c544dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 4 Apr 2015 23:36:45 +0600 Subject: [PATCH 0256/2721] [eagleplatform] Skip georestricted test --- youtube_dl/extractor/eagleplatform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 7173371ee..688dfc2f7 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -45,6 +45,7 @@ class EaglePlatformIE(InfoExtractor): 'duration': 216, 'view_count': int, }, + 'skip': 'Georestricted', }] def _handle_error(self, response): From 27fe5e347350484009e79251ec7ef97484219481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Apr 2015 00:00:04 +0600 Subject: [PATCH 0257/2721] [ellentv] Make video url extraction fatal --- youtube_dl/extractor/ellentv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index fc92ff825..3a7962144 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -40,14 +40,15 @@ class EllenTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._html_search_meta('VideoURL', webpage, 'url') + + video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True) title = self._og_search_title(webpage, default=None) or 
self._search_regex( r'pageName\s*=\s*"([^"]+)"', webpage, 'title') description = self._html_search_meta( 'description', webpage, 'description') or self._og_search_description(webpage) timestamp = parse_iso8601(self._search_regex( r'<span class="publish-date"><time datetime="([^"]+)">', - webpage, 'timestamp')) + webpage, 'timestamp', fatal=False)) return { 'id': video_id, From 0de9312a7e7c15262ce4a59249d3c4294989757c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 5 Apr 2015 00:01:55 +0600 Subject: [PATCH 0258/2721] [ellentv] Replace test --- youtube_dl/extractor/ellentv.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 3a7962144..5154bbd7f 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -13,15 +13,15 @@ from ..utils import ( class EllenTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' _TESTS = [{ - 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', - 'md5': 'e4af06f3bf0d5f471921a18db5764642', + 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', + 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c', 'info_dict': { - 'id': '0-7jqrsr18', + 'id': '0-ipq1gsai', 'ext': 'mp4', - 'title': 'What\'s Wrong with These Photos? 
A Whole Lot', - 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6', - 'timestamp': 1406876400, - 'upload_date': '20140801', + 'title': 'Fast Fingers of Fate', + 'description': 'md5:686114ced0a032926935e9015ee794ac', + 'timestamp': 1428033600, + 'upload_date': '20150403', } }, { 'url': 'http://ellentube.com/videos/0-dvzmabd5/', From 3a9fadd6dfc127ed0707b218b11ac10c654af1e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 5 Apr 2015 22:29:06 +0800 Subject: [PATCH 0259/2721] [youtube] Enhance url_encoded_fmt_stream_map checking (fix #5361) --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5488101e1..f7f701cc5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -855,7 +855,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): args = ytplayer_config['args'] # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) - if 'url_encoded_fmt_stream_map' not in args: + if ('url_encoded_fmt_stream_map' not in args or + args['url_encoded_fmt_stream_map'] == ''): raise ValueError('No stream_map present') # caught below except ValueError: # We fallback to the get_video_info pages (used by the embed page) From 06b491eb7b9459f92484f83973c17d46dba59f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Apr 2015 00:35:55 +0600 Subject: [PATCH 0260/2721] [youtube] Add test for #5361 --- youtube_dl/extractor/youtube.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f7f701cc5..198fe84ef 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -495,7 +495,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': '孫艾倫', 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', }, - } + }, + # 
url_encoded_fmt_stream_map is empty string + { + 'url': 'qEJwOuvDf7I', + 'info_dict': { + 'id': 'qEJwOuvDf7I', + 'ext': 'mp4', + 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', + 'description': '', + 'upload_date': '20150404', + 'uploader_id': 'spbelect', + 'uploader': 'Наблюдатели Петербурга', + }, + 'params': { + 'skip_download': 'requires avconv', + } + }, ] def __init__(self, *args, **kwargs): From e40bd5f06ba397f6a3f9f21ab1df01c120d90eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Apr 2015 00:45:57 +0600 Subject: [PATCH 0261/2721] [youtube] Simplify url_encoded_fmt_stream_map check --- youtube_dl/extractor/youtube.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 198fe84ef..79ce39aa4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -871,8 +871,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): args = ytplayer_config['args'] # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) - if ('url_encoded_fmt_stream_map' not in args or - args['url_encoded_fmt_stream_map'] == ''): + if not args.get('url_encoded_fmt_stream_map'): raise ValueError('No stream_map present') # caught below except ValueError: # We fallback to the get_video_info pages (used by the embed page) From 64102296818f94d3814a8183daa5d92cbdd952fd Mon Sep 17 00:00:00 2001 From: newtonelectron <newton.electron@gmail.com> Date: Sun, 5 Apr 2015 12:50:21 -0700 Subject: [PATCH 0262/2721] [SpankBang] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/spankbang.py | 38 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 youtube_dl/extractor/spankbang.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0f7d44616..e6fdf1297 100644 --- 
a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -471,6 +471,7 @@ from .southpark import ( SouthparkDeIE, ) from .space import SpaceIE +from .spankbang import SpankBangIE from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py new file mode 100644 index 000000000..8e845ef26 --- /dev/null +++ b/youtube_dl/extractor/spankbang.py @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +import re + +class SpankBangIE(InfoExtractor): + """Extractor for http://spankbang.com""" + + _VALID_URL = r"https?://(?:www\.)?spankbang\.com/(?P<id>\w+)/video/.*" + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r"<h1>(?:<img.+?>)?(.*?)</h1>", webpage, "title") + + stream_key = self._html_search_regex(r"""var\s+stream_key\s*[=]\s*['"](.+?)['"]\s*;""", webpage, "stream_key") + + qualities = re.findall(r"<span.+?>([0-9]+p).*?</span>", webpage) + + formats = [] + for q in sorted(qualities): + formats.append({ + "format_id": q, + "format": q, + "ext": "mp4", + "url": "http://spankbang.com/_{}/{}/title/{}__mp4".format(video_id, stream_key, q) + }) + + return { + "id": video_id, + "title": title, + "description": self._og_search_description(webpage), + "formats": formats + } + +# vim: tabstop=4 expandtab From 2e7daef50220ee90e8a2e2b979600f8bd4a3e40e Mon Sep 17 00:00:00 2001 From: newtonelectron <newton.electron@gmail.com> Date: Sun, 5 Apr 2015 13:43:21 -0700 Subject: [PATCH 0263/2721] [SpankBang] Use python2.6 compatible string formatting spec --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 8e845ef26..61fd64d17 100644 --- 
a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -25,7 +25,7 @@ class SpankBangIE(InfoExtractor): "format_id": q, "format": q, "ext": "mp4", - "url": "http://spankbang.com/_{}/{}/title/{}__mp4".format(video_id, stream_key, q) + "url": "http://spankbang.com/_{0}/{1}/title/{2}__mp4".format(video_id, stream_key, q) }) return { From 5c1d459ae91d2681be88023e9056dcae3f48a70a Mon Sep 17 00:00:00 2001 From: newtonelectron <newton.electron@gmail.com> Date: Sun, 5 Apr 2015 13:57:59 -0700 Subject: [PATCH 0264/2721] [SpankBang] Add test --- youtube_dl/extractor/spankbang.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 61fd64d17..2e20a5ad5 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -8,6 +8,20 @@ class SpankBangIE(InfoExtractor): """Extractor for http://spankbang.com""" _VALID_URL = r"https?://(?:www\.)?spankbang\.com/(?P<id>\w+)/video/.*" + + _TEST = { + "url": "http://spankbang.com/3vvn/video/fantasy+solo", + "md5": "1cc433e1d6aa14bc376535b8679302f7", + "info_dict": { + "id": "3vvn", + "title": "fantasy solo", + "description": "Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.", + "format": "720p", + "format_id": "720p", + "ext": "mp4", + "url": "re:http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + } + } def _real_extract(self, url): video_id = self._match_id(url) From c7ac5dce8c692f82f10363e40a7085ac53113bc8 Mon Sep 17 00:00:00 2001 From: newtonelectron <newton.electron@gmail.com> Date: Sun, 5 Apr 2015 14:02:05 -0700 Subject: [PATCH 0265/2721] [SpankBang] Remove regexp type prefix from _TEST url. 
--- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 2e20a5ad5..d0b5ba278 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -19,7 +19,7 @@ class SpankBangIE(InfoExtractor): "format": "720p", "format_id": "720p", "ext": "mp4", - "url": "re:http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + "url": "http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" } } From d2272fcf6e2796583b516f4786733577459dec43 Mon Sep 17 00:00:00 2001 From: felix <m.p.isaev@yandex.com> Date: Mon, 6 Apr 2015 09:54:19 +0200 Subject: [PATCH 0266/2721] crooksandliars.com extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/crooksandliars.py | 71 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 youtube_dl/extractor/crooksandliars.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7eb9b4fbb..dc272af82 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -90,6 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .condenast import CondeNastIE from .cracked import CrackedIE from .criterion import CriterionIE +from .crooksandliars import CrooksAndLiarsIE, CrooksAndLiarsArticleIE from .crunchyroll import ( CrunchyrollIE, CrunchyrollShowPlaylistIE diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py new file mode 100644 index 000000000..afccca354 --- /dev/null +++ b/youtube_dl/extractor/crooksandliars.py @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + mimetype2ext, +) + + +class CrooksAndLiarsIE(InfoExtractor): + _VALID_URL = 
r'(?:https?:)?//embed.crooksandliars.com/embed/(?P<id>[A-Za-z0-9]+)(?:$|[?#])' + + _TESTS = [{ + 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', + 'info_dict': { + 'id': 'https://embed.crooksandliars.com/embed/8RUoRhRi', + 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", + 'description': "Fox News, Fox & Friends Weekend, April 4, 2015. Read more... http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists", + 'timestamp': 1428207000, + 'thumbnail': '//crooksandliars.com/files/mediaposters/2015/04/31235.jpg?ts=1428207050', + 'uploader': "Heather", + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + manifest = json.loads(self._html_search_regex(r'var manifest = ({.*?})\n', webpage, 'manifest JSON')) + + formats = [] + for item in manifest['flavors']: + if not item['mime'].startswith('video/'): # XXX: or item['exclude']? + continue + formats.append({ + 'format_id': item['type'], + 'ext': mimetype2ext(item['mime']), + 'url': item['url'], + }) + + # XXX: manifest['url']? 
+ return { + 'url': url, + 'id': video_id, + 'uploader': manifest['author'], + 'title': manifest['title'], + 'description': manifest['description'], + 'thumbnail': manifest['poster'], + 'duration': manifest['duration'], + 'timestamp': int(manifest['created']), + 'formats': formats, + } + +class CrooksAndLiarsArticleIE(InfoExtractor): + _VALID_URL = r'(?:https?:)?//crooksandliars.com/\d+/\d+/(?P<id>[a-z\-]+)(?:/|$)' + + _TESTS = [{ + 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_url = self._proto_relative_url(self._html_search_regex(r'<iframe src="(//embed.crooksandliars.com/.*)"', webpage, 'embedded player')) + + return { + '_type': 'url', + 'url': player_url + } From 6e53c91608d1c43a9fe1614f13a15db74e877a91 Mon Sep 17 00:00:00 2001 From: felix <m.p.isaev@yandex.com> Date: Mon, 6 Apr 2015 10:12:43 +0200 Subject: [PATCH 0267/2721] [crooksandliars] resolve protocol-relative URLs --- youtube_dl/extractor/crooksandliars.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py index afccca354..cee0603f4 100644 --- a/youtube_dl/extractor/crooksandliars.py +++ b/youtube_dl/extractor/crooksandliars.py @@ -18,7 +18,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", 'description': "Fox News, Fox & Friends Weekend, April 4, 2015. Read more... 
http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists", 'timestamp': 1428207000, - 'thumbnail': '//crooksandliars.com/files/mediaposters/2015/04/31235.jpg?ts=1428207050', + 'thumbnail': 'https://crooksandliars.com/files/mediaposters/2015/04/31235.jpg?ts=1428207050', 'uploader': "Heather", } }] @@ -46,7 +46,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'uploader': manifest['author'], 'title': manifest['title'], 'description': manifest['description'], - 'thumbnail': manifest['poster'], + 'thumbnail': self._proto_relative_url(manifest['poster']), 'duration': manifest['duration'], 'timestamp': int(manifest['created']), 'formats': formats, From d97aae75724fc301243a00c5a71ac93b235d62fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 6 Apr 2015 21:24:17 +0600 Subject: [PATCH 0268/2721] [spankbang] Improve and simplify --- youtube_dl/extractor/spankbang.py | 80 +++++++++++++++++-------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index d0b5ba278..7f060b15b 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -1,25 +1,23 @@ -# coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor import re +from .common import InfoExtractor + + class SpankBangIE(InfoExtractor): - """Extractor for http://spankbang.com""" - - _VALID_URL = r"https?://(?:www\.)?spankbang\.com/(?P<id>\w+)/video/.*" - + _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' _TEST = { - "url": "http://spankbang.com/3vvn/video/fantasy+solo", - "md5": "1cc433e1d6aa14bc376535b8679302f7", - "info_dict": { - "id": "3vvn", - "title": "fantasy solo", - "description": "Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.", - "format": "720p", - "format_id": "720p", - "ext": "mp4", - "url": 
"http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', + 'md5': '1cc433e1d6aa14bc376535b8679302f7', + 'info_dict': { + 'id': '3vvn', + 'ext': 'mp4', + 'title': 'fantasy solo', + 'description': 'dillion harper masturbates on a bed', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'silly2587', + 'age_limit': 18, } } @@ -27,26 +25,36 @@ class SpankBangIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r"<h1>(?:<img.+?>)?(.*?)</h1>", webpage, "title") - - stream_key = self._html_search_regex(r"""var\s+stream_key\s*[=]\s*['"](.+?)['"]\s*;""", webpage, "stream_key") - - qualities = re.findall(r"<span.+?>([0-9]+p).*?</span>", webpage) - - formats = [] - for q in sorted(qualities): - formats.append({ - "format_id": q, - "format": q, - "ext": "mp4", - "url": "http://spankbang.com/_{0}/{1}/title/{2}__mp4".format(video_id, stream_key, q) - }) + stream_key = self._html_search_regex( + r'''var\s+stream_key\s*=\s*['"](.+?)['"]''', + webpage, 'stream key') + + formats = [{ + 'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height), + 'ext': 'mp4', + 'format_id': '%sp' % height, + 'height': int(height), + } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)] + self._sort_formats(formats) + + title = self._html_search_regex( + r'(?s)<h1>(.+?)</h1>', webpage, 'title') + description = self._search_regex( + r'class="desc"[^>]*>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage) + uploader = self._search_regex( + r'class="user"[^>]*>([^<]+)', + webpage, 'uploader', fatal=False) + + age_limit = self._rta_search(webpage) return { - "id": video_id, - "title": title, - "description": self._og_search_description(webpage), - "formats": formats + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': 
thumbnail, + 'uploader': uploader, + 'formats': formats, + 'age_limit': age_limit, } - -# vim: tabstop=4 expandtab From 8e4b83b96b8c2bc45aa1a9daa87c04853d3e7ade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 6 Apr 2015 22:18:08 +0200 Subject: [PATCH 0269/2721] Remove check for ssl certs When it uses a capath instead of a cafile, 'get_ca_certs' or 'cert_store_stats' only returns certificates already used in a connection. (see #5364) --- youtube_dl/YoutubeDL.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ce4b72fd3..640b8c99d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1768,14 +1768,6 @@ class YoutubeDL(object): debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) - # The ssl context is only available in python 2.7.9 and 3.x - if hasattr(https_handler, '_context'): - ctx = https_handler._context - # get_ca_certs is unavailable prior to python 3.4 - if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0: - self.report_warning( - 'No ssl certificates were loaded, urls that use https ' - 'won\'t work') ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( proxy_handler, https_handler, cookie_processor, ydlh) From 29713e4268623ce5d471d5d25beea9ef433a2216 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 7 Apr 2015 14:59:13 +0200 Subject: [PATCH 0270/2721] [cnn] Match more affilliates --- youtube_dl/extractor/cnn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 0a77e951c..5efc5f4fe 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 @@ from ..utils import ( class CNNIE(InfoExtractor): _VALID_URL = 
r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ - (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))''' + (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))''' _TESTS = [{ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', @@ -48,6 +48,9 @@ class CNNIE(InfoExtractor): }, { 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'only_matching': True, + }, { + 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', + 'only_matching': True, }] def _real_extract(self, url): From beb10f843f245efab24bec8ac6fead2834c5cce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Apr 2015 21:00:22 +0600 Subject: [PATCH 0271/2721] [addanime] Add format quality (Closes #5371) --- youtube_dl/extractor/addanime.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 203936e54..d2a60a056 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -11,6 +11,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + qualities, ) @@ -63,8 +64,10 @@ class AddAnimeIE(InfoExtractor): note='Confirming after redirect') webpage = self._download_webpage(url, video_id) + FORMATS = ('normal', 'hq') + quality = qualities(FORMATS) formats = [] - for format_id in ('normal', 'hq'): + for format_id in FORMATS: rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) video_url = self._search_regex(rex, webpage, 'video file URLx', fatal=False) @@ -73,6 +76,7 @@ class AddAnimeIE(InfoExtractor): formats.append({ 'format_id': format_id, 'url': video_url, + 'quality': quality(format_id), }) self._sort_formats(formats) video_title = self._og_search_title(webpage) From 5f4b5cf044cff5263fa062596ace6180d42a4534 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Apr 2015 21:00:52 +0600 Subject: [PATCH 0272/2721] [addanime] Extend _VALID_URL (Closes #5372) --- youtube_dl/extractor/addanime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index d2a60a056..28180bc3f 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -16,7 +16,7 @@ from ..utils import ( class AddAnimeIE(InfoExtractor): - _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)' + _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)' _TEST = { 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', 'md5': '72954ea10bc979ab5e2eb288b21425a0', From a35099bd331e42d96e647865f4a644b217bba62a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 7 Apr 2015 21:01:35 +0600 Subject: [PATCH 0273/2721] [addanime] Add test for #5372 --- youtube_dl/extractor/addanime.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 28180bc3f..e3e6d2113 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -17,7 +17,7 @@ from ..utils import ( class AddAnimeIE(InfoExtractor): _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', 'md5': '72954ea10bc979ab5e2eb288b21425a0', 'info_dict': { @@ -26,7 +26,10 @@ class AddAnimeIE(InfoExtractor): 'description': 'One Piece 606', 'title': 'One Piece 606', } - } + }, { + 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From de5c54564874fd870fdfe3fd24f47e3e5f6cedf7 
Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Apr 2015 03:45:02 +0800 Subject: [PATCH 0274/2721] [youtube] Skip WebVTT in DASH manifest (#5297) --- youtube_dl/extractor/youtube.py | 62 +++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 79ce39aa4..2774ec30b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -788,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): errnote='Could not download DASH manifest') formats = [] - for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue - format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) - f = { - 'format_id': format_id, - 'url': video_url, - 'width': int_or_none(r.attrib.get('width')), - 'height': int_or_none(r.attrib.get('height')), - 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), - 'asr': int_or_none(r.attrib.get('audioSamplingRate')), - 'filesize': filesize, - 'fps': int_or_none(r.attrib.get('frameRate')), - } - try: - existing_format = next( - fo for fo in formats - if fo['format_id'] == format_id) - except StopIteration: - full_info = self._formats.get(format_id, {}).copy() - full_info.update(f) - formats.append(full_info) - else: - existing_format.update(f) + for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): + mime_type = a.attrib.get('mimeType') + for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): + url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') + if url_el is None: + continue + if mime_type == 'text/vtt': + # TODO implement WebVTT downloading + pass + elif mime_type.startswith('audio/') or mime_type.startswith('video/'): + format_id = r.attrib['id'] + 
video_url = url_el.text + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + f = { + 'format_id': format_id, + 'url': video_url, + 'width': int_or_none(r.attrib.get('width')), + 'height': int_or_none(r.attrib.get('height')), + 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), + 'asr': int_or_none(r.attrib.get('audioSamplingRate')), + 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), + } + try: + existing_format = next( + fo for fo in formats + if fo['format_id'] == format_id) + except StopIteration: + full_info = self._formats.get(format_id, {}).copy() + full_info.update(f) + formats.append(full_info) + else: + existing_format.update(f) + else: + self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats def _real_extract(self, url): From cc55d08832f5008efb56228900a0dbacb428ed7f Mon Sep 17 00:00:00 2001 From: Pete Hemery <petehemery@hotmail.com> Date: Tue, 7 Apr 2015 22:33:18 +0100 Subject: [PATCH 0275/2721] [ffmpeg] adding exception catching for call to os.utime in run_ffmpeg_multiple_files --- youtube_dl/postprocessor/ffmpeg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 0b60ac7e7..5ef5e0e54 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -146,7 +146,11 @@ class FFmpegPostProcessor(PostProcessor): stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) - os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime)) + try: + os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime)) + except Exception: + self._downloader.report_warning('Cannot update utime of file') + if self._deletetempfiles: for ipath in input_paths: os.remove(ipath) From 418c5cc3fc3ea99b791ad1774a6b03504eab7086 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: 
Wed, 8 Apr 2015 17:26:51 +0800 Subject: [PATCH 0276/2721] [udn] Add new extractor --- test/test_utils.py | 5 +++ youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/generic.py | 20 ++++++++++ youtube_dl/extractor/udn.py | 66 ++++++++++++++++++++++++++++++++ youtube_dl/utils.py | 11 ++++++ 5 files changed, 103 insertions(+) create mode 100644 youtube_dl/extractor/udn.py diff --git a/test/test_utils.py b/test/test_utils.py index 2e3a6480c..8291edd9a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -53,6 +53,7 @@ from youtube_dl.utils import ( uppercase_escape, url_basename, urlencode_postdata, + url_infer_protocol, version_tuple, xpath_with_ns, xpath_text, @@ -296,6 +297,10 @@ class TestUtil(unittest.TestCase): url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), 'trailer.mp4') + def test_url_infer_protocol(self): + self.assertEqual(url_infer_protocol('http://foo.com/', '//bar.com/'), 'http://bar.com/') + self.assertEqual(url_infer_protocol('http://foo.com/', 'https://bar.com/'), 'https://bar.com/') + def test_parse_duration(self): self.assertEqual(parse_duration(None), None) self.assertEqual(parse_duration(False), None) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e6fdf1297..8df1db83e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -557,6 +557,7 @@ from .udemy import ( UdemyIE, UdemyCourseIE ) +from .udn import UDNEmbedIE from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2ff002643..9178d2b7b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,6 +26,7 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_basename, + url_infer_protocol, xpath_text, ) from .brightcove import BrightcoveIE @@ -34,6 +35,7 @@ from .ooyala import OoyalaIE from .rutv import RUTVIE from .smotri import SmotriIE 
from .condenast import CondeNastIE +from .udn import UDNEmbedIE class GenericIE(InfoExtractor): @@ -650,6 +652,17 @@ class GenericIE(InfoExtractor): 'title': "PFT Live: New leader in the 'new-look' defense", 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', }, + }, + # UDN embed + { + 'url': 'http://www.udn.com/news/story/7314/822787', + 'md5': 'de06b4c90b042c128395a88f0384817e', + 'info_dict': { + 'id': '300040', + 'ext': 'mp4', + 'title': '生物老師男變女 全校挺"做自己"', + 'thumbnail': 're:^https?://.*\.jpg$', + } } ] @@ -1268,6 +1281,13 @@ class GenericIE(InfoExtractor): if nbc_sports_url: return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + # Look for UDN embeds + mobj = re.search( + r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) + if mobj is not None: + return self.url_result( + url_infer_protocol(url, mobj.group('url')), 'UDNEmbed') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py new file mode 100644 index 000000000..2b9a733e8 --- /dev/null +++ b/youtube_dl/extractor/udn.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +from .common import InfoExtractor +from ..utils import ( + url_infer_protocol, + js_to_json +) + + +class UDNEmbedIE(InfoExtractor): + _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://video.udn.com/embed/news/300040', + 'md5': 'de06b4c90b042c128395a88f0384817e', + 'info_dict': { + 'id': '300040', + 'ext': 'mp4', + 'title': '生物老師男變女 全校挺"做自己"', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': '//video.udn.com/embed/news/300040', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + page = self._download_webpage(url, video_id) + + options = json.loads(js_to_json(self._html_search_regex( + r'var options\s*=\s*([^;]+);', page, 'video urls dictionary'))) + + video_urls = options['video'] + + 
if video_urls.get('youtube'): + return self.url_result(video_urls.get('youtube'), 'Youtube') + + try: + del video_urls['youtube'] + except KeyError: + pass + + formats = [{ + 'url': self._download_webpage( + url_infer_protocol(url, api_url), video_id, + 'retrieve url for %s video' % video_type), + 'format_id': video_type, + 'preference': 0 if video_type == 'mp4' else -1, + } for video_type, api_url in video_urls.items()] + + self._sort_formats(formats) + + thumbnail = None + + if options.get('gallery') and len(options['gallery']): + thumbnail = options['gallery'][0].get('original') + + return { + 'id': video_id, + 'formats': formats, + 'title': options['title'], + 'thumbnail': thumbnail + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52f0dd09a..f3b8d9f81 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1711,6 +1711,17 @@ def determine_protocol(info_dict): return compat_urllib_parse_urlparse(url).scheme +def url_infer_protocol(ref_url, target_url): + """ Infer protocol for protocol independent target urls """ + parsed_target_url = list(compat_urllib_parse_urlparse(target_url)) + if parsed_target_url[0]: + return target_url + + parsed_target_url[0] = compat_urllib_parse_urlparse(ref_url).scheme + + return compat_urlparse.urlunparse(parsed_target_url) + + def render_table(header_row, data): """ Render a list of rows, each as a list of values """ table = [header_row] + data From 4a20c9f628b6038766a76ed1c3e37aa6a2b34718 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Apr 2015 17:42:26 +0800 Subject: [PATCH 0277/2721] [livestream] Extend _VALID_URL (fixes #5375) --- youtube_dl/extractor/livestream.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 2467f8bdd..ec309dadd 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -21,7 +21,7 @@ from ..utils import ( class 
LivestreamIE(InfoExtractor): IE_NAME = 'livestream' - _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])' + _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])' _TESTS = [{ 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', 'md5': '53274c76ba7754fb0e8d072716f2292b', @@ -51,6 +51,9 @@ class LivestreamIE(InfoExtractor): }, { 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640', 'only_matching': True, + }, { + 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015', + 'only_matching': True, }] def _parse_smil(self, video_id, smil_url): From bd7a6478a2db228c7325a48e13e0e699502f56f4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Apr 2015 19:20:34 +0800 Subject: [PATCH 0278/2721] [theplatform] Fix video url extraction (fixes #5340) In SMIL 2.1, <switch> nodes may be enclosed in <par>. See http://www.w3.org/TR/SMIL2/smil-timing.html#edef-par --- youtube_dl/extractor/theplatform.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 0e3e627f4..a7d060459 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -28,7 +28,7 @@ class ThePlatformIE(InfoExtractor): (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? 
|theplatform:)(?P<id>[^/\?&]+)''' - _TEST = { + _TESTS = [{ # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ 'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', 'info_dict': { @@ -42,7 +42,20 @@ class ThePlatformIE(InfoExtractor): # rtmp download 'skip_download': True, }, - } + # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ + }, { + 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', + 'info_dict': { + 'id': '22d_qsQ6MIRT', + 'ext': 'flv', + 'description': 'md5:ac330c9258c04f9d7512cf26b9595409', + 'title': 'Tesla Model S: A second step towards a cleaner motoring future', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }] @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): @@ -126,7 +139,7 @@ class ThePlatformIE(InfoExtractor): formats = self._extract_f4m_formats(f4m_url, video_id) else: formats = [] - switch = body.find(_x('smil:switch')) + switch = body.find(_x('.//smil:switch')) if switch is not None: base_url = head.find(_x('smil:meta')).attrib['base'] for f in switch.findall(_x('smil:video')): From a662163fd5b3c9b1221b8aeaf54ed9083af8574f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Apr 2015 20:21:34 +0800 Subject: [PATCH 0279/2721] [theplatform] Rework on <switch> inside <par> --- youtube_dl/extractor/theplatform.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index a7d060459..bcddd23e4 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -139,7 +139,9 @@ class ThePlatformIE(InfoExtractor): formats = self._extract_f4m_formats(f4m_url, video_id) else: formats = [] - switch = body.find(_x('.//smil:switch')) + switch = body.find(_x('smil:switch')) + if switch is None: + switch = 
body.find(_x('smil:par//smil:switch')) if switch is not None: base_url = head.find(_x('smil:meta')).attrib['base'] for f in switch.findall(_x('smil:video')): From 0a1603634bcf799eeb769d95b6e716e66123b77f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 8 Apr 2015 21:39:34 +0800 Subject: [PATCH 0280/2721] [utils] Remove url_infer_protocol --- test/test_utils.py | 5 ----- youtube_dl/extractor/generic.py | 3 +-- youtube_dl/extractor/udn.py | 8 +++----- youtube_dl/utils.py | 11 ----------- 4 files changed, 4 insertions(+), 23 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8291edd9a..2e3a6480c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -53,7 +53,6 @@ from youtube_dl.utils import ( uppercase_escape, url_basename, urlencode_postdata, - url_infer_protocol, version_tuple, xpath_with_ns, xpath_text, @@ -297,10 +296,6 @@ class TestUtil(unittest.TestCase): url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), 'trailer.mp4') - def test_url_infer_protocol(self): - self.assertEqual(url_infer_protocol('http://foo.com/', '//bar.com/'), 'http://bar.com/') - self.assertEqual(url_infer_protocol('http://foo.com/', 'https://bar.com/'), 'https://bar.com/') - def test_parse_duration(self): self.assertEqual(parse_duration(None), None) self.assertEqual(parse_duration(False), None) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9178d2b7b..6c212efac 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,7 +26,6 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_basename, - url_infer_protocol, xpath_text, ) from .brightcove import BrightcoveIE @@ -1286,7 +1285,7 @@ class GenericIE(InfoExtractor): r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) if mobj is not None: return self.url_result( - url_infer_protocol(url, mobj.group('url')), 'UDNEmbed') + compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') def 
check_video(vurl): if YoutubeIE.suitable(vurl): diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index 2b9a733e8..bba25bb58 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -3,10 +3,8 @@ from __future__ import unicode_literals import json from .common import InfoExtractor -from ..utils import ( - url_infer_protocol, - js_to_json -) +from ..utils import js_to_json +from ..compat import compat_urlparse class UDNEmbedIE(InfoExtractor): @@ -45,7 +43,7 @@ class UDNEmbedIE(InfoExtractor): formats = [{ 'url': self._download_webpage( - url_infer_protocol(url, api_url), video_id, + compat_urlparse.urljoin(url, api_url), video_id, 'retrieve url for %s video' % video_type), 'format_id': video_type, 'preference': 0 if video_type == 'mp4' else -1, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f3b8d9f81..52f0dd09a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1711,17 +1711,6 @@ def determine_protocol(info_dict): return compat_urllib_parse_urlparse(url).scheme -def url_infer_protocol(ref_url, target_url): - """ Infer protocol for protocol independent target urls """ - parsed_target_url = list(compat_urllib_parse_urlparse(target_url)) - if parsed_target_url[0]: - return target_url - - parsed_target_url[0] = compat_urllib_parse_urlparse(ref_url).scheme - - return compat_urlparse.urlunparse(parsed_target_url) - - def render_table(header_row, data): """ Render a list of rows, each as a list of values """ table = [header_row] + data From aef8fdba1172d60983ba9685249c03b66e7a94f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 8 Apr 2015 21:03:11 +0600 Subject: [PATCH 0281/2721] [theplatform] Allow <par> without <swtich> at all Bare `wget` on http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRTl results in an XML without <switch> at all but with <par> and <video> inside it. Let's handle this possible outcome as well. 
--- youtube_dl/extractor/theplatform.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index bcddd23e4..2d2178331 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -142,6 +142,8 @@ class ThePlatformIE(InfoExtractor): switch = body.find(_x('smil:switch')) if switch is None: switch = body.find(_x('smil:par//smil:switch')) + if switch is None: + switch = body.find(_x('smil:par')) if switch is not None: base_url = head.find(_x('smil:meta')).attrib['base'] for f in switch.findall(_x('smil:video')): From dd29eb7f816bae7cf7807db9f26b7f5621b8d557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 8 Apr 2015 21:40:31 +0600 Subject: [PATCH 0282/2721] [postprocessor/common:postprocessor/ffmpeg] Generalize utime --- youtube_dl/postprocessor/common.py | 13 ++++++++++++- youtube_dl/postprocessor/ffmpeg.py | 12 ++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py index e54ae678d..ef9fdfa19 100644 --- a/youtube_dl/postprocessor/common.py +++ b/youtube_dl/postprocessor/common.py @@ -1,6 +1,11 @@ from __future__ import unicode_literals -from ..utils import PostProcessingError +import os + +from ..utils import ( + PostProcessingError, + encodeFilename, +) class PostProcessor(object): @@ -46,6 +51,12 @@ class PostProcessor(object): """ return None, information # by default, keep file and do nothing + def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'): + try: + os.utime(encodeFilename(path), (atime, mtime)) + except Exception: + self._downloader.report_warning(errnote) + class AudioConversionError(PostProcessingError): pass diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 5ef5e0e54..8e99a3c2c 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ 
b/youtube_dl/postprocessor/ffmpeg.py @@ -146,10 +146,7 @@ class FFmpegPostProcessor(PostProcessor): stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) - try: - os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime)) - except Exception: - self._downloader.report_warning('Cannot update utime of file') + self.try_utime(out_path, oldest_mtime, oldest_mtime) if self._deletetempfiles: for ipath in input_paths: @@ -284,10 +281,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # Try to update the date time for extracted audio file. if information.get('filetime') is not None: - try: - os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) - except Exception: - self._downloader.report_warning('Cannot update utime of audio file') + self.try_utime( + new_path, time.time(), information['filetime'], + errnote='Cannot update utime of audio file') information['filepath'] = new_path return self._nopostoverwrites, information From 372f08c99057b6a994c72cf2591e184231b3e850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 8 Apr 2015 22:27:25 +0600 Subject: [PATCH 0283/2721] [theplatform] Fix for python 2.6 At least single depth level extraction... 
--- youtube_dl/extractor/theplatform.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 2d2178331..50c51d825 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -42,8 +42,8 @@ class ThePlatformIE(InfoExtractor): # rtmp download 'skip_download': True, }, - # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ }, { + # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', 'info_dict': { 'id': '22d_qsQ6MIRT', @@ -128,7 +128,7 @@ class ThePlatformIE(InfoExtractor): head = meta.find(_x('smil:head')) body = meta.find(_x('smil:body')) - f4m_node = body.find(_x('smil:seq//smil:video')) + f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video')) if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' 
not in f4m_url: @@ -141,7 +141,7 @@ class ThePlatformIE(InfoExtractor): formats = [] switch = body.find(_x('smil:switch')) if switch is None: - switch = body.find(_x('smil:par//smil:switch')) + switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch')) if switch is None: switch = body.find(_x('smil:par')) if switch is not None: @@ -162,7 +162,7 @@ class ThePlatformIE(InfoExtractor): 'vbr': vbr, }) else: - switch = body.find(_x('smil:seq//smil:switch')) + switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch')) for f in switch.findall(_x('smil:video')): attr = f.attrib vbr = int(attr['system-bitrate']) // 1000 From 402a3efc927538684fb69e23c65a97b19ce4f663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 8 Apr 2015 22:29:10 +0600 Subject: [PATCH 0284/2721] [theplatform] Modernize --- youtube_dl/extractor/theplatform.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 50c51d825..6a006b2d2 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -17,6 +17,7 @@ from ..utils import ( ExtractorError, xpath_with_ns, unsmuggle_url, + int_or_none, ) _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) @@ -148,9 +149,9 @@ class ThePlatformIE(InfoExtractor): base_url = head.find(_x('smil:meta')).attrib['base'] for f in switch.findall(_x('smil:video')): attr = f.attrib - width = int(attr['width']) - height = int(attr['height']) - vbr = int(attr['system-bitrate']) // 1000 + width = int_or_none(attr.get('width')) + height = int_or_none(attr.get('height')) + vbr = int_or_none(attr.get('system-bitrate'), 1000) format_id = '%dx%d_%dk' % (width, height, vbr) formats.append({ 'format_id': format_id, @@ -165,7 +166,7 @@ class ThePlatformIE(InfoExtractor): switch = body.find(_x('smil:seq//smil:switch')) or 
body.find(_x('smil:seq/smil:switch')) for f in switch.findall(_x('smil:video')): attr = f.attrib - vbr = int(attr['system-bitrate']) // 1000 + vbr = int_or_none(attr.get('system-bitrate'), 1000) ext = determine_ext(attr['src']) if ext == 'once': ext = 'mp4' @@ -184,5 +185,5 @@ class ThePlatformIE(InfoExtractor): 'formats': formats, 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], - 'duration': info['duration'] // 1000, + 'duration': int_or_none(info.get('duration'), 1000), } From af76e8174d1f73e0c44e6bead56a780c665e299d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Apr 2015 02:25:31 +0600 Subject: [PATCH 0285/2721] [dailymotion:user] Improve _VALID_URL (Closes #5380) --- youtube_dl/extractor/dailymotion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 47d58330b..7615ecd4b 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -224,7 +224,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', From 1dc2726f8da5ec7726e3c45ee718ce4464b83ead Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 9 Apr 2015 00:21:19 +0200 Subject: [PATCH 0286/2721] release 2015.04.09 --- README.md | 194 ++++++++++++++++++++--------------------- docs/supportedsites.md | 5 ++ youtube_dl/version.py | 2 +- 3 files changed, 103 insertions(+), 98 deletions(-) diff --git a/README.md b/README.md index 4f9fc8174..caa1478d9 100644 --- a/README.md +++ b/README.md @@ -45,21 +45,21 @@ which 
means you can modify it, redistribute it or use it however you like. youtube-dl [OPTIONS] URL [URL...] # OPTIONS - -h, --help print this help text and exit - --version print program version and exit - -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) - -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist + -h, --help Print this help text and exit + --version Print program version and exit + -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) + -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs - --dump-user-agent display the current browser identification + --dump-user-agent Display the current browser identification --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported extractors - --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". + --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching. --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows) --flat-playlist Do not extract the videos of a playlist, only list them. 
- --no-color Do not emit color codes in output. + --no-color Do not emit color codes in output ## Network Options: --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection @@ -71,70 +71,70 @@ which means you can modify it, redistribute it or use it however you like. not present) is used for the actual downloading. (experimental) ## Video Selection: - --playlist-start NUMBER playlist video to start at (default is 1) - --playlist-end NUMBER playlist video to end at (default is last) - --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" + --playlist-start NUMBER Playlist video to start at (default is 1) + --playlist-end NUMBER Playlist video to end at (default is last) + --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13. - --match-title REGEX download only matching titles (regex or caseless sub-string) - --reject-title REGEX skip download for matching titles (regex or caseless sub-string) + --match-title REGEX Download only matching titles (regex or caseless sub-string) + --reject-title REGEX Skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) - --date DATE download only videos uploaded in this date - --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive) - --dateafter DATE download only videos uploaded on or after this date (i.e. 
inclusive) + --date DATE Download only videos uploaded in this date + --datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive) + --dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive) --min-views COUNT Do not download any videos with less than COUNT views --max-views COUNT Do not download any videos with more than COUNT views - --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, + --match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" . - --no-playlist If the URL refers to a video and a playlist, download only the video. - --yes-playlist If the URL refers to a video and a playlist, download the playlist. - --age-limit YEARS download only videos suitable for the given age + --no-playlist Download only the video, if the URL refers to a video and a playlist. + --yes-playlist Download the playlist, if the URL refers to a video and a playlist. + --age-limit YEARS Download only videos suitable for the given age --download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it. 
--include-ads Download advertisements as well (experimental) ## Download Options: - -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M) - -R, --retries RETRIES number of retries (default is 10), or "infinite". - --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024) - --no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE. + -r, --rate-limit LIMIT Maximum download rate in bytes per second (e.g. 50K or 4.2M) + -R, --retries RETRIES Number of retries (default is 10), or "infinite". + --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024) + --no-resize-buffer Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE. --playlist-reverse Download playlist videos in reverse order - --xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize - --hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg. + --xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental) + --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental) --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget - --external-downloader-args ARGS Give these arguments to the external downloader. + --external-downloader-args ARGS Give these arguments to the external downloader ## Filesystem Options: - -a, --batch-file FILE file containing URLs to download ('-' for stdin) - --id use only video ID in file name - -o, --output TEMPLATE output filename template. 
Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader + -a, --batch-file FILE File containing URLs to download ('-' for stdin) + --id Use only video ID in file name + -o, --output TEMPLATE Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for - the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"), + the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"), %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' . 
- --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given + --autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames - -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000 - -t, --title [deprecated] use title in file name (default) - -l, --literal [deprecated] alias of --title - -w, --no-overwrites do not overwrite files - -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible. - --no-continue do not resume partially downloaded files (restart from beginning) - --no-part do not use .part files - write directly into output file - --no-mtime do not use the Last-modified header to set the file modification time - --write-description write video description to a .description file - --write-info-json write video metadata to a .info.json file - --write-annotations write video annotations to a .annotation file - --load-info FILE json file containing the video information (created with the "--write-json" option) - --cookies FILE file to read cookies from and dump cookie jar in + -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000 + -t, --title [deprecated] Use title in file name (default) + -l, --literal [deprecated] Alias of --title + -w, --no-overwrites Do not overwrite files + -c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible. 
+ --no-continue Do not resume partially downloaded files (restart from beginning) + --no-part Do not use .part files - write directly into output file + --no-mtime Do not use the Last-modified header to set the file modification time + --write-description Write video description to a .description file + --write-info-json Write video metadata to a .info.json file + --write-annotations Write video annotations to a .annotation file + --load-info FILE JSON file containing the video information (created with the "--write-info-json" option) + --cookies FILE File to read cookies from and dump cookie jar in --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change. @@ -142,49 +142,49 @@ which means you can modify it, redistribute it or use it however you like. --rm-cache-dir Delete all filesystem cache files ## Thumbnail images: - --write-thumbnail write thumbnail image to disk - --write-all-thumbnails write all thumbnail image formats to disk + --write-thumbnail Write thumbnail image to disk + --write-all-thumbnails Write all thumbnail image formats to disk --list-thumbnails Simulate and list all available thumbnail formats ## Verbosity / Simulation Options: - -q, --quiet activates quiet mode + -q, --quiet Activate quiet mode --no-warnings Ignore warnings - -s, --simulate do not download the video and do not write anything to disk - --skip-download do not download the video - -g, --get-url simulate, quiet but print URL - -e, --get-title simulate, quiet but print title - --get-id simulate, quiet but print id - --get-thumbnail simulate, quiet but print thumbnail URL - --get-description simulate, quiet but print video description - --get-duration simulate, quiet but print video length - --get-filename simulate, quiet but print output filename - 
--get-format simulate, quiet but print output format - -j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys. - -J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist + -s, --simulate Do not download the video and do not write anything to disk + --skip-download Do not download the video + -g, --get-url Simulate, quiet but print URL + -e, --get-title Simulate, quiet but print title + --get-id Simulate, quiet but print id + --get-thumbnail Simulate, quiet but print thumbnail URL + --get-description Simulate, quiet but print video description + --get-duration Simulate, quiet but print video length + --get-filename Simulate, quiet but print output filename + --get-format Simulate, quiet but print output format + -j, --dump-json Simulate, quiet but print JSON information. See --output for a description of available keys. + -J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line. --print-json Be quiet and print the video information as JSON (video is still being downloaded). 
- --newline output progress bar as new lines - --no-progress do not print progress bar - --console-title display progress in console titlebar - -v, --verbose print various debugging information - --dump-pages print downloaded pages to debug problems (very verbose) + --newline Output progress bar as new lines + --no-progress Do not print progress bar + --console-title Display progress in console titlebar + -v, --verbose Print various debugging information + --dump-pages Print downloaded pages to debug problems (very verbose) --write-pages Write downloaded intermediary pages to files in the current directory to debug problems --print-traffic Display sent and read HTTP traffic - -C, --call-home Contact the youtube-dl server for debugging. - --no-call-home Do NOT contact the youtube-dl server for debugging. + -C, --call-home Contact the youtube-dl server for debugging + --no-call-home Do NOT contact the youtube-dl server for debugging ## Workarounds: --encoding ENCODING Force the specified encoding (experimental) - --no-check-certificate Suppress HTTPS certificate validation. + --no-check-certificate Suppress HTTPS certificate validation --prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube) - --user-agent UA specify a custom user agent - --referer URL specify a custom referer, use if the video access is restricted to one domain - --add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times + --user-agent UA Specify a custom user agent + --referer URL Specify a custom referer, use if the video access is restricted to one domain + --add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times --bidi-workaround Work around terminals that lack bidirectional text support. 
Requires bidiv or fribidi executable in PATH --sleep-interval SECONDS Number of seconds to sleep before each download. ## Video Format Options: - -f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by + -f, --format FORMAT Video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, @@ -194,44 +194,44 @@ which means you can modify it, redistribute it or use it however you like. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio. - --all-formats download all available video formats - --prefer-free-formats prefer free video formats unless a specific one is requested - --max-quality FORMAT highest quality format to download - -F, --list-formats list all available formats + --all-formats Download all available video formats + --prefer-free-formats Prefer free video formats unless a specific one is requested + --max-quality FORMAT Highest quality format to download + -F, --list-formats List all available formats --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. 
One of mkv, mp4, ogg, webm, flv.Ignored if no merge is required ## Subtitle Options: - --write-sub write subtitle file - --write-auto-sub write automatic subtitle file (youtube only) - --all-subs downloads all the available subtitles of the video - --list-subs lists all available subtitles for the video - --sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best" - --sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' + --write-sub Write subtitle file + --write-auto-sub Write automatic subtitle file (YouTube only) + --all-subs Download all the available subtitles of the video + --list-subs List all available subtitles for the video + --sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best" + --sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' ## Authentication Options: - -u, --username USERNAME login with this account ID - -p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively. - -2, --twofactor TWOFACTOR two-factor auth code - -n, --netrc use .netrc authentication data - --video-password PASSWORD video password (vimeo, smotri) + -u, --username USERNAME Login with this account ID + -p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively. 
+ -2, --twofactor TWOFACTOR Two-factor auth code + -n, --netrc Use .netrc authentication data + --video-password PASSWORD Video password (vimeo, smotri) ## Post-processing Options: - -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) - --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default - --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K - (default 5) + -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) + --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default + --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default + 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) - -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default - --no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default - --embed-subs embed subtitles in the video (only for mp4 videos) - --embed-thumbnail embed thumbnail in the audio as cover art - --add-metadata write metadata to the video file - --metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. 
The format syntax is the same as --output, the parsed + -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default + --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default + --embed-subs Embed subtitles in the video (only for mp4 videos) + --embed-thumbnail Embed thumbnail in the audio as cover art + --add-metadata Write metadata to the video file + --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise" - --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards) + --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --fixup POLICY Automatically correct known faults of the file. 
One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; fix file if we can, warn otherwise) --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2785b9587..c85a39918 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -166,6 +166,7 @@ - **Gamekings** - **GameOne** - **gameone:playlist** + - **Gamersyde** - **GameSpot** - **GameStar** - **Gametrailers** @@ -351,6 +352,7 @@ - **PornHub** - **PornHubPlaylist** - **Pornotube** + - **PornoVoisines** - **PornoXO** - **PrimeShareTV** - **PromptFile** @@ -362,6 +364,7 @@ - **radio.de** - **radiobremen** - **radiofrance** + - **RadioJavan** - **Rai** - **RBMARadio** - **RedTube** @@ -422,6 +425,7 @@ - **southpark.cc.com** - **southpark.de** - **Space** + - **SpankBang** - **Spankwire** - **Spiegel** - **Spiegel:Article**: Articles on spiegel.de @@ -501,6 +505,7 @@ - **Ubu** - **udemy** - **udemy:course** + - **UDNEmbed** - **Ultimedia** - **Unistra** - **Urort**: NRK P3 Urørt diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e1c385bec..1095fea2f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.03' +__version__ = '2015.04.09' From ce73839fe4bc1ac43d7a6540df040139f82948b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 9 Apr 2015 14:01:33 +0200 Subject: [PATCH 0287/2721] [rtve] Detect videos that are no longer available --- youtube_dl/extractor/rtve.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 8d9be1b98..849300140 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -8,6 +8,7 @@ import time from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( + 
ExtractorError, float_or_none, remove_end, std_headers, @@ -98,6 +99,8 @@ class RTVEALaCartaIE(InfoExtractor): info = self._download_json( 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0] + if info['state'] == 'DESPU': + raise ExtractorError('The video is no longer available', expected=True) png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) png = self._download_webpage(png_url, video_id, 'Downloading url information') video_url = _decrypt_url(png) From aa2af7ba7469370f987aa178ac031fdad00aae3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Apr 2015 19:53:00 +0600 Subject: [PATCH 0288/2721] [dumpert] Add nsfw cookie (Closes #5382) --- youtube_dl/extractor/dumpert.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index e43bc81b2..9c594b757 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor +from ..compat import compat_urllib_request from ..utils import qualities @@ -23,7 +24,10 @@ class DumpertIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'nsfw=1') + webpage = self._download_webpage(req, video_id) files_base64 = self._search_regex( r'data-files="([^"]+)"', webpage, 'data files') From 6ac41a4ef50029fa3b0eee5083805d0133a4d1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Apr 2015 22:32:22 +0600 Subject: [PATCH 0289/2721] [vine] Zero rate videos is perfectly valid (#5389) --- youtube_dl/extractor/vine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vine.py 
b/youtube_dl/extractor/vine.py index c3187cfeb..353791e1d 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -37,7 +37,7 @@ class VineIE(InfoExtractor): 'vcodec': f['format'], 'quality': f['rate'], 'url': f['videoUrl'], - } for f in data['videoUrls'] if f.get('rate')] + } for f in data['videoUrls']] self._sort_formats(formats) From 58a9f1b86404e3a6c26c7143cb408db793aa5946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Apr 2015 22:32:48 +0600 Subject: [PATCH 0290/2721] [vine] Fix post data regex (Closes #5389) --- youtube_dl/extractor/vine.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 353791e1d..13c13152e 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -30,7 +30,8 @@ class VineIE(InfoExtractor): webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) data = json.loads(self._html_search_regex( - r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data')) + r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id, + webpage, 'vine data')) formats = [{ 'format_id': '%(format)s-%(rate)s' % f, From 3359fb661fdf3f9308611dc10c5b579b7a7ef56e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Apr 2015 22:37:54 +0600 Subject: [PATCH 0291/2721] [vine] Add tests for #5389 --- youtube_dl/extractor/vine.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 13c13152e..804adbdb0 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -10,7 +10,7 @@ from ..utils import unified_strdate class VineIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)' - _TEST = { + _TESTS = [{ 'url': 'https://vine.co/v/b9KOOWX7HUx', 'md5': 
'2f36fed6235b16da96ce9b4dc890940d', 'info_dict': { @@ -23,7 +23,33 @@ class VineIE(InfoExtractor): 'uploader': 'Jack Dorsey', 'uploader_id': '76', }, - } + }, { + 'url': 'https://vine.co/v/MYxVapFvz2z', + 'md5': '7b9a7cbc76734424ff942eb52c8f1065', + 'info_dict': { + 'id': 'MYxVapFvz2z', + 'ext': 'mp4', + 'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14', + 'alt_title': 'Vine by Luna', + 'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14', + 'upload_date': '20140815', + 'uploader': 'Luna', + 'uploader_id': '1102363502380728320', + }, + }, { + 'url': 'https://vine.co/v/bxVjBbZlPUH', + 'md5': 'ea27decea3fa670625aac92771a96b73', + 'info_dict': { + 'id': 'bxVjBbZlPUH', + 'ext': 'mp4', + 'title': '#mw3 #ac130 #killcam #angelofdeath', + 'alt_title': 'Vine by Z3k3', + 'description': '#mw3 #ac130 #killcam #angelofdeath', + 'upload_date': '20130430', + 'uploader': 'Z3k3', + 'uploader_id': '936470460173008896', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From 64f1aba8f1a6dba88e0dd0edc799fee978c7ce76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Apr 2015 22:40:18 +0600 Subject: [PATCH 0292/2721] [vine] Extend _VALID_URL --- youtube_dl/extractor/vine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 804adbdb0..a4d5af147 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -9,7 +9,7 @@ from ..utils import unified_strdate class VineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)' + _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)' _TESTS = [{ 'url': 'https://vine.co/v/b9KOOWX7HUx', 'md5': '2f36fed6235b16da96ce9b4dc890940d', @@ -49,6 +49,9 @@ class VineIE(InfoExtractor): 'uploader': 'Z3k3', 'uploader_id': '936470460173008896', }, + }, { + 'url': 
'https://vine.co/oembed/MYxVapFvz2z.json', + 'only_matching': True, }] def _real_extract(self, url): From 4c4780c25e20d6ad281faff5d6aede3df4b58f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 9 Apr 2015 22:41:41 +0600 Subject: [PATCH 0293/2721] [vine] Modernize --- youtube_dl/extractor/vine.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index a4d5af147..d4f5a991e 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -58,9 +58,11 @@ class VineIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) - data = json.loads(self._html_search_regex( - r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id, - webpage, 'vine data')) + data = self._parse_json( + self._html_search_regex( + r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id, + webpage, 'vine data'), + video_id) formats = [{ 'format_id': '%(format)s-%(rate)s' % f, From ce9f47de99f3f607ad940897e457d02ea084795d Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Thu, 9 Apr 2015 23:54:53 +0300 Subject: [PATCH 0294/2721] [teamcoco] Fix extraction --- youtube_dl/extractor/teamcoco.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index a46a7ecba..b3cfe4e4f 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -4,7 +4,10 @@ import base64 import re from .common import InfoExtractor -from ..utils import qualities +from ..utils import ( + ExtractorError, + qualities, +) class TeamcocoIE(InfoExtractor): @@ -49,14 +52,12 @@ class TeamcocoIE(InfoExtractor): video_id = self._html_search_regex( self._VIDEO_ID_REGEXES, webpage, 'video id') - embed_url = 'http://teamcoco.com/embed/v/%s' % video_id - embed = 
self._download_webpage( - embed_url, video_id, 'Downloading embed page') - - player_data = self._parse_json(self._search_regex( - r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id) + preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage) + if not preloads: + raise ExtractorError('Preload information could not be extracted') + preload = max([(len(p), p) for p in preloads])[1] data = self._parse_json( - base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id) + base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id) formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) From 5bb6328cb944f67f7981eddc40d92998a153f00d Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Thu, 9 Apr 2015 23:57:51 +0300 Subject: [PATCH 0295/2721] [teamcoco] Extract m3u8 URLs --- youtube_dl/extractor/teamcoco.py | 34 ++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index b3cfe4e4f..41677503c 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -62,23 +62,27 @@ class TeamcocoIE(InfoExtractor): formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) for filed in data['files']: - m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) - if m_format is not None: - format_id = m_format.group(1) + if filed['type'] == 'hls': + formats.extend(self._extract_m3u8_formats( + filed['url'], video_id, ext='mp4')) else: - format_id = filed['bitrate'] - tbr = ( - int(filed['bitrate']) - if filed['bitrate'].isdigit() - else None) + m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) + if m_format is not None: + format_id = m_format.group(1) + else: + format_id = filed['bitrate'] + tbr = ( + int(filed['bitrate']) + if filed['bitrate'].isdigit() + else None) - formats.append({ - 'url': 
filed['url'], - 'ext': 'mp4', - 'tbr': tbr, - 'format_id': format_id, - 'quality': get_quality(format_id), - }) + formats.append({ + 'url': filed['url'], + 'ext': 'mp4', + 'tbr': tbr, + 'format_id': format_id, + 'quality': get_quality(format_id), + }) self._sort_formats(formats) From 7088f5b5fa07381ec9d484c8ef83616724654e3f Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Fri, 10 Apr 2015 02:03:38 +0300 Subject: [PATCH 0296/2721] [teamcoco] Extract duration --- youtube_dl/extractor/teamcoco.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 41677503c..1caf08cb7 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -21,6 +21,7 @@ class TeamcocoIE(InfoExtractor): 'ext': 'mp4', 'title': 'Conan Becomes A Mary Kay Beauty Consultant', 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', + 'duration': 504, 'age_limit': 0, } }, { @@ -31,6 +32,7 @@ class TeamcocoIE(InfoExtractor): 'ext': 'mp4', 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', 'title': 'Louis C.K. Interview Pt. 
1 11/3/11', + 'duration': 288, 'age_limit': 0, } } @@ -93,5 +95,6 @@ class TeamcocoIE(InfoExtractor): 'title': data['title'], 'thumbnail': data.get('thumb', {}).get('href'), 'description': data.get('teaser'), + 'duration': data.get('duration'), 'age_limit': self._family_friendly_search(webpage), } From 8749477ed0a3cbc85d1726b6526fa5e794ce6072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Apr 2015 22:27:16 +0600 Subject: [PATCH 0297/2721] [rai] Fix extraction (Closes #5396) --- youtube_dl/extractor/rai.py | 72 ++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 144e33982..115cc64cc 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -13,7 +13,7 @@ from ..utils import ( class RaiIE(InfoExtractor): - _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' + _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' _TESTS = [ { 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', @@ -64,32 +64,65 @@ class RaiIE(InfoExtractor): }, ] + def _extract_relinker_url(self, webpage): + return self._proto_relative_url(self._search_regex( + [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'], + webpage, 'relinker url', default=None)) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + host = mobj.group('host') - media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON') + webpage = self._download_webpage(url, video_id) - title = media.get('name') - description = media.get('desc') - thumbnail = media.get('image_300') 
or media.get('image_medium') or media.get('image') - duration = parse_duration(media.get('length')) - uploader = media.get('author') - upload_date = unified_strdate(media.get('date')) + relinker_url = self._extract_relinker_url(webpage) - formats = [] + if not relinker_url: + iframe_path = self._search_regex( + r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"', + webpage, 'iframe') + iframe_page = self._download_webpage( + '%s/%s' % (host, iframe_path), video_id) + relinker_url = self._extract_relinker_url(iframe_page) - for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']: - media_url = media.get(format_id) - if not media_url: - continue - formats.append({ + relinker = self._download_json( + '%s&output=47' % relinker_url, video_id) + + media_url = relinker['video'][0] + ct = relinker.get('ct') + if ct == 'f4m': + formats = self._extract_f4m_formats( + media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id) + else: + formats = [{ 'url': media_url, - 'format_id': format_id, - 'ext': 'mp4', - }) + 'format_id': ct, + }] - subtitles = self.extract_subtitles(video_id, url) + json_link = self._html_search_meta( + 'jsonlink', webpage, 'JSON link', default=None) + if json_link: + media = self._download_json( + host + json_link, video_id, 'Downloading video JSON') + title = media.get('name') + description = media.get('desc') + thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') + duration = parse_duration(media.get('length')) + uploader = media.get('author') + upload_date = unified_strdate(media.get('date')) + else: + title = self._search_regex( + r'var\s+videoTitolo\s*=\s*"([^"]+)";', + webpage, 'title', default=None) or self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + duration = None + uploader = self._html_search_meta('Editore', webpage, 'uploader') + upload_date = unified_strdate(self._html_search_meta( + 'item-date', webpage, 'upload date')) + + 
subtitles = self.extract_subtitles(video_id, webpage) return { 'id': video_id, @@ -103,8 +136,7 @@ class RaiIE(InfoExtractor): 'subtitles': subtitles, } - def _get_subtitles(self, video_id, url): - webpage = self._download_webpage(url, video_id) + def _get_subtitles(self, video_id, webpage): subtitles = {} m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) if m: From d7c78decb0fb4adcf84e5e74cd0d4f858d48e2ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Apr 2015 22:41:26 +0600 Subject: [PATCH 0298/2721] [rai] Improve extraction --- youtube_dl/extractor/rai.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 115cc64cc..12127c634 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -82,9 +82,9 @@ class RaiIE(InfoExtractor): iframe_path = self._search_regex( r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"', webpage, 'iframe') - iframe_page = self._download_webpage( + webpage = self._download_webpage( '%s/%s' % (host, iframe_path), video_id) - relinker_url = self._extract_relinker_url(iframe_page) + relinker_url = self._extract_relinker_url(webpage) relinker = self._download_json( '%s&output=47' % relinker_url, video_id) @@ -112,15 +112,15 @@ class RaiIE(InfoExtractor): uploader = media.get('author') upload_date = unified_strdate(media.get('date')) else: - title = self._search_regex( - r'var\s+videoTitolo\s*=\s*"([^"]+)";', - webpage, 'title', default=None) or self._og_search_title(webpage) + title = (self._search_regex( + r'var\s+videoTitolo\s*=\s*"(.+?)";', + webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"') description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) duration = None uploader = self._html_search_meta('Editore', webpage, 'uploader') upload_date = 
unified_strdate(self._html_search_meta( - 'item-date', webpage, 'upload date')) + 'item-date', webpage, 'upload date', default=None)) subtitles = self.extract_subtitles(video_id, webpage) From cd47a628fcfb4ec49d0559bdedf792d99a52d53d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Apr 2015 22:41:52 +0600 Subject: [PATCH 0299/2721] [rai] Add test for #5396 --- youtube_dl/extractor/rai.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 12127c634..1631faf29 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -62,6 +62,17 @@ class RaiIE(InfoExtractor): 'description': 'Edizione delle ore 20:30 ', } }, + { + 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', + 'md5': '02b64456f7cc09f96ff14e7dd489017e', + 'info_dict': { + 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', + 'ext': 'flv', + 'title': 'Il Candidato - Primo episodio: "Le Primarie"', + 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!', + 'uploader': 'RaiTre', + } + } ] def _extract_relinker_url(self, webpage): From 66ee7b3234482ddcb24849d3ab64db382fff5bfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 10 Apr 2015 23:36:28 +0600 Subject: [PATCH 0300/2721] [ted] Extract all formats (Closes #5397) --- youtube_dl/extractor/ted.py | 51 +++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 4cec06f8b..a2dc14c2b 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -5,9 +5,8 @@ import re from .common import InfoExtractor -from ..compat import ( - compat_str, -) +from ..compat import compat_str +from ..utils import int_or_none class TEDIE(InfoExtractor): @@ 
-170,17 +169,41 @@ class TEDIE(InfoExtractor): finfo = self._NATIVE_FORMATS.get(f['format_id']) if finfo: f.update(finfo) - else: - # Use rtmp downloads - formats = [{ - 'format_id': f['name'], - 'url': talk_info['streamer'], - 'play_path': f['file'], - 'ext': 'flv', - 'width': f['width'], - 'height': f['height'], - 'tbr': f['bitrate'], - } for f in talk_info['resources']['rtmp']] + + for format_id, resources in talk_info['resources'].items(): + if format_id == 'h264': + for resource in resources: + bitrate = int_or_none(resource.get('bitrate')) + formats.append({ + 'url': resource['file'], + 'format_id': '%s-%sk' % (format_id, bitrate), + 'tbr': bitrate, + }) + elif format_id == 'rtmp': + streamer = talk_info.get('streamer') + if not streamer: + continue + for resource in resources: + formats.append({ + 'format_id': '%s-%s' % (format_id, resource.get('name')), + 'url': streamer, + 'play_path': resource['file'], + 'ext': 'flv', + 'width': int_or_none(resource.get('width')), + 'height': int_or_none(resource.get('height')), + 'tbr': int_or_none(resource.get('bitrate')), + }) + elif format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)) + + audio_download = talk_info.get('audioDownload') + if audio_download: + formats.append({ + 'url': audio_download, + 'format_id': 'audio', + }) + self._sort_formats(formats) video_id = compat_str(talk_info['id']) From 65939effb55087f584ecd5d4b304eadbdef875d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Apr 2015 18:52:41 +0600 Subject: [PATCH 0301/2721] [hitbox:live] Fix hls extration (Closes #5315) --- youtube_dl/extractor/hitbox.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index 84bd7c080..d606429ca 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -10,6 +10,7 @@ 
from ..utils import ( float_or_none, int_or_none, compat_str, + determine_ext, ) @@ -147,12 +148,27 @@ class HitboxLiveIE(HitboxIE): servers.append(base_url) for stream in cdn.get('bitrates'): label = stream.get('label') - if label != 'Auto': + if label == 'Auto': + continue + stream_url = stream.get('url') + if not stream_url: + continue + bitrate = int_or_none(stream.get('bitrate')) + if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8': + if not stream_url.startswith('http'): + continue formats.append({ - 'url': '%s/%s' % (base_url, stream.get('url')), + 'url': stream_url, 'ext': 'mp4', - 'vbr': stream.get('bitrate'), - 'resolution': label, + 'tbr': bitrate, + 'format_note': label, + 'rtmp_live': True, + }) + else: + formats.append({ + 'url': '%s/%s' % (base_url, stream_url), + 'ext': 'mp4', + 'tbr': bitrate, 'rtmp_live': True, 'format_note': host, 'page_url': url, From 7a91d1fc43f8873e6636c316006c908d7efe2f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Apr 2015 20:03:12 +0600 Subject: [PATCH 0302/2721] [crooksandliars] Improve embed extractor and remove article extractor --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/crooksandliars.py | 87 +++++++++++--------------- 2 files changed, 39 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 894aa5b43..bbf3be41d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -90,7 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .condenast import CondeNastIE from .cracked import CrackedIE from .criterion import CriterionIE -from .crooksandliars import CrooksAndLiarsIE, CrooksAndLiarsArticleIE +from .crooksandliars import CrooksAndLiarsIE from .crunchyroll import ( CrunchyrollIE, CrunchyrollShowPlaylistIE diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py index 
cee0603f4..143509004 100644 --- a/youtube_dl/extractor/crooksandliars.py +++ b/youtube_dl/extractor/crooksandliars.py @@ -1,71 +1,60 @@ from __future__ import unicode_literals -import json - from .common import InfoExtractor from ..utils import ( - mimetype2ext, + int_or_none, + qualities, ) class CrooksAndLiarsIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//embed.crooksandliars.com/embed/(?P<id>[A-Za-z0-9]+)(?:$|[?#])' - + _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', 'info_dict': { - 'id': 'https://embed.crooksandliars.com/embed/8RUoRhRi', + 'id': '8RUoRhRi', + 'ext': 'mp4', 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", - 'description': "Fox News, Fox & Friends Weekend, April 4, 2015. Read more... http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists", + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'thumbnail': 're:^https?://.*\.jpg', 'timestamp': 1428207000, - 'thumbnail': 'https://crooksandliars.com/files/mediaposters/2015/04/31235.jpg?ts=1428207050', - 'uploader': "Heather", + 'upload_date': '20150405', + 'uploader': 'Heather', + 'duration': 236, } - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - manifest = json.loads(self._html_search_regex(r'var manifest = ({.*?})\n', webpage, 'manifest JSON')) - - formats = [] - for item in manifest['flavors']: - if not item['mime'].startswith('video/'): # XXX: or item['exclude']? - continue - formats.append({ - 'format_id': item['type'], - 'ext': mimetype2ext(item['mime']), - 'url': item['url'], - }) - - # XXX: manifest['url']? 
- return { - 'url': url, - 'id': video_id, - 'uploader': manifest['author'], - 'title': manifest['title'], - 'description': manifest['description'], - 'thumbnail': self._proto_relative_url(manifest['poster']), - 'duration': manifest['duration'], - 'timestamp': int(manifest['created']), - 'formats': formats, - } - -class CrooksAndLiarsArticleIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//crooksandliars.com/\d+/\d+/(?P<id>[a-z\-]+)(?:/|$)' - - _TESTS = [{ - 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + }, { + 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - player_url = self._proto_relative_url(self._html_search_regex(r'<iframe src="(//embed.crooksandliars.com/.*)"', webpage, 'embedded player')) + + webpage = self._download_webpage( + 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) + + manifest = self._parse_json( + self._search_regex( + r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'), + video_id) + + quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high')) + + formats = [{ + 'url': item['url'], + 'format_id': item['type'], + 'quality': quality(item['type']), + } for item in manifest['flavors'] if item['mime'].startswith('video/')] + self._sort_formats(formats) return { - '_type': 'url', - 'url': player_url + 'url': url, + 'id': video_id, + 'title': manifest['title'], + 'description': manifest.get('description'), + 'thumbnail': self._proto_relative_url(manifest.get('poster')), + 'timestamp': int_or_none(manifest.get('created')), + 'uploader': manifest.get('author'), + 'duration': int_or_none(manifest.get('duration')), + 'formats': formats, } From 18153f1b3249701a82c24ec492f5d58929292caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Apr 2015 20:20:20 +0600 Subject: [PATCH 0303/2721] 
[generic] Add support for Crooks and Liars embeds --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6c212efac..0dc52ed23 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1275,6 +1275,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') + # Look for Crooks and Liars embeds + mobj = re.search( + r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for NBC Sports VPlayer embeds nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) if nbc_sports_url: From a4257017ef2ee665d35dd71905db03ca3913c92e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Apr 2015 20:26:42 +0600 Subject: [PATCH 0304/2721] [generic] Add tests for Crooks and Liars embeds --- youtube_dl/extractor/generic.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0dc52ed23..7ad555e9f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -642,6 +642,32 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # Crooks and Liars embed + { + 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + 'info_dict': { + 'id': '8RUoRhRi', + 'ext': 'mp4', + 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'timestamp': 1428207000, + 'upload_date': '20150405', + 'uploader': 'Heather', + }, + }, + # Crooks and Liars external embed + { + 'url': 
'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/', + 'info_dict': { + 'id': 'MTE3MjUtMzQ2MzA', + 'ext': 'mp4', + 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5', + 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec', + 'timestamp': 1265032391, + 'upload_date': '20100201', + 'uploader': 'Heather', + }, + }, # NBC Sports vplayer embed { 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a', From ed5641e2499de9dfbfa78c6684e92e9581402f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 11 Apr 2015 20:27:39 +0600 Subject: [PATCH 0305/2721] [crooksandliars] Quotes consistency --- youtube_dl/extractor/crooksandliars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py index 143509004..443eb7691 100644 --- a/youtube_dl/extractor/crooksandliars.py +++ b/youtube_dl/extractor/crooksandliars.py @@ -14,7 +14,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'info_dict': { 'id': '8RUoRhRi', 'ext': 'mp4', - 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", + 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', 'thumbnail': 're:^https?://.*\.jpg', 'timestamp': 1428207000, From b04b94da5fbe944e2a9d2946ab8b3acb212f9f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Apr 2015 03:57:56 +0600 Subject: [PATCH 0306/2721] [options] Fix file based configurations for python 2 (Closes #5401) --- youtube_dl/options.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5720fb424..11603f60d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -794,21 +794,22 @@ def 
parseOpts(overrideArguments=None): if opts.verbose: write_string('[debug] Override config: ' + repr(overrideArguments) + '\n') else: - command_line_conf = sys.argv[1:] - # Workaround for Python 2.x, where argv is a byte list - if sys.version_info < (3,): - command_line_conf = [ - a.decode(preferredencoding(), 'replace') for a in command_line_conf] + def compat_conf(conf): + if sys.version_info < (3,): + return [a.decode(preferredencoding(), 'replace') for a in conf] + return conf + + command_line_conf = compat_conf(sys.argv[1:]) if '--ignore-config' in command_line_conf: system_conf = [] user_conf = [] else: - system_conf = _readOptions('/etc/youtube-dl.conf') + system_conf = compat_conf(_readOptions('/etc/youtube-dl.conf')) if '--ignore-config' in system_conf: user_conf = [] else: - user_conf = _readUserConf() + user_conf = compat_conf(_readUserConf()) argv = system_conf + user_conf + command_line_conf opts, args = parser.parse_args(argv) From 7d2ba6394c489879578e316e533eeb282942e54b Mon Sep 17 00:00:00 2001 From: snipem <mail@matthias-kuech.de> Date: Sun, 12 Apr 2015 11:23:01 +0200 Subject: [PATCH 0307/2721] [FootyRoom] Fixed missing http prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason FootyTube is missing the „http:“ prefix on some Playwire links for some videos --- youtube_dl/extractor/footyroom.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index 2b4691ae8..dc784db96 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -6,14 +6,25 @@ from .common import InfoExtractor class FootyRoomIE(InfoExtractor): _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)' - _TEST = { + _TESTS = [{ 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', + 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { 'id': 'schalke-04-0-2-real-madrid-2015-02', 
'title': 'Schalke 04 0 – 2 Real Madrid', }, 'playlist_count': 3, - } + }, + { + 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', + 'info_dict': { + 'id': 'georgia-0-2-germany-2015-03', + 'title': 'Georgia 0 – 2 Germany', + }, + 'playlist_count': 1, + }, + + ] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -35,6 +46,8 @@ class FootyRoomIE(InfoExtractor): playwire_url = self._search_regex( r'data-config="([^"]+)"', payload, 'playwire url', default=None) + if not playwire_url.startswith("http:"): + playwire_url = "http:" + playwire_url if playwire_url: entries.append(self.url_result(playwire_url, 'Playwire')) From 9a4d8fae82f10afe8b2d0611f2f054af60dc7acc Mon Sep 17 00:00:00 2001 From: snipem <mail@matthias-kuech.de> Date: Sun, 12 Apr 2015 11:31:58 +0200 Subject: [PATCH 0308/2721] [FootyTube] Fixed wrong md5 checksum --- youtube_dl/extractor/footyroom.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index dc784db96..70dfb0659 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -8,7 +8,6 @@ class FootyRoomIE(InfoExtractor): _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', - 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { 'id': 'schalke-04-0-2-real-madrid-2015-02', 'title': 'Schalke 04 0 – 2 Real Madrid', From 504c1cedfeb1562b089fa83ca258966e22acc773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Apr 2015 17:09:52 +0600 Subject: [PATCH 0309/2721] [footyroom] Improve --- youtube_dl/extractor/footyroom.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index 70dfb0659..385a99b13 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -45,9 +45,8 @@ class 
FootyRoomIE(InfoExtractor): playwire_url = self._search_regex( r'data-config="([^"]+)"', payload, 'playwire url', default=None) - if not playwire_url.startswith("http:"): - playwire_url = "http:" + playwire_url if playwire_url: - entries.append(self.url_result(playwire_url, 'Playwire')) + entries.append(self.url_result(self._proto_relative_url( + playwire_url, 'http:'), 'Playwire')) return self.playlist_result(entries, playlist_id, playlist_title) From ac58e68bc396904b8e7afa21c9d65a55d2a4852b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Apr 2015 17:11:11 +0600 Subject: [PATCH 0310/2721] [footyroom] Remove superfluous whitespace --- youtube_dl/extractor/footyroom.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py index 385a99b13..4c7dbca40 100644 --- a/youtube_dl/extractor/footyroom.py +++ b/youtube_dl/extractor/footyroom.py @@ -13,17 +13,14 @@ class FootyRoomIE(InfoExtractor): 'title': 'Schalke 04 0 – 2 Real Madrid', }, 'playlist_count': 3, - }, - { + }, { 'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/', 'info_dict': { 'id': 'georgia-0-2-germany-2015-03', 'title': 'Georgia 0 – 2 Germany', }, 'playlist_count': 1, - }, - - ] + }] def _real_extract(self, url): playlist_id = self._match_id(url) From e91b2d14e3100801269ec9e5e853fd8a21c29443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Apr 2015 17:17:31 +0600 Subject: [PATCH 0311/2721] Credit @snipem for gamersyde (#5352) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 9c65dc1d4..db3f42b26 100644 --- a/AUTHORS +++ b/AUTHORS @@ -122,3 +122,4 @@ Joram Schrijver Will W. 
Mohammad Teimori Pabandi Roman Le Négrate +Matthias Küch From c36a95954947fd5d9fb8df2539115a7f9a6b3a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Apr 2015 17:36:29 +0600 Subject: [PATCH 0312/2721] [YoutubeDL] Try to download worst audio+video served by a single file first (Closes #5408) --- youtube_dl/YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 640b8c99d..a68b24ab4 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -919,6 +919,11 @@ class YoutubeDL(object): if format_spec == 'best' or format_spec is None: return available_formats[-1] elif format_spec == 'worst': + audiovideo_formats = [ + f for f in available_formats + if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] + if audiovideo_formats: + return audiovideo_formats[0] return available_formats[0] elif format_spec == 'bestaudio': audio_formats = [ From 830d53bfae7a665b55656dd50c9f35f0d0b0161d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Apr 2015 23:11:47 +0600 Subject: [PATCH 0313/2721] [utils] Add `video_title` for `url_result` --- youtube_dl/extractor/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 530c449c1..8ed97f8dd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -492,7 +492,7 @@ class InfoExtractor(object): # Methods for following #608 @staticmethod - def url_result(url, ie=None, video_id=None): + def url_result(url, ie=None, video_id=None, video_title=None): """Returns a url that points to a page that should be processed""" # TODO: ie should be the class used for getting the info video_info = {'_type': 'url', @@ -500,6 +500,8 @@ class InfoExtractor(object): 'ie_key': ie} if video_id is not None: video_info['id'] = video_id + if video_title is not None: + 
video_info['title'] = video_title return video_info @staticmethod From fb69240ca0934299583bf6c7a855d5c602a4a7e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 12 Apr 2015 23:19:00 +0600 Subject: [PATCH 0314/2721] [youtube] Extract video titles for channel playlist if possible (Closes #4971) --- youtube_dl/extractor/youtube.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2774ec30b..791e1fe62 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1370,10 +1370,18 @@ class YoutubeChannelIE(InfoExtractor): def extract_videos_from_page(self, page): ids_in_page = [] - for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page): - if mobj.group(1) not in ids_in_page: - ids_in_page.append(mobj.group(1)) - return ids_in_page + titles_in_page = [] + for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page): + video_id = mobj.group('id') + video_title = unescapeHTML(mobj.group('title')) + try: + idx = ids_in_page.index(video_id) + if video_title and not titles_in_page[idx]: + titles_in_page[idx] = video_title + except ValueError: + ids_in_page.append(video_id) + titles_in_page.append(video_title) + return zip(ids_in_page, titles_in_page) def _real_extract(self, url): channel_id = self._match_id(url) @@ -1390,10 +1398,12 @@ class YoutubeChannelIE(InfoExtractor): if autogenerated: # The videos are contained in a single page # the ajax pages can't be used, they are empty - video_ids = self.extract_videos_from_page(channel_page) + videos = self.extract_videos_from_page(channel_page) entries = [ - self.url_result(video_id, 'Youtube', video_id=video_id) - for video_id in video_ids] + self.url_result( + video_id, 'Youtube', video_id=video_id, + video_title=video_title) + for video_id, video_title in videos] return 
self.playlist_result(entries, channel_id) def _entries(): @@ -1401,9 +1411,10 @@ class YoutubeChannelIE(InfoExtractor): for pagenum in itertools.count(1): ids_in_page = self.extract_videos_from_page(content_html) - for video_id in ids_in_page: + for video_id, video_title in ids_in_page: yield self.url_result( - video_id, 'Youtube', video_id=video_id) + video_id, 'Youtube', video_id=video_id, + video_title=video_title) mobj = re.search( r'data-uix-load-more-href="/?(?P<more>[^"]+)"', From 7bd930368c48222b0b14211840ac7951fd62f3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Apr 2015 00:08:39 +0600 Subject: [PATCH 0315/2721] [youtube] Remove unused variable --- youtube_dl/extractor/youtube.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 791e1fe62..dc9e15e98 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1386,7 +1386,6 @@ class YoutubeChannelIE(InfoExtractor): def _real_extract(self, url): channel_id = self._match_id(url) - video_ids = [] url = 'https://www.youtube.com/channel/%s/videos' % channel_id channel_page = self._download_webpage(url, channel_id) autogenerated = re.search(r'''(?x) From 51f1244600beb8c3182dc0df756f61cbcfb6c13c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 13 Apr 2015 19:26:15 +0800 Subject: [PATCH 0316/2721] [vine] flake8 --- youtube_dl/extractor/vine.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index d4f5a991e..65c459fad 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import json import itertools from .common import InfoExtractor From 8f02ad4f12549865a2a4436328075f4b20b906ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 13 Apr 2015 20:28:16 
+0600 Subject: [PATCH 0317/2721] [youtube] Simplify --- youtube_dl/extractor/youtube.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index dc9e15e98..52909b0da 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1397,20 +1397,18 @@ class YoutubeChannelIE(InfoExtractor): if autogenerated: # The videos are contained in a single page # the ajax pages can't be used, they are empty - videos = self.extract_videos_from_page(channel_page) entries = [ self.url_result( video_id, 'Youtube', video_id=video_id, video_title=video_title) - for video_id, video_title in videos] + for video_id, video_title in self.extract_videos_from_page(channel_page)] return self.playlist_result(entries, channel_id) def _entries(): more_widget_html = content_html = channel_page for pagenum in itertools.count(1): - ids_in_page = self.extract_videos_from_page(content_html) - for video_id, video_title in ids_in_page: + for video_id, video_title in self.extract_videos_from_page(content_html): yield self.url_result( video_id, 'Youtube', video_id=video_id, video_title=video_title) From 37b44fe7c1e462b748171c4af743f30c01c95fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 13 Apr 2015 22:50:40 +0200 Subject: [PATCH 0318/2721] [postprocessor/atomicparsley] Don't try to remove the temporary and original files if the format is unsupported (fixes #5419) --- youtube_dl/postprocessor/atomicparsley.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py index 448ccc5f3..a5dfc136a 100644 --- a/youtube_dl/postprocessor/atomicparsley.py +++ b/youtube_dl/postprocessor/atomicparsley.py @@ -50,8 +50,13 @@ class AtomicParsleyPP(PostProcessor): msg = stderr.decode('utf-8', 'replace').strip() raise 
AtomicParsleyPPError(msg) - os.remove(encodeFilename(filename)) os.remove(encodeFilename(temp_thumbnail)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + # for formats that don't support thumbnails (like 3gp) AtomicParsley + # won't create to the temporary file + if b'No changes' in stdout: + self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail') + else: + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return True, info From edfcf7abe2f31bb8309ca032e738c90dc3a5722e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 14 Apr 2015 12:45:43 +0800 Subject: [PATCH 0319/2721] [generic] Support another type of Ooyala embedded video --- youtube_dl/extractor/generic.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7ad555e9f..92a62e866 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -688,6 +688,20 @@ class GenericIE(InfoExtractor): 'title': '生物老師男變女 全校挺"做自己"', 'thumbnail': 're:^https?://.*\.jpg$', } + }, + # Ooyala embed + { + 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', + 'info_dict': { + 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs', + 'ext': 'mp4', + 'description': 'VIDEO: Index/Match versus VLOOKUP.', + 'title': 'This is what separates the Excel masters from the wannabes', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + } } ] @@ -1092,7 +1106,8 @@ class GenericIE(InfoExtractor): # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or - re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)) + 
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or + re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage)) if mobj is not None: return OoyalaIE._build_url_result(mobj.group('ec')) From 01c58f84738e056733717174d1076ec465c62500 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 14 Apr 2015 13:10:10 +0800 Subject: [PATCH 0320/2721] [generic] Fix test generic_51 The website replaced the original video with a new one --- youtube_dl/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 92a62e866..eaf9c769a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -681,11 +681,11 @@ class GenericIE(InfoExtractor): # UDN embed { 'url': 'http://www.udn.com/news/story/7314/822787', - 'md5': 'de06b4c90b042c128395a88f0384817e', + 'md5': 'fd2060e988c326991037b9aff9df21a6', 'info_dict': { - 'id': '300040', + 'id': '300346', 'ext': 'mp4', - 'title': '生物老師男變女 全校挺"做自己"', + 'title': '中一中男師變性 全校師生力挺', 'thumbnail': 're:^https?://.*\.jpg$', } }, From 8da1bb04186d4147f89923abc09a9db0fa2a4fec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 14 Apr 2015 15:27:56 +0800 Subject: [PATCH 0321/2721] [miomio] Enhance error checking and replace dead test case --- youtube_dl/extractor/miomio.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index cc3f27194..d41195a96 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( xpath_text, int_or_none, + ExtractorError, ) @@ -14,13 +15,14 @@ class MioMioIE(InfoExtractor): IE_NAME = 'miomio.tv' _VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)' _TESTS = [{ - 'url': 
'http://www.miomio.tv/watch/cc179734/', - 'md5': '48de02137d0739c15b440a224ad364b9', + # "type=video" in flashvars + 'url': 'http://www.miomio.tv/watch/cc88912/', + 'md5': '317a5f7f6b544ce8419b784ca8edae65', 'info_dict': { - 'id': '179734', + 'id': '88912', 'ext': 'flv', - 'title': '手绘动漫鬼泣但丁全程画法', - 'duration': 354, + 'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕', + 'duration': 5923, }, }, { 'url': 'http://www.miomio.tv/watch/cc184024/', @@ -42,7 +44,7 @@ class MioMioIE(InfoExtractor): r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path') xml_config = self._search_regex( - r'flashvars="type=sina&(.+?)&', + r'flashvars="type=(?:sina|video)&(.+?)&', webpage, 'xml config') # skipping the following page causes lags and eventually connection drop-outs @@ -59,6 +61,9 @@ class MioMioIE(InfoExtractor): 'Referer': 'http://www.miomio.tv%s' % mioplayer_path, } + if not int_or_none(xpath_text(vid_config, 'timelength')): + raise ExtractorError('Unable to load videos!', expected=True) + entries = [] for f in vid_config.findall('./durl'): segment_url = xpath_text(f, 'url', 'video url') From 476e1095fac398b648f953a826ed3b191ce8d5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Apr 2015 17:48:41 +0600 Subject: [PATCH 0322/2721] [brightcove] Improve brightcove experience regex (Closes #5421) --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 0733bece7..b37857b2e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -183,7 +183,7 @@ class BrightcoveIE(InfoExtractor): (?: [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ - ).+?</object>''', + ).+?>\s*</object>''', webpage) return [cls._build_brighcove_url(m) for m in matches] From 94c1255782414cdcda565717e31c91734ec00990 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 14 Apr 2015 17:50:53 +0600 Subject: [PATCH 0323/2721] [brightcove] Handle non well-formed XMLs (#5421) --- youtube_dl/extractor/brightcove.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index b37857b2e..117cb00e6 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor): object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str) object_str = fix_xml_ampersands(object_str) - object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) + try: + object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) + except xml.etree.ElementTree.ParseError: + return fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') if fv_el is not None: @@ -185,7 +188,7 @@ class BrightcoveIE(InfoExtractor): [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ ).+?>\s*</object>''', webpage) - return [cls._build_brighcove_url(m) for m in matches] + return filter(None, [cls._build_brighcove_url(m) for m in matches]) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From ae849ca1701dc5bdf25e648a57a1e6ec64f2f968 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 15 Apr 2015 17:45:28 +0800 Subject: [PATCH 0324/2721] [tumblr] Dismiss warnings for optional fields (fixes #5202) --- youtube_dl/extractor/tumblr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 2a1ae5a71..828c808a6 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -56,6 +56,6 @@ class TumblrIE(InfoExtractor): 'url': video_url, 'ext': 'mp4', 'title': video_title, - 'description': self._og_search_description(webpage), - 'thumbnail': 
self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), } From 9c5335a0270042b7e84f02e550676447ccf603b1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 15 Apr 2015 19:56:21 +0800 Subject: [PATCH 0325/2721] [teamcoco] Fix "preload" data extraction (fixes #5179) --- youtube_dl/extractor/teamcoco.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 1caf08cb7..2381676b4 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals import base64 @@ -35,6 +36,17 @@ class TeamcocoIE(InfoExtractor): 'duration': 288, 'age_limit': 0, } + }, { + 'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey', + 'info_dict': { + 'id': '88748', + 'ext': 'mp4', + 'title': 'Timothy Olyphant Raises A Toast To “Justified”', + 'description': 'md5:15501f23f020e793aeca761205e42c24', + }, + 'params': { + 'skip_download': True, # m3u8 downloads + } } ] _VIDEO_ID_REGEXES = ( @@ -54,10 +66,23 @@ class TeamcocoIE(InfoExtractor): video_id = self._html_search_regex( self._VIDEO_ID_REGEXES, webpage, 'video id') + preload = None preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage) - if not preloads: - raise ExtractorError('Preload information could not be extracted') - preload = max([(len(p), p) for p in preloads])[1] + if preloads: + preload = max([(len(p), p) for p in preloads])[1] + + if not preload: + preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage)) + + if not preload: + preload = self._html_search_regex([ + r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)' + ], webpage.replace('","', ''), 'preload data', default=None) + + if not preload: + raise ExtractorError( + 'Preload information 
could not be extracted', expected=True) + data = self._parse_json( base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id) From 4d1cdb5bfebcc93d73ffb710ec7af0752d698dca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Apr 2015 20:58:48 +0600 Subject: [PATCH 0326/2721] [spike] Extend _VALID_URL (Closes #5420) --- youtube_dl/extractor/spike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index e529bb55c..182f286df 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor class SpikeIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?:// - (?:www\.spike\.com/(?:video-clips|(?:full-)?episodes)/.+| + (?:www\.spike\.com/(?:video-(?:clips|playlists)|(?:full-)?episodes)/.+| m\.spike\.com/videos/video\.rbml\?id=(?P<id>[^&]+)) ''' _TEST = { From 0dfe9bc9d286ceb0fd35b0fc8857ba1a86cecdd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Apr 2015 21:02:34 +0600 Subject: [PATCH 0327/2721] [mtv] Capture and output error message (#5420) --- youtube_dl/extractor/mtv.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index c11de1cb6..4430b3416 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -118,6 +118,14 @@ class MTVServicesInfoExtractor(InfoExtractor): mediagen_doc = self._download_xml(mediagen_url, video_id, 'Downloading video urls') + item = mediagen_doc.find('./video/item') + if item is not None and item.get('type') == 'text': + message = '%s returned error: ' % self.IE_NAME + if item.get('code') is not None: + message += '%s - ' % item.get('code') + message += item.text + raise ExtractorError(message, expected=True) + description_node = itemdoc.find('description') if description_node is not None: description = 
description_node.text.strip() From 2dcc114f84ebd6ea18b60a57070bcb81861d8f84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Apr 2015 22:10:08 +0600 Subject: [PATCH 0328/2721] [generic] Add support for playwire embeds (Closes #5430) --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eaf9c769a..ab3538694 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1310,6 +1310,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'Pladform') + # Look for Playwire embeds + mobj = re.search( + r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for 5min embeds mobj = re.search( r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage) From c798f15b989bc8c3578c5b0baf75f4fb4760ba81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 15 Apr 2015 22:13:01 +0600 Subject: [PATCH 0329/2721] [generic] Add test for playwire embed (#5430) --- youtube_dl/extractor/generic.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ab3538694..7f2faa935 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -622,6 +622,17 @@ class GenericIE(InfoExtractor): 'age_limit': 0, }, }, + # Playwire embed + { + 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html', + 'info_dict': { + 'id': '3519514', + 'ext': 'mp4', + 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer', + 'thumbnail': 're:^https?://.*\.png$', + 'duration': 45.115, + }, + }, # 5min embed { 'url': 
'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/', From 9fc03aa87c74cb199f9e1e146ac5cb71b166d3e7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Apr 2015 00:27:39 +0800 Subject: [PATCH 0330/2721] [brightcove] Always return lists from _extract_brightcove_urls In Python 3, filter() returns an iterable object, which evaluates to True even for an empty result set. This causes false-positive playlists in the generic extraction logic. --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 117cb00e6..4f60d5366 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -188,7 +188,7 @@ class BrightcoveIE(InfoExtractor): [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ ).+?>\s*</object>''', webpage) - return filter(None, [cls._build_brighcove_url(m) for m in matches]) + return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From afe4a8c7699e70ecaa7f273b55a038b4d8a1050f Mon Sep 17 00:00:00 2001 From: FireDart <firedartonline@gmail.com> Date: Wed, 15 Apr 2015 22:17:45 -0400 Subject: [PATCH 0331/2721] [gfycat] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gfycat.py | 107 +++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 youtube_dl/extractor/gfycat.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bbf3be41d..d32f1cbd2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -185,6 +185,7 @@ from .gametrailers import GametrailersIE from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .generic import GenericIE +from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga
import GigaIE from .glide import GlideIE diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py new file mode 100644 index 000000000..d103693cd --- /dev/null +++ b/youtube_dl/extractor/gfycat.py @@ -0,0 +1,107 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +import datetime + +from .common import InfoExtractor + +class GfycatIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', + 'info_dict': { + 'id': 'DeadlyDecisiveGermanpinscher', + 'title': 'Ghost in the Shell', + 'ext': 'mp4', + 'upload_date': '20140913' + } + },{ + 'url': 'http://gfycat.com/pleasinghilariouskusimanse', + 'info_dict': { + 'id': 'pleasinghilariouskusimanse', + 'title': 'PleasingHilariousKusimanse', + 'ext': 'webm', + 'upload_date': '20150412' + } + },{ + 'url': 'http://gfycat.com/requiredunkemptbuzzard', + 'info_dict': { + 'id': 'requiredunkemptbuzzard', + 'title': 'Headshot!', + 'ext': 'gif', + 'upload_date': '20150130' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json = self._download_json("http://gfycat.com/cajax/get/" + video_id, video_id, 'Downloading video info')['gfyItem'] + + # Title + # Use user title first, else fallback to url formated name + if json['title']: + video_title = json['title'] + else: + video_title = json['gfyName'] + + # Formats + # Pref: mp4, webm, gif + formats = [{ + 'format_id': 'mp4', + 'ext': 'mp4', + 'url': json['mp4Url'], + 'width': json['width'], + 'height': json['height'], + 'fps': json['frameRate'], + 'filesize': json['mp4Size'], + 'preference': '-1' + }, { + 'format_id': 'webm', + 'ext': 'webm', + 'url': json['webmUrl'], + 'width': json['width'], + 'height': json['height'], + 'fps': json['frameRate'], + 'filesize': json['webmSize'], + 'preference': 0 + }, { + 'format_id': 'gif', + 'ext': 'gif', + 'url': json['gifUrl'], + 'width': json['width'], + 'height': json['height'], + 'fps': 
json['frameRate'], + 'filesize': json['gifSize'], + 'preference': 1 + }] + + self._sort_formats(formats) + + # Date + date = datetime.datetime.fromtimestamp( + int(json['createDate']) + ).strftime('%Y%m%d') + + # Length + duration = json['numFrames'] / json['frameRate'] + + # Age limit + # 1 = nsfw / 0 = sfw + if json['nsfw'] == 1: + age_limit = 18 + else: + age_limit = 0 + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats, + 'creator': json['userName'], + 'description': json['description'], + 'upload_date': date, + 'categories': json['tags'], + 'age_limit': age_limit, + 'duration': duration, + 'view_count': json['views'] + } From d0eb724e22dc2e48f206dac45f9db9c17dcb26e1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Apr 2015 17:04:53 +0800 Subject: [PATCH 0332/2721] [UDNEmbed] Enhance error checking and extend _VALID_URL --- youtube_dl/extractor/udn.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index bba25bb58..c08428acf 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -3,12 +3,15 @@ from __future__ import unicode_literals import json from .common import InfoExtractor -from ..utils import js_to_json +from ..utils import ( + js_to_json, + ExtractorError, +) from ..compat import compat_urlparse class UDNEmbedIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)' + _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', 'md5': 'de06b4c90b042c128395a88f0384817e', @@ -19,7 +22,11 @@ class UDNEmbedIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', } }, { - 'url': '//video.udn.com/embed/news/300040', + 'url': 'https://video.udn.com/embed/news/300040', + 'only_matching': True, + }, { + # From https://video.udn.com/news/303776 + 'url': 
'https://video.udn.com/play/news/303776', 'only_matching': True, }] @@ -47,7 +54,10 @@ class UDNEmbedIE(InfoExtractor): 'retrieve url for %s video' % video_type), 'format_id': video_type, 'preference': 0 if video_type == 'mp4' else -1, - } for video_type, api_url in video_urls.items()] + } for video_type, api_url in video_urls.items() if api_url] + + if not formats: + raise ExtractorError('No videos found', expected=True) self._sort_formats(formats) From d6fd958c5f6847f39f4ed653e82832f2f44657ba Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Apr 2015 17:16:11 +0800 Subject: [PATCH 0333/2721] [generic] Extract videos from SMIL manifests (closes #5145 and fixes #5135) --- youtube_dl/extractor/generic.py | 37 ++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7f2faa935..8c859f068 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -713,6 +713,20 @@ class GenericIE(InfoExtractor): # m3u8 downloads 'skip_download': True, } + }, + # Contains a SMIL manifest + { + 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html', + 'info_dict': { + 'id': 'file', + 'ext': 'flv', + 'title': '+ Football: Lottery Champions League Europe', + 'uploader': 'www.telewebion.com', + }, + 'params': { + # rtmpe downloads + 'skip_download': True, + } } ] @@ -1440,13 +1454,22 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] - entries.append({ - 'id': video_id, - 'url': video_url, - 'uploader': video_uploader, - 'title': video_title, - 'age_limit': age_limit, - }) + if determine_ext(video_url) == 'smil': + entries.append({ + 'id': video_id, + 'formats': 
self._extract_smil_formats(video_url, video_id), + 'uploader': video_uploader, + 'title': video_title, + 'age_limit': age_limit, + }) + else: + entries.append({ + 'id': video_id, + 'url': video_url, + 'uploader': video_uploader, + 'title': video_title, + 'age_limit': age_limit, + }) if len(entries) == 1: return entries[0] From 5d98908b26a4bf593abd6340d34d983f8d0980df Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 17 Mar 2015 19:03:29 +0800 Subject: [PATCH 0334/2721] [QQMusic] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/qqmusic.py | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/qqmusic.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bbf3be41d..63bcf666f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -396,6 +396,7 @@ from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE from .pyvideo import PyvideoIE +from .qqmusic import QQMusicIE from .quickvid import QuickVidIE from .r7 import R7IE from .radiode import RadioDeIE diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py new file mode 100644 index 000000000..3dc637392 --- /dev/null +++ b/youtube_dl/extractor/qqmusic.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import strip_jsonp + +# guid is a random number generated in javascript, but seems a fixed number +# also works +guid = '1' + + +class QQMusicIE(InfoExtractor): + _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' + _TESTS = [{ + 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', + 'md5': 'bed90b6db2a7a7a7e11bc585f471f63a', + 'info_dict': { + 'id': '004295Et37taLD', + 'ext': 'm4a', + 'title': '可惜没如果', + 'upload_date': '20141227', + 'creator': '林俊杰', + } + }] + + def 
_real_extract(self, url): + mid = self._match_id(url) + + detail_info_page = self._download_webpage( + 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, + mid, note='Download sont detail info', + errnote='Unable to get song detail info') + + song_name = self._html_search_regex( + r"songname:\s*'([^']+)'", detail_info_page, 'song name') + + publish_time = self._html_search_regex( + r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page, + 'publish time').replace('-', '') + + singer = self._html_search_regex( + r"singer:\s*'([^']+)", detail_info_page, 'singer') + + vkey = self._download_json( + 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid, + mid, note='Retrieve vkey', errnote='Unable to get vkey', + transform_source=strip_jsonp)['key'] + song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid) + + return { + 'id': mid, + 'url': song_url, + 'title': song_name, + 'upload_date': publish_time, + 'creator': singer, + } From a2043572aa99c7517c84c31fe8a7e2051d27bf09 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 18 Mar 2015 13:56:02 +0800 Subject: [PATCH 0335/2721] [QQMusic] Implement the guid algorithm --- youtube_dl/extractor/qqmusic.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 3dc637392..93440b954 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -1,13 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals +import random +import time + from .common import InfoExtractor from ..utils import strip_jsonp -# guid is a random number generated in javascript, but seems a fixed number -# also works -guid = '1' - class QQMusicIE(InfoExtractor): _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' @@ -23,6 +22,13 @@ class QQMusicIE(InfoExtractor): } }] + # Reference: 
m_r_GetRUin() in top_player.js + # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js + @staticmethod + def m_r_get_ruin(): + curMs = int(time.time() * 1000) % 1000 + return int(round(random.random() * 2147483647) * curMs % 1E10) + def _real_extract(self, url): mid = self._match_id(url) @@ -41,6 +47,8 @@ class QQMusicIE(InfoExtractor): singer = self._html_search_regex( r"singer:\s*'([^']+)", detail_info_page, 'singer') + guid = self.m_r_get_ruin() + vkey = self._download_json( 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid, mid, note='Retrieve vkey', errnote='Unable to get vkey', From 8afff9f849e43c9c4194e302c06b5bec38edd83f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 18 Mar 2015 14:59:33 +0800 Subject: [PATCH 0336/2721] [QQMusic] Add singer info extractor --- youtube_dl/extractor/__init__.py | 5 ++- youtube_dl/extractor/qqmusic.py | 53 +++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 63bcf666f..65dbcb6cf 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -396,7 +396,10 @@ from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE from .puls4 import Puls4IE from .pyvideo import PyvideoIE -from .qqmusic import QQMusicIE +from .qqmusic import ( + QQMusicIE, + QQMusicSingerIE +) from .quickvid import QuickVidIE from .r7 import R7IE from .radiode import RadioDeIE diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 93440b954..1c22943a5 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -3,9 +3,11 @@ from __future__ import unicode_literals import random import time +import re from .common import InfoExtractor from ..utils import strip_jsonp +from ..compat import compat_urllib_request class QQMusicIE(InfoExtractor): @@ -34,7 +36,7 @@ class 
QQMusicIE(InfoExtractor): detail_info_page = self._download_webpage( 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, - mid, note='Download sont detail info', + mid, note='Download song detail info', errnote='Unable to get song detail info') song_name = self._html_search_regex( @@ -62,3 +64,52 @@ class QQMusicIE(InfoExtractor): 'upload_date': publish_time, 'creator': singer, } + + +class QQMusicSingerIE(InfoExtractor): + _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' + _TEST = { + 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', + 'info_dict': { + 'id': '001BLpXF2DyJe2', + 'title': '林俊杰', + 'description': 'md5:2a222d89ba4455a3af19940c0481bb78', + }, + 'playlist_count': 12, + } + + def _real_extract(self, url): + mid = self._match_id(url) + + singer_page = self._download_webpage( + 'http://y.qq.com/y/static/singer/%s/%s/%s.html' % (mid[-2], mid[-1], mid), + 'Download singer page') + + entries = [] + + for item in re.findall(r'<span class="data">([^<>]+)</span>', singer_page): + song_mid = item.split('|')[-5] + entries.append(self.url_result( + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', song_mid)) + + singer_name = self._html_search_regex( + r"singername\s*:\s*'([^']+)'", singer_page, 'singer name', + default=None) + + singer_id = self._html_search_regex( + r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id', + default=None) + + singer_desc = None + + if singer_id: + req = compat_urllib_request.Request( + 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id) + req.add_header( + 'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html') + singer_desc_page = self._download_xml( + req, 'Donwload singer description XML') + + singer_desc = singer_desc_page.find('./data/info/desc').text + + return self.playlist_result(entries, mid, singer_name, singer_desc) From 5edea45fab73874c269655b4cf08da0bbc5ea479 
Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 19 Mar 2015 01:47:07 +0800 Subject: [PATCH 0337/2721] [QQMusic] Add album info extractor --- youtube_dl/extractor/__init__.py | 3 +- youtube_dl/extractor/qqmusic.py | 72 +++++++++++++++++++++++++++----- 2 files changed, 63 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 65dbcb6cf..36860f72c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -398,7 +398,8 @@ from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .qqmusic import ( QQMusicIE, - QQMusicSingerIE + QQMusicSingerIE, + QQMusicAlbumIE, ) from .quickvid import QuickVidIE from .r7 import R7IE diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 1c22943a5..d0ea4a769 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -6,7 +6,10 @@ import time import re from .common import InfoExtractor -from ..utils import strip_jsonp +from ..utils import ( + strip_jsonp, + unescapeHTML, +) from ..compat import compat_urllib_request @@ -66,7 +69,28 @@ class QQMusicIE(InfoExtractor): } -class QQMusicSingerIE(InfoExtractor): +class QQPlaylistBaseIE(InfoExtractor): + @staticmethod + def qq_static_url(category, mid): + return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) + + @staticmethod + def qq_song_url(mid): + return 'http://y.qq.com/#type=song&mid=%s' % mid + + @classmethod + def get_entries_from_page(cls, page): + entries = [] + + for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page): + song_mid = unescapeHTML(item).split('|')[-5] + entries.append(cls.url_result( + cls.qq_song_url(song_mid), 'QQMusic', song_mid)) + + return entries + + +class QQMusicSingerIE(QQPlaylistBaseIE): _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' _TEST = { 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', @@ -82,15 +106,9 
@@ class QQMusicSingerIE(InfoExtractor): mid = self._match_id(url) singer_page = self._download_webpage( - 'http://y.qq.com/y/static/singer/%s/%s/%s.html' % (mid[-2], mid[-1], mid), - 'Download singer page') + self.qq_static_url('singer', mid), mid, 'Download singer page') - entries = [] - - for item in re.findall(r'<span class="data">([^<>]+)</span>', singer_page): - song_mid = item.split('|')[-5] - entries.append(self.url_result( - 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', song_mid)) + entries = self.get_entries_from_page(singer_page) singer_name = self._html_search_regex( r"singername\s*:\s*'([^']+)'", singer_page, 'singer name', @@ -108,8 +126,40 @@ class QQMusicSingerIE(InfoExtractor): req.add_header( 'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html') singer_desc_page = self._download_xml( - req, 'Donwload singer description XML') + req, mid, 'Donwload singer description XML') singer_desc = singer_desc_page.find('./data/info/desc').text return self.playlist_result(entries, mid, singer_name, singer_desc) + + +class QQMusicAlbumIE(QQPlaylistBaseIE): + _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' + + _TEST = { + 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0', + 'info_dict': { + 'id': '000gXCTb2AhRR1', + 'title': '我们都是这样长大的', + 'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6', + }, + 'playlist_count': 4, + } + + def _real_extract(self, url): + mid = self._match_id(url) + + album_page = self._download_webpage( + self.qq_static_url('album', mid), mid, 'Download album page') + + entries = self.get_entries_from_page(album_page) + + album_name = self._html_search_regex( + r"albumname\s*:\s*'([^']+)',", album_page, 'album name', + default=None) + + album_detail = self._html_search_regex( + r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>', + album_page, 'album details', default=None) + + return self.playlist_result(entries, mid, album_name, album_detail) From 
a685ae511a7f2148afb765d0a30bca42a75cf861 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 Mar 2015 11:38:53 +0800 Subject: [PATCH 0338/2721] [QQMusic] Song extractor: Add lyrics as description Note: Test fails on python 3 due to encoding issues --- youtube_dl/extractor/qqmusic.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index d0ea4a769..e8aacbc3d 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -24,6 +24,7 @@ class QQMusicIE(InfoExtractor): 'title': '可惜没如果', 'upload_date': '20141227', 'creator': '林俊杰', + 'description': 'md5:242c97c2847e0495583b7b13764f7106', } }] @@ -47,10 +48,16 @@ class QQMusicIE(InfoExtractor): publish_time = self._html_search_regex( r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page, - 'publish time').replace('-', '') + 'publish time', default=None) + if publish_time: + publish_time = publish_time.replace('-', '') singer = self._html_search_regex( - r"singer:\s*'([^']+)", detail_info_page, 'singer') + r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None) + + lrc_content = self._html_search_regex( + r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>', + detail_info_page, 'LRC lyrics', default=None) guid = self.m_r_get_ruin() @@ -66,6 +73,7 @@ class QQMusicIE(InfoExtractor): 'title': song_name, 'upload_date': publish_time, 'creator': singer, + 'description': lrc_content, } @@ -74,10 +82,6 @@ class QQPlaylistBaseIE(InfoExtractor): def qq_static_url(category, mid): return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) - @staticmethod - def qq_song_url(mid): - return 'http://y.qq.com/#type=song&mid=%s' % mid - @classmethod def get_entries_from_page(cls, page): entries = [] @@ -85,7 +89,8 @@ class QQPlaylistBaseIE(InfoExtractor): for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page): song_mid = 
unescapeHTML(item).split('|')[-5] entries.append(cls.url_result( - cls.qq_song_url(song_mid), 'QQMusic', song_mid)) + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', + song_mid)) return entries From c9a779695da56c1f9561af8586ecc586103dc254 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 21 Mar 2015 12:21:27 +0800 Subject: [PATCH 0339/2721] [extractor/common] Add the encoding parameter The QQMusic info extractor need forced encoding for correct working. --- youtube_dl/extractor/common.py | 34 ++++++++++++++++++++------------- youtube_dl/extractor/qqmusic.py | 4 ++-- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 8ed97f8dd..28f672e42 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -324,7 +324,7 @@ class InfoExtractor(object): self._downloader.report_warning(errmsg) return False - def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True): + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None): """ Returns a tuple (page content as string, URL handle) """ # Strip hashes from the URL (#1038) if isinstance(url_or_request, (compat_str, str)): @@ -334,14 +334,11 @@ class InfoExtractor(object): if urlh is False: assert not fatal return False - content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal) + content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding) return (content, urlh) - def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None): - content_type = urlh.headers.get('Content-Type', '') - webpage_bytes = urlh.read() - if prefix is not None: - webpage_bytes = prefix + webpage_bytes + @staticmethod + def _guess_encoding_from_content(content_type, webpage_bytes): m = 
re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) if m: encoding = m.group(1) @@ -354,6 +351,16 @@ class InfoExtractor(object): encoding = 'utf-16' else: encoding = 'utf-8' + + return encoding + + def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None): + content_type = urlh.headers.get('Content-Type', '') + webpage_bytes = urlh.read() + if prefix is not None: + webpage_bytes = prefix + webpage_bytes + if not encoding: + encoding = self._guess_encoding_from_content(content_type, webpage_bytes) if self._downloader.params.get('dump_intermediate_pages', False): try: url = url_or_request.get_full_url() @@ -410,13 +417,13 @@ class InfoExtractor(object): return content - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): """ Returns the data of the page as a string """ success = False try_count = 0 while success is False: try: - res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal) + res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding) success = True except compat_http_client.IncompleteRead as e: try_count += 1 @@ -431,10 +438,10 @@ class InfoExtractor(object): def _download_xml(self, url_or_request, video_id, note='Downloading XML', errnote='Unable to download XML', - transform_source=None, fatal=True): + transform_source=None, fatal=True, encoding=None): """Return the xml as an xml.etree.ElementTree.Element""" xml_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal) + url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding) if xml_string is False: return xml_string if transform_source: @@ -445,9 +452,10 @@ class InfoExtractor(object): note='Downloading 
JSON metadata', errnote='Unable to download JSON metadata', transform_source=None, - fatal=True): + fatal=True, encoding=None): json_string = self._download_webpage( - url_or_request, video_id, note, errnote, fatal=fatal) + url_or_request, video_id, note, errnote, fatal=fatal, + encoding=encoding) if (not fatal) and json_string is False: return None return self._parse_json( diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index e8aacbc3d..174c8e0ae 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -24,7 +24,7 @@ class QQMusicIE(InfoExtractor): 'title': '可惜没如果', 'upload_date': '20141227', 'creator': '林俊杰', - 'description': 'md5:242c97c2847e0495583b7b13764f7106', + 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30', } }] @@ -41,7 +41,7 @@ class QQMusicIE(InfoExtractor): detail_info_page = self._download_webpage( 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, mid, note='Download song detail info', - errnote='Unable to get song detail info') + errnote='Unable to get song detail info', encoding='gbk') song_name = self._html_search_regex( r"songname:\s*'([^']+)'", detail_info_page, 'song name') From c052ce6cde53f25949c21d00e4243669f523c0a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 16 Apr 2015 22:00:45 +0800 Subject: [PATCH 0340/2721] [Srf] Add new extractor (fixes #981) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/srf.py | 86 ++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 youtube_dl/extractor/srf.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 36860f72c..830090346 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -485,6 +485,7 @@ from .spike import SpikeIE from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE +from .srf import 
SrfIE from .srmediathek import SRMediathekIE from .ssa import SSAIE from .stanfordoc import StanfordOpenClassroomIE diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py new file mode 100644 index 000000000..87b4a676a --- /dev/null +++ b/youtube_dl/extractor/srf.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import ( + determine_ext, + parse_iso8601, + xpath_text, +) + + +class SrfIE(InfoExtractor): + _VALID_URL = r'http://www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=(?P<id>[0-9a-f\-]{36})' + _TESTS = [{ + 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', + 'md5': '4cd93523723beff51bb4bee974ee238d', + 'info_dict': { + 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5', + 'display_id': 'snowden-beantragt-asyl-in-russland', + 'ext': 'm4v', + 'upload_date': '20130701', + 'title': 'Snowden beantragt Asyl in Russland', + 'timestamp': 1372713995, + } + }, { + # No Speichern (Save) button + 'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa', + 'info_dict': { + 'id': '677f5829-e473-4823-ac83-a1087fe97faa', + 'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive', + 'ext': 'mp4', + 'upload_date': '20130710', + 'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive', + 'timestamp': 1373493600, + }, + 'params': { + # Require ffmpeg/avconv + 'skip_download': True, + } + }, { + 'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_data = self._download_xml( + 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id, + video_id) + + display_id = re.match(self._VALID_URL, 
url).group('display_id') + title = xpath_text( + video_data, './AssetMetadatas/AssetMetadata/title', fatal=True) + thumbnails = [{ + 'url': s.text + } for s in video_data.findall('.//ImageRepresentation/url')] + timestamp = parse_iso8601(xpath_text(video_data, './createdDate')) + # The <duration> field in XML is different from the exact duration, skipping + + formats = [] + for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'): + url_node = item.find('url') + quality = url_node.attrib['quality'] + full_url = url_node.text + original_ext = determine_ext(full_url) + if original_ext == 'f4m': + full_url += '?hdcore=3.4.0' # Without this, you get a 403 error + formats.append({ + 'url': full_url, + 'ext': 'mp4' if original_ext == 'm3u8' else original_ext, + 'format_id': '%s-%s' % (quality, item.attrib['protocol']), + 'preference': 0 if 'HD' in quality else -1, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'title': title, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + } From 355c524bfaec10aa8f5ff31bc95d2c50ef1a3113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 20:31:02 +0600 Subject: [PATCH 0341/2721] [srf] Extract all formats and prefer direct links over hls and hds --- youtube_dl/extractor/srf.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py index 87b4a676a..4be329a2f 100644 --- a/youtube_dl/extractor/srf.py +++ b/youtube_dl/extractor/srf.py @@ -61,18 +61,25 @@ class SrfIE(InfoExtractor): formats = [] for item in video_data.findall('./Playlists/Playlist') + video_data.findall('./Downloads/Download'): - url_node = item.find('url') - quality = url_node.attrib['quality'] - full_url = url_node.text - original_ext = determine_ext(full_url) - if original_ext == 'f4m': - full_url 
+= '?hdcore=3.4.0' # Without this, you get a 403 error - formats.append({ - 'url': full_url, - 'ext': 'mp4' if original_ext == 'm3u8' else original_ext, - 'format_id': '%s-%s' % (quality, item.attrib['protocol']), - 'preference': 0 if 'HD' in quality else -1, - }) + for url_node in item.findall('url'): + quality = url_node.attrib['quality'] + full_url = url_node.text + original_ext = determine_ext(full_url) + format_id = '%s-%s' % (quality, item.attrib['protocol']) + if original_ext == 'f4m': + formats.extend(self._extract_f4m_formats( + full_url + '?hdcore=3.4.0', video_id, f4m_id=format_id)) + elif original_ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + full_url, video_id, 'mp4', m3u8_id=format_id)) + else: + formats.append({ + 'url': full_url, + 'ext': 'mp4' if original_ext == 'm3u8' else original_ext, + 'format_id': format_id, + 'quality': 0 if 'HD' in quality else -1, + 'preference': 1, + }) self._sort_formats(formats) From 820b06480493ab1b3c146c3edfc2c89fc80061cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 20:48:17 +0600 Subject: [PATCH 0342/2721] [srf] Extract subtitles --- youtube_dl/extractor/srf.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py index 4be329a2f..a080eb7ca 100644 --- a/youtube_dl/extractor/srf.py +++ b/youtube_dl/extractor/srf.py @@ -83,6 +83,16 @@ class SrfIE(InfoExtractor): self._sort_formats(formats) + subtitles = {} + subtitles_data = video_data.find('Subtitles') + if subtitles_data is not None: + subtitles_list = [{ + 'url': sub.text, + 'ext': determine_ext(sub.text), + } for sub in subtitles_data] + if subtitles_list: + subtitles['de'] = subtitles_list + return { 'id': video_id, 'display_id': display_id, @@ -90,4 +100,5 @@ class SrfIE(InfoExtractor): 'title': title, 'thumbnails': thumbnails, 'timestamp': timestamp, + 'subtitles': subtitles, } From fbbb2194097afcc3d3fb4f28888279f1b970b253 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 21:28:21 +0600 Subject: [PATCH 0343/2721] [srf] Fix direct links ext --- youtube_dl/extractor/srf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py index a080eb7ca..2ac55dcd1 100644 --- a/youtube_dl/extractor/srf.py +++ b/youtube_dl/extractor/srf.py @@ -75,7 +75,7 @@ class SrfIE(InfoExtractor): else: formats.append({ 'url': full_url, - 'ext': 'mp4' if original_ext == 'm3u8' else original_ext, + 'ext': original_ext, 'format_id': format_id, 'quality': 0 if 'HD' in quality else -1, 'preference': 1, From 89c09e2a0884301e405d014fdc9706cf1750f798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 21:30:13 +0600 Subject: [PATCH 0344/2721] [srf] Update test --- youtube_dl/extractor/srf.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py index 2ac55dcd1..e30351602 100644 --- a/youtube_dl/extractor/srf.py +++ b/youtube_dl/extractor/srf.py @@ -26,18 +26,15 @@ class SrfIE(InfoExtractor): }, { # No Speichern (Save) button 'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa', + 'md5': 'd97e236e80d1d24729e5d0953d276a4f', 'info_dict': { 'id': '677f5829-e473-4823-ac83-a1087fe97faa', 'display_id': 'jaguar-xk120-shadow-und-tornado-dampflokomotive', - 'ext': 'mp4', + 'ext': 'flv', 'upload_date': '20130710', 'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive', 'timestamp': 1373493600, }, - 'params': { - # Require ffmpeg/avconv - 'skip_download': True, - } }, { 'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'only_matching': True, From 5cb91ceaa50ffe5b8551075765a8ad920efd0b59 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 21:33:01 +0600 Subject: [PATCH 0345/2721] [pladform] Update test --- youtube_dl/extractor/pladform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py index abde34b94..551c8c9f0 100644 --- a/youtube_dl/extractor/pladform.py +++ b/youtube_dl/extractor/pladform.py @@ -30,7 +30,7 @@ class PladformIE(InfoExtractor): 'info_dict': { 'id': '100183293', 'ext': 'mp4', - 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть', + 'title': 'Тайны перевала Дятлова • 1 серия 2 часть', 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 694, From 62259846816568917d3cff41dbbce24a706c9fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 21:37:15 +0600 Subject: [PATCH 0346/2721] [generic] Update pladform embed test --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8c859f068..e645d1bb3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -615,7 +615,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '100183293', 'ext': 'mp4', - 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть', + 'title': 'Тайны перевала Дятлова • 1 серия 2 часть', 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 694, From 5141249c5930943cbf6890db0e36ba50f06a20ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 21:47:42 +0600 Subject: [PATCH 0347/2721] [srf] Extend _VALID_URL --- youtube_dl/extractor/srf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py index e30351602..25b589cb9 100644 --- a/youtube_dl/extractor/srf.py +++ b/youtube_dl/extractor/srf.py @@ -11,7 +11,7 @@ from ..utils import ( class SrfIE(InfoExtractor): - _VALID_URL = r'http://www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=(?P<id>[0-9a-f\-]{36})' + _VALID_URL = r'https?://(?:www\.srf\.ch/play(?:er)?/tv/[^/]+/video/(?P<display_id>[^?]+)\?id=|tp\.srgssr\.ch/p/flash\?urn=urn:srf:ais:video:)(?P<id>[0-9a-f\-]{36})' _TESTS = [{ 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'md5': '4cd93523723beff51bb4bee974ee238d', @@ -38,6 +38,9 @@ class SrfIE(InfoExtractor): }, { 'url': 'http://www.srf.ch/player/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5', 'only_matching': True, + }, { + 'url': 'https://tp.srgssr.ch/p/flash?urn=urn:srf:ais:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5', + 'only_matching': True, }] def _real_extract(self, url): From 65c1a750f5e5327a2ec8f702509eba05364ca4fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 21:48:22 +0600 Subject: [PATCH 0348/2721] [srf] Show display_id when present --- youtube_dl/extractor/srf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/srf.py b/youtube_dl/extractor/srf.py index 25b589cb9..77eec0bc7 100644 --- a/youtube_dl/extractor/srf.py +++ b/youtube_dl/extractor/srf.py @@ -45,12 +45,12 @@ class SrfIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + display_id = re.match(self._VALID_URL, url).group('display_id') or video_id video_data = self._download_xml( 'http://il.srgssr.ch/integrationlayer/1.0/ue/srf/video/play/%s.xml' % video_id, - video_id) + display_id) - display_id = re.match(self._VALID_URL, url).group('display_id') title = xpath_text( video_data, 
'./AssetMetadatas/AssetMetadata/title', fatal=True) thumbnails = [{ @@ -68,10 +68,10 @@ class SrfIE(InfoExtractor): format_id = '%s-%s' % (quality, item.attrib['protocol']) if original_ext == 'f4m': formats.extend(self._extract_f4m_formats( - full_url + '?hdcore=3.4.0', video_id, f4m_id=format_id)) + full_url + '?hdcore=3.4.0', display_id, f4m_id=format_id)) elif original_ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - full_url, video_id, 'mp4', m3u8_id=format_id)) + full_url, display_id, 'mp4', m3u8_id=format_id)) else: formats.append({ 'url': full_url, From be531ef1ec90cb5ee5b3f02d817a140313b76412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 16 Apr 2015 22:12:38 +0600 Subject: [PATCH 0349/2721] [utils] Fix splitunc deprecation warning --- youtube_dl/utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 52f0dd09a..e628fac81 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -312,17 +312,17 @@ def sanitize_path(s): """Sanitizes and normalizes path on Windows""" if sys.platform != 'win32': return s - drive, _ = os.path.splitdrive(s) - unc, _ = os.path.splitunc(s) - unc_or_drive = unc or drive - norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep) - if unc_or_drive: + drive_or_unc, _ = os.path.splitdrive(s) + if sys.version_info < (2, 7) and not drive_or_unc: + drive_or_unc, _ = os.path.splitunc(s) + norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep) + if drive_or_unc: norm_path.pop(0) sanitized_path = [ path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part) for path_part in norm_path] - if unc_or_drive: - sanitized_path.insert(0, unc_or_drive + os.path.sep) + if drive_or_unc: + sanitized_path.insert(0, drive_or_unc + os.path.sep) return os.path.join(*sanitized_path) From 4aec95f3c932ee7042ca4dcae9fcd8c57341bb55 Mon Sep 
17 00:00:00 2001 From: FireDart <firedartonline@gmail.com> Date: Thu, 16 Apr 2015 18:10:53 -0400 Subject: [PATCH 0350/2721] [gfycat] Updated tests. --- youtube_dl/extractor/gfycat.py | 58 +++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index d103693cd..6de78c49d 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -8,31 +8,39 @@ from .common import InfoExtractor class GfycatIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)' - _TESTS = [{ - 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', - 'info_dict': { - 'id': 'DeadlyDecisiveGermanpinscher', - 'title': 'Ghost in the Shell', - 'ext': 'mp4', - 'upload_date': '20140913' - } - },{ - 'url': 'http://gfycat.com/pleasinghilariouskusimanse', - 'info_dict': { - 'id': 'pleasinghilariouskusimanse', - 'title': 'PleasingHilariousKusimanse', - 'ext': 'webm', - 'upload_date': '20150412' - } - },{ - 'url': 'http://gfycat.com/requiredunkemptbuzzard', - 'info_dict': { - 'id': 'requiredunkemptbuzzard', - 'title': 'Headshot!', - 'ext': 'gif', - 'upload_date': '20150130' - } - }] + _TESTS = [ + { + 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', + 'info_dict': { + 'id': 'DeadlyDecisiveGermanpinscher', + 'title': 'Ghost in the Shell', + 'ext': 'mp4', + 'upload_date': '20140913' + } + },{ + 'url': 'http://gfycat.com/pleasinghilariouskusimanse', + 'info_dict': { + 'id': 'pleasinghilariouskusimanse', + 'title': 'PleasingHilariousKusimanse', + 'ext': 'webm', + 'upload_date': '20150412' + }, + 'params': { + 'format': 'webm', + }, + },{ + 'url': 'http://gfycat.com/requiredunkemptbuzzard', + 'info_dict': { + 'id': 'requiredunkemptbuzzard', + 'title': 'Headshot!', + 'ext': 'gif', + 'upload_date': '20150129' + }, + 'params': { + 'format': 'gif', + }, + }, + ] def _real_extract(self, url): video_id = self._match_id(url) From 
954352c4c08dab0dd2d9ca20f5a414a307cea96f Mon Sep 17 00:00:00 2001 From: FireDart <firedartonline@gmail.com> Date: Thu, 16 Apr 2015 18:11:30 -0400 Subject: [PATCH 0351/2721] [gfycat] Fixed preferences. --- youtube_dl/extractor/gfycat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index 6de78c49d..5e70ed3f6 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -63,7 +63,7 @@ class GfycatIE(InfoExtractor): 'height': json['height'], 'fps': json['frameRate'], 'filesize': json['mp4Size'], - 'preference': '-1' + 'preference': 2 }, { 'format_id': 'webm', 'ext': 'webm', @@ -72,7 +72,7 @@ class GfycatIE(InfoExtractor): 'height': json['height'], 'fps': json['frameRate'], 'filesize': json['webmSize'], - 'preference': 0 + 'preference': 1 }, { 'format_id': 'gif', 'ext': 'gif', @@ -81,7 +81,7 @@ class GfycatIE(InfoExtractor): 'height': json['height'], 'fps': json['frameRate'], 'filesize': json['gifSize'], - 'preference': 1 + 'preference': 0 }] self._sort_formats(formats) From f11554092b419baa919875432fe6ebc1f22f5307 Mon Sep 17 00:00:00 2001 From: Tjark Saul <tjark.saul@wachplan.net> Date: Fri, 17 Apr 2015 09:21:54 +0200 Subject: [PATCH 0352/2721] [Lecture2Go] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/lecture2go.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/lecture2go.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bbf3be41d..3d6e981b2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -249,6 +249,7 @@ from .krasview import KrasViewIE from .ku6 import Ku6IE from .la7 import LA7IE from .laola1tv import Laola1TvIE +from .lecture2go import Lecture2GoIE from .letv import ( LetvIE, LetvTvIE, diff --git a/youtube_dl/extractor/lecture2go.py b/youtube_dl/extractor/lecture2go.py new file 
mode 100644 index 000000000..9cf28e31c --- /dev/null +++ b/youtube_dl/extractor/lecture2go.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class Lecture2GoIE(InfoExtractor): + _VALID_URL = r'https?://lecture2go.uni-hamburg.de/veranstaltungen/-/v/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473', + 'md5': 'a9e76f83b3ef58019c4b7dbc35f406c1', + 'info_dict': { + 'id': '17473', + 'ext': 'mp4', + 'url': 'https://fms1.rrz.uni-hamburg.de/abo/64.050_FrankHeitmann_2015-04-13_14-35.mp4', + 'title': '2 - Endliche Automaten und reguläre Sprachen' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<em class="title">(.*?)</em>', webpage, 'title') + video_url = self._search_regex(r'b.isFirefox..a.useHTML5\).b.setOption.a,"src","(.*.mp4)"\).else', webpage, 'video_url') + creator = self._html_search_regex(r'<div id="description">(.*)</div>', webpage, 'creator') + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'creator': creator + } From 024ebb270663fbe27cfab52b1a8b9a21f227d985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 17 Apr 2015 10:46:25 +0200 Subject: [PATCH 0353/2721] [soundcloud] Handle 'secret_token' for 'w.soundcloud.com/player/?url=*' urls (fixes #5453) --- youtube_dl/extractor/soundcloud.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 316b2c90f..7efc6aff1 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -221,7 +221,12 @@ class SoundcloudIE(InfoExtractor): info_json_url += "&secret_token=" + token elif mobj.group('player'): query = 
compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - return self.url_result(query['url'][0]) + real_url = query['url'][0] + # If the token is in the query of the original url we have to + # manually add it + if 'secret_token' in query: + real_url += '?secret_token=' + query['secret_token'][0] + return self.url_result(real_url) else: # extract uploader (which is in the url) uploader = mobj.group('uploader') From 3220c50f9af3a1df239656025aebfe00fec00e45 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 17 Apr 2015 11:14:25 +0200 Subject: [PATCH 0354/2721] release 2015.04.17 --- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c85a39918..80e86c1b6 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -98,6 +98,7 @@ - **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED - **Cracked** - **Criterion** + - **CrooksAndLiars** - **Crunchyroll** - **crunchyroll:playlist** - **CSpan**: C-SPAN @@ -359,6 +360,9 @@ - **prosiebensat1**: ProSiebenSat.1 Digital - **Puls4** - **Pyvideo** + - **QQMusic** + - **QQMusicAlbum** + - **QQMusicSinger** - **QuickVid** - **R7** - **radio.de** @@ -434,6 +438,7 @@ - **Sport5** - **SportBox** - **SportDeutschland** + - **Srf** - **SRMediathek**: Saarländischer Rundfunk - **SSA** - **stanfordoc**: Stanford Open ClassRoom diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 1095fea2f..3fd0e7e56 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.09' +__version__ = '2015.04.17' From 08f2a92c9c23cf460b00a290b5b3819c7972231b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 17 Apr 2015 14:55:24 +0200 Subject: [PATCH 0355/2721] 
InfoExtractor._search_regex: Suggest updating when the regex is not found (suggested in #5442) Reuse the same message from ExtractorError --- youtube_dl/extractor/common.py | 4 ++-- youtube_dl/utils.py | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 28f672e42..7757bf950 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -23,6 +23,7 @@ from ..compat import ( ) from ..utils import ( age_restricted, + bug_reports_message, clean_html, compiled_regex_type, ExtractorError, @@ -556,8 +557,7 @@ class InfoExtractor(object): elif fatal: raise RegexNotFoundError('Unable to extract %s' % _name) else: - self._downloader.report_warning('unable to extract %s; ' - 'please report this issue on http://yt-dl.org/bug' % _name) + self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message()) return None def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e628fac81..edeee1853 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -452,6 +452,17 @@ def make_HTTPS_handler(params, **kwargs): return YoutubeDLHTTPSHandler(params, context=context, **kwargs) +def bug_reports_message(): + if ytdl_is_updateable(): + update_cmd = 'type youtube-dl -U to update' + else: + update_cmd = 'see https://yt-dl.org/update on how to update' + msg = '; please report this issue on https://yt-dl.org/bug .' + msg += ' Make sure you are using the latest version; %s.' % update_cmd + msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
+ return msg + + class ExtractorError(Exception): """Error during info extraction.""" @@ -467,13 +478,7 @@ class ExtractorError(Exception): if cause: msg += ' (caused by %r)' % cause if not expected: - if ytdl_is_updateable(): - update_cmd = 'type youtube-dl -U to update' - else: - update_cmd = 'see https://yt-dl.org/update on how to update' - msg += '; please report this issue on https://yt-dl.org/bug .' - msg += ' Make sure you are using the latest version; %s.' % update_cmd - msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' + msg += bug_reports_message() super(ExtractorError, self).__init__(msg) self.traceback = tb From d8e7ef04dcb583f3271a6b6a099a3da2e650fb45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Apr 2015 22:56:26 +0600 Subject: [PATCH 0356/2721] [vimple] Fix extraction (Closes #5448) --- youtube_dl/extractor/vimple.py | 71 ++++++++++++---------------------- 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index ee3d86117..aa3d6ddfd 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -1,75 +1,54 @@ -# coding: utf-8 from __future__ import unicode_literals -import base64 -import re -import xml.etree.ElementTree -import zlib - from .common import InfoExtractor from ..utils import int_or_none class VimpleIE(InfoExtractor): - IE_DESC = 'Vimple.ru' - _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' + IE_DESC = 'Vimple - one-click video hosting' + _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})' _TESTS = [ { 'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf', 'md5': '2e750a330ed211d3fd41821c6ad9a279', 'info_dict': { - 'id': 'c0f6b1687dcd4000a97ebe70068039cf', + 'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf', 'ext': 'mp4', 'title': 'Sunset', 'duration': 20, 'thumbnail': 
're:https?://.*?\.jpg', }, - }, + }, { + 'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9', + 'only_matching': True, + } ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id + webpage = self._download_webpage( + 'http://player.vimple.ru/iframe/%s' % video_id, video_id) - iframe = self._download_webpage( - iframe_url, video_id, - note='Downloading iframe', errnote='unable to fetch iframe') - player_url = self._html_search_regex( - r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') + playlist = self._parse_json( + self._search_regex( + r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'), + video_id)['playlist'][0] - player = self._request_webpage( - player_url, video_id, note='Downloading swf player').read() + title = playlist['title'] + video_id = playlist.get('videoId') or video_id + thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl') + duration = int_or_none(playlist.get('duration')) - player = zlib.decompress(player[8:]) - - xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) - xml_pieces = [piece[1:-1] for piece in xml_pieces] - - xml_data = b''.join(xml_pieces) - xml_data = base64.b64decode(xml_data) - - xml_data = xml.etree.ElementTree.fromstring(xml_data) - - video = xml_data.find('Video') - quality = video.get('quality') - q_tag = video.find(quality.capitalize()) - - formats = [ - { - 'url': q_tag.get('url'), - 'tbr': int(q_tag.get('bitrate')), - 'filesize': int(q_tag.get('filesize')), - 'format_id': quality, - }, - ] + formats = [{ + 'url': f['url'], + } for f in playlist['video']] + self._sort_formats(formats) return { 'id': video_id, - 'title': video.find('Title').text, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats, - 'thumbnail': video.find('Poster').get('url'), - 'duration': 
int_or_none(video.get('duration')), - 'webpage_url': video.find('Share').get('videoPageUrl'), } From c5826a491b7b214a7e81030ad53103c4aca04dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 17 Apr 2015 19:02:49 +0200 Subject: [PATCH 0357/2721] [mixcloud] Simplify url extraction On the tracks I tested the server number in the url from the webpage is valid for the mp3 or the m4a file and any other number is invalid, it's a waste of time to check them. --- youtube_dl/extractor/mixcloud.py | 72 +++++++------------------------- 1 file changed, 15 insertions(+), 57 deletions(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 84f291558..425a4ccf1 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import itertools from .common import InfoExtractor from ..compat import ( @@ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor): }, }] - def _get_url(self, track_id, template_url, server_number): - boundaries = (1, 30) - for nr in server_numbers(server_number, boundaries): - url = template_url % nr - try: - # We only want to know if the request succeed - # don't download the whole file - self._request_webpage( - HEADRequest(url), track_id, - 'Checking URL %d/%d ...' 
% (nr, boundaries[-1])) - return url - except ExtractorError: - pass - return None + def _check_url(self, url, track_id, ext): + try: + # We only want to know if the request succeed + # don't download the whole file + self._request_webpage( + HEADRequest(url), track_id, + 'Trying %s URL' % ext) + return True + except ExtractorError: + return False def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor): preview_url = self._search_regex( r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url') song_url = preview_url.replace('/previews/', '/c/originals/') - server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number')) - template_url = re.sub(r'(stream\d*)', 'stream%d', song_url) - final_song_url = self._get_url(track_id, template_url, server_number) - if final_song_url is None: - self.to_screen('Trying with m4a extension') - template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') - final_song_url = self._get_url(track_id, template_url, server_number) - if final_song_url is None: - raise ExtractorError('Unable to extract track url') + if not self._check_url(song_url, track_id, 'mp3'): + song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/') + if not self._check_url(song_url, track_id, 'm4a'): + raise ExtractorError('Unable to extract track url') PREFIX = ( r'm-play-on-spacebar[^>]+' @@ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor): return { 'id': track_id, 'title': title, - 'url': final_song_url, + 'url': song_url, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, @@ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor): 'view_count': view_count, 'like_count': like_count, } - - -def server_numbers(first, boundaries): - """ Server numbers to try in descending order of probable availability. - Starting from first (i.e. 
the number of the server hosting the preview file) - and going further and further up to the higher boundary and down to the - lower one in an alternating fashion. Namely: - - server_numbers(2, (1, 5)) - - # Where the preview server is 2, min number is 1 and max is 5. - # Yields: 2, 3, 1, 4, 5 - - Why not random numbers or increasing sequences? Since from what I've seen, - full length files seem to be hosted on servers whose number is closer to - that of the preview; to be confirmed. - """ - zip_longest = getattr(itertools, 'zip_longest', None) - if zip_longest is None: - # python 2.x - zip_longest = itertools.izip_longest - - if len(boundaries) != 2: - raise ValueError("boundaries should be a two-element tuple") - min, max = boundaries - highs = range(first + 1, max + 1) - lows = range(first - 1, min - 1, -1) - rest = filter( - None, itertools.chain.from_iterable(zip_longest(highs, lows))) - yield first - for n in rest: - yield n From 214e74bf6f6eee15cf85c9d5a4defce14b56d586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 17 Apr 2015 19:24:30 +0200 Subject: [PATCH 0358/2721] [soundcloud] Raise an error instead of calling 'report_error' --- youtube_dl/extractor/soundcloud.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7efc6aff1..183ff50f4 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -279,9 +279,8 @@ class SoundcloudSetIE(SoundcloudIE): info = self._download_json(resolv_url, full_title) if 'errors' in info: - for err in info['errors']: - self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message'])) - return + msgs = (compat_str(err['error_message']) for err in info['errors']) + raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) return { '_type': 'playlist', From 
7691a7a3bd77dd2c169a9a8592283d99a9266973 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 17 Apr 2015 23:41:07 +0600 Subject: [PATCH 0359/2721] [comedycentral] Fix feed uri request (Closes #5449, closes #5455) --- youtube_dl/extractor/comedycentral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index e5edcc84b..91ebb0ce5 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor): uri = mMovieParams[0][1] # Correct cc.com in uri - uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri) + uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri) index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri})) idoc = self._download_xml( From ce81b1411d182fbfe7bd6da7b875d50f37ae38d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 17 Apr 2015 22:29:30 +0200 Subject: [PATCH 0360/2721] FFmpegExtractAudioPP: Simplify handling of already existing files --- youtube_dl/postprocessor/ffmpeg.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 8e99a3c2c..4c4a038f9 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -264,15 +264,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): new_path = prefix + sep + extension # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. 
- if new_path == path: - self._nopostoverwrites = True + if (new_path == path or + (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): + self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path) + return True, information try: - if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)): - self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path) - else: - self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) - self.run_ffmpeg(path, new_path, acodec, more_opts) + self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) + self.run_ffmpeg(path, new_path, acodec, more_opts) except AudioConversionError as e: raise PostProcessingError( 'audio conversion failed: ' + e.msg) @@ -286,7 +285,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): errnote='Cannot update utime of audio file') information['filepath'] = new_path - return self._nopostoverwrites, information + return False, information class FFmpegVideoConvertorPP(FFmpegPostProcessor): From ecc6bd1341cd03fd335058371c3efa63dfd31152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 17 Apr 2015 22:38:14 +0200 Subject: [PATCH 0361/2721] YoutubeDL.post_process: simplify keep_video handling Since keep_video started as None we always set it to keep_video_wish unless it was None, so in the end keep_video == keep_video_wish. This should have been changed in f3ff1a3696c4080468e2cc5810c34273b148bd3e, but I didn't notice it. 
--- youtube_dl/YoutubeDL.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index a68b24ab4..6ac85f4e7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1486,16 +1486,9 @@ class YoutubeDL(object): pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(self._pps) for pp in pps_chain: - keep_video = None old_filename = info['filepath'] try: - keep_video_wish, info = pp.run(info) - if keep_video_wish is not None: - if keep_video_wish: - keep_video = keep_video_wish - elif keep_video is None: - # No clear decision yet, let IE decide - keep_video = keep_video_wish + keep_video, info = pp.run(info) except PostProcessingError as e: self.report_error(e.msg) if keep_video is False and not self.params.get('keepvideo', False): From f52e66505a9f9403d2278d22732ed98b711292fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Apr 2015 03:50:22 +0600 Subject: [PATCH 0362/2721] [gfycat] Simplify (Closes #5439, Closes #5394) --- youtube_dl/extractor/gfycat.py | 179 ++++++++++++++------------------- 1 file changed, 77 insertions(+), 102 deletions(-) diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py index 5e70ed3f6..397f1d42e 100644 --- a/youtube_dl/extractor/gfycat.py +++ b/youtube_dl/extractor/gfycat.py @@ -1,115 +1,90 @@ # coding: utf-8 - from __future__ import unicode_literals -import datetime - from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + qualities, +) + class GfycatIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)' - _TESTS = [ - { - 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', - 'info_dict': { - 'id': 'DeadlyDecisiveGermanpinscher', - 'title': 'Ghost in the Shell', - 'ext': 'mp4', - 'upload_date': '20140913' - } - },{ - 'url': 'http://gfycat.com/pleasinghilariouskusimanse', - 'info_dict': { - 'id': 
'pleasinghilariouskusimanse', - 'title': 'PleasingHilariousKusimanse', - 'ext': 'webm', - 'upload_date': '20150412' - }, - 'params': { - 'format': 'webm', - }, - },{ - 'url': 'http://gfycat.com/requiredunkemptbuzzard', - 'info_dict': { - 'id': 'requiredunkemptbuzzard', - 'title': 'Headshot!', - 'ext': 'gif', - 'upload_date': '20150129' - }, - 'params': { - 'format': 'gif', - }, - }, - ] + _TEST = { + 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', + 'info_dict': { + 'id': 'DeadlyDecisiveGermanpinscher', + 'ext': 'mp4', + 'title': 'Ghost in the Shell', + 'timestamp': 1410656006, + 'upload_date': '20140914', + 'uploader': 'anonymous', + 'duration': 10.4, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'categories': list, + 'age_limit': 0, + } + } def _real_extract(self, url): video_id = self._match_id(url) - json = self._download_json("http://gfycat.com/cajax/get/" + video_id, video_id, 'Downloading video info')['gfyItem'] - - # Title - # Use user title first, else fallback to url formated name - if json['title']: - video_title = json['title'] - else: - video_title = json['gfyName'] - - # Formats - # Pref: mp4, webm, gif - formats = [{ - 'format_id': 'mp4', - 'ext': 'mp4', - 'url': json['mp4Url'], - 'width': json['width'], - 'height': json['height'], - 'fps': json['frameRate'], - 'filesize': json['mp4Size'], - 'preference': 2 - }, { - 'format_id': 'webm', - 'ext': 'webm', - 'url': json['webmUrl'], - 'width': json['width'], - 'height': json['height'], - 'fps': json['frameRate'], - 'filesize': json['webmSize'], - 'preference': 1 - }, { - 'format_id': 'gif', - 'ext': 'gif', - 'url': json['gifUrl'], - 'width': json['width'], - 'height': json['height'], - 'fps': json['frameRate'], - 'filesize': json['gifSize'], - 'preference': 0 - }] - + + gfy = self._download_json( + 'http://gfycat.com/cajax/get/%s' % video_id, + video_id, 'Downloading video info')['gfyItem'] + + title = gfy.get('title') or gfy['gfyName'] + description = 
gfy.get('description') + timestamp = int_or_none(gfy.get('createDate')) + uploader = gfy.get('userName') + view_count = int_or_none(gfy.get('views')) + like_count = int_or_none(gfy.get('likes')) + dislike_count = int_or_none(gfy.get('dislikes')) + age_limit = 18 if gfy.get('nsfw') == '1' else 0 + + width = int_or_none(gfy.get('width')) + height = int_or_none(gfy.get('height')) + fps = int_or_none(gfy.get('frameRate')) + num_frames = int_or_none(gfy.get('numFrames')) + + duration = float_or_none(num_frames, fps) if num_frames and fps else None + + categories = gfy.get('tags') or gfy.get('extraLemmas') or [] + + FORMATS = ('gif', 'webm', 'mp4') + quality = qualities(FORMATS) + + formats = [] + for format_id in FORMATS: + video_url = gfy.get('%sUrl' % format_id) + if not video_url: + continue + filesize = gfy.get('%sSize' % format_id) + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': width, + 'height': height, + 'fps': fps, + 'filesize': filesize, + 'quality': quality(format_id), + }) self._sort_formats(formats) - - # Date - date = datetime.datetime.fromtimestamp( - int(json['createDate']) - ).strftime('%Y%m%d') - - # Length - duration = json['numFrames'] / json['frameRate'] - - # Age limit - # 1 = nsfw / 0 = sfw - if json['nsfw'] == 1: - age_limit = 18 - else: - age_limit = 0 - + return { - 'id': video_id, - 'title': video_title, - 'formats': formats, - 'creator': json['userName'], - 'description': json['description'], - 'upload_date': date, - 'categories': json['tags'], - 'age_limit': age_limit, - 'duration': duration, - 'view_count': json['views'] + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'uploader': uploader, + 'duration': duration, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'categories': categories, + 'age_limit': age_limit, + 'formats': formats, } From bf12cbe07c3a22deb46848df3bd3242da645eab1 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Apr 2015 03:51:21 +0600 Subject: [PATCH 0363/2721] Credit @julianrichen for gfycat (#5440) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index db3f42b26..267b8da1e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -123,3 +123,4 @@ Will W. Mohammad Teimori Pabandi Roman Le Négrate Matthias Küch +Julian Richen From c62566971fb8877d5ac52bc613d3bd53fe87a8d8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 17 Apr 2015 00:08:52 +0800 Subject: [PATCH 0364/2721] [facebook] Extend _VALID_URL --- youtube_dl/extractor/facebook.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f0e575320..f3a68b21d 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -24,8 +24,12 @@ class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) https?://(?:\w+\.)?facebook\.com/ (?:[^#]*?\#!/)? - (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?) - (?:v|video_id)=(?P<id>[0-9]+) + (?: + (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?) 
+ (?:v|video_id)=| + [^/]+/videos/ + ) + (?P<id>[0-9]+) (?:.*)''' _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1' _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1' @@ -50,6 +54,9 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', + 'only_matching': True, }] def _login(self): From 53faa3ca5f62c46bb56c0a85d1ed87b911b7ffa4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 18 Apr 2015 16:08:24 +0800 Subject: [PATCH 0365/2721] [facebook] Extend _VALID_URL take 2 (#5120) --- youtube_dl/extractor/facebook.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f3a68b21d..937b28fcc 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -27,7 +27,7 @@ class FacebookIE(InfoExtractor): (?: (?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?) (?:v|video_id)=| - [^/]+/videos/ + [^/]+/videos/(?:[^/]+/)? 
) (?P<id>[0-9]+) (?:.*)''' @@ -57,6 +57,9 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf', 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater', + 'only_matching': True, }] def _login(self): From 592e97e8550389e22b716eb33c30584aa3a8d656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 18 Apr 2015 11:36:42 +0200 Subject: [PATCH 0366/2721] Postprocessors: use a list for the files that can be deleted We could only know if we had to delete the original file, but this system allows to specify us more files (like subtitles). --- test/test_YoutubeDL.py | 21 ++++++++---- youtube_dl/YoutubeDL.py | 13 +++---- youtube_dl/postprocessor/atomicparsley.py | 2 +- youtube_dl/postprocessor/common.py | 8 ++--- youtube_dl/postprocessor/execafterdownload.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 34 +++++++++---------- youtube_dl/postprocessor/metadatafromtitle.py | 2 +- youtube_dl/postprocessor/xattrpp.py | 4 +-- 8 files changed, 48 insertions(+), 38 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 652519831..820e55ec2 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -443,27 +443,36 @@ class TestYoutubeDL(unittest.TestCase): def run(self, info): with open(audiofile, 'wt') as f: f.write('EXAMPLE') - info['filepath'] - return False, info + return [info['filepath']], info - def run_pp(params): + def run_pp(params, PP): with open(filename, 'wt') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) - ydl.add_post_processor(SimplePP()) + ydl.add_post_processor(PP()) ydl.post_process(filename, {'filepath': filename}) - run_pp({'keepvideo': True}) + run_pp({'keepvideo': True}, SimplePP) self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) self.assertTrue(os.path.exists(audiofile), '%s 
doesn\'t exist' % audiofile) os.unlink(filename) os.unlink(audiofile) - run_pp({'keepvideo': False}) + run_pp({'keepvideo': False}, SimplePP) self.assertFalse(os.path.exists(filename), '%s exists' % filename) self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) os.unlink(audiofile) + class ModifierPP(PostProcessor): + def run(self, info): + with open(info['filepath'], 'wt') as f: + f.write('MODIFIED') + return [], info + + run_pp({'keepvideo': False}, ModifierPP) + self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + os.unlink(filename) + def test_match_filter(self): class FilterYDL(YDL): def __init__(self, *args, **kwargs): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6ac85f4e7..8d8b146b2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1488,15 +1488,16 @@ class YoutubeDL(object): for pp in pps_chain: old_filename = info['filepath'] try: - keep_video, info = pp.run(info) + files_to_delete, info = pp.run(info) except PostProcessingError as e: self.report_error(e.msg) - if keep_video is False and not self.params.get('keepvideo', False): - try: + if files_to_delete and not self.params.get('keepvideo', False): + for old_filename in files_to_delete: self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) - os.remove(encodeFilename(old_filename)) - except (IOError, OSError): - self.report_warning('Unable to remove downloaded video file') + try: + os.remove(encodeFilename(old_filename)) + except (IOError, OSError): + self.report_warning('Unable to remove downloaded original file') def _make_archive_id(self, info_dict): # Future-proof against any change in case diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py index a5dfc136a..e4e198695 100644 --- a/youtube_dl/postprocessor/atomicparsley.py +++ b/youtube_dl/postprocessor/atomicparsley.py @@ -59,4 +59,4 @@ class AtomicParsleyPP(PostProcessor): 
os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py index ef9fdfa19..3b0e8ddd8 100644 --- a/youtube_dl/postprocessor/common.py +++ b/youtube_dl/postprocessor/common.py @@ -42,14 +42,14 @@ class PostProcessor(object): one has an extra field called "filepath" that points to the downloaded file. - This method returns a tuple, the first element of which describes - whether the original file should be kept (i.e. not deleted - None for - no preference), and the second of which is the updated information. + This method returns a tuple, the first element is a list of the files + that can be deleted, and the second of which is the updated + information. In addition, this method may raise a PostProcessingError exception if post processing fails. """ - return None, information # by default, keep file and do nothing + return [], information # by default, keep file and do nothing def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'): try: diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 75c0f7bbe..341437575 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -25,4 +25,4 @@ class ExecAfterDownloadPP(PostProcessor): raise PostProcessingError( 'Command returned error code %d' % retCode) - return None, information # by default, keep file and do nothing + return [], information diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 4c4a038f9..4cdbfce63 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -267,7 +267,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): if (new_path == path or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): 
self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path) - return True, information + return [], information try: self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) @@ -285,7 +285,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): errnote='Cannot update utime of audio file') information['filepath'] = new_path - return False, information + return [path], information class FFmpegVideoConvertorPP(FFmpegPostProcessor): @@ -299,13 +299,13 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): outpath = prefix + sep + self._preferedformat if information['ext'] == self._preferedformat: self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) - return True, information + return [], information self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath) self.run_ffmpeg(path, outpath, []) information['filepath'] = outpath information['format'] = self._preferedformat information['ext'] = self._preferedformat - return False, information + return [path], information class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): @@ -505,11 +505,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): def run(self, information): if information['ext'] != 'mp4': self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files') - return True, information + return [], information subtitles = information.get('requested_subtitles') if not subtitles: self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed') - return True, information + return [], information sub_langs = list(subtitles.keys()) filename = information['filepath'] @@ -535,7 +535,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, information + return [], 
information class FFmpegMetadataPP(FFmpegPostProcessor): @@ -561,7 +561,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): if not metadata: self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') - return True, info + return [], info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -578,7 +578,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info class FFmpegMergerPP(FFmpegPostProcessor): @@ -587,7 +587,7 @@ class FFmpegMergerPP(FFmpegPostProcessor): args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) - return True, info + return [], info class FFmpegAudioFixPP(FFmpegPostProcessor): @@ -602,14 +602,14 @@ class FFmpegAudioFixPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info class FFmpegFixupStretchedPP(FFmpegPostProcessor): def run(self, info): stretched_ratio = info.get('stretched_ratio') if stretched_ratio is None or stretched_ratio == 1: - return True, info + return [], info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -621,13 +621,13 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info class FFmpegFixupM4aPP(FFmpegPostProcessor): def run(self, info): if info.get('container') != 'm4a_dash': - return True, info + return [], info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -639,7 +639,7 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor): 
os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): @@ -656,7 +656,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): new_format = 'webvtt' if subs is None: self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert') - return True, info + return [], info self._downloader.to_screen('[ffmpeg] Converting subtitles') for lang, sub in subs.items(): ext = sub['ext'] @@ -676,4 +676,4 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'data': f.read(), } - return True, info + return [], info diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py index 5019433d3..a56077f20 100644 --- a/youtube_dl/postprocessor/metadatafromtitle.py +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -44,4 +44,4 @@ class MetadataFromTitlePP(PostProcessor): info[attribute] = value self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value) - return True, info + return [], info diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index f6c63fe97..0cba99fc3 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -105,8 +105,8 @@ class XAttrMetadataPP(PostProcessor): byte_value = value.encode('utf-8') write_xattr(filename, xattrname, byte_value) - return True, info + return [], info except (subprocess.CalledProcessError, OSError): self._downloader.report_error("This filesystem doesn't support extended attributes. 
(You may have to enable them in your /etc/fstab)") - return False, info + return [], info From 14523ed9695975704fae441a3518daf8b0e382fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 18 Apr 2015 11:44:42 +0200 Subject: [PATCH 0367/2721] FFmpegEmbedSubtitlePP: remove the subtitle files if '--keep-video' is not given (closes #5435) --- youtube_dl/postprocessor/ffmpeg.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 4cdbfce63..852dc3e44 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -513,7 +513,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): sub_langs = list(subtitles.keys()) filename = information['filepath'] - input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()] + sub_filenames = [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()] + input_files = [filename] + sub_filenames opts = [ '-map', '0', @@ -535,7 +536,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return [], information + return sub_filenames, information class FFmpegMetadataPP(FFmpegPostProcessor): From d47aeb2252aa0289199bc3ea9bb533997051ec0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 18 Apr 2015 11:52:36 +0200 Subject: [PATCH 0368/2721] FFmpegMergerPP: use the new system for specifying which files can be delete --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8d8b146b2..944571881 100755 --- 
a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1361,7 +1361,7 @@ class YoutubeDL(object): if info_dict.get('requested_formats') is not None: downloaded = [] success = True - merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) + merger = FFmpegMergerPP(self) if not merger.available: postprocessors = [] self.report_warning('You have requested multiple ' diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 852dc3e44..4d619236e 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -28,9 +28,8 @@ class FFmpegPostProcessorError(PostProcessingError): class FFmpegPostProcessor(PostProcessor): - def __init__(self, downloader=None, deletetempfiles=False): + def __init__(self, downloader=None): PostProcessor.__init__(self, downloader) - self._deletetempfiles = deletetempfiles self._determine_executables() def check_version(self): @@ -148,10 +147,6 @@ class FFmpegPostProcessor(PostProcessor): raise FFmpegPostProcessorError(msg) self.try_utime(out_path, oldest_mtime, oldest_mtime) - if self._deletetempfiles: - for ipath in input_paths: - os.remove(ipath) - def run_ffmpeg(self, path, out_path, opts): self.run_ffmpeg_multiple_files([path], out_path, opts) @@ -588,7 +583,7 @@ class FFmpegMergerPP(FFmpegPostProcessor): args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) - return [], info + return info['__files_to_merge'], info class FFmpegAudioFixPP(FFmpegPostProcessor): From cc36e2295a19796380e4a4f2abf618e727617efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 18 Apr 2015 13:27:35 +0200 Subject: [PATCH 0369/2721] [ign] Fix extraction of some videos in articles Give higher preference to the hero-poster regex because some articles may contain 
other videos --- youtube_dl/extractor/ign.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 3aade9e74..bf2d2041b 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -61,7 +61,7 @@ class IGNIE(InfoExtractor): }, { 'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch', - 'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7', + 'md5': '618fedb9c901fd086f6f093564ef8558', 'info_dict': { 'id': '078fdd005f6d3c02f63d795faa1b984f', 'ext': 'mp4', @@ -77,10 +77,10 @@ class IGNIE(InfoExtractor): def _find_video_id(self, webpage): res_id = [ r'"video_id"\s*:\s*"(.*?)"', + r'class="hero-poster[^"]*?"[^>]*id="(.+?)"', r'data-video-id="(.+?)"', r'<object id="vid_(.+?)"', r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"', - r'class="hero-poster[^"]*?"[^>]*id="(.+?)"', ] return self._search_regex(res_id, webpage, 'video id') From 8f4e8bf28080a7d6e969fea66e59afd3a1b2f085 Mon Sep 17 00:00:00 2001 From: hedii <hedi.chaibs@gmail.com> Date: Sat, 18 Apr 2015 15:40:40 +0200 Subject: [PATCH 0370/2721] Update wat.py line 116, modify 'Downloding' to 'Downloading'. It looks like nothing, but it is very annoying when youtube-dl command's output is parsed to find progress on a php (or other language) website for example. 
--- youtube_dl/extractor/wat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index bf9e40bad..affcc52f6 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -113,7 +113,7 @@ class WatIE(InfoExtractor): video_url = self._download_webpage( 'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country), real_id, - 'Downloding %s video URL' % fmt[0], + 'Downloading %s video URL' % fmt[0], 'Failed to download %s video URL' % fmt[0], False) if not video_url: From fec2d97ca2f1aa8b64c24b28d8c63cab052e9db4 Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder <jeff@ourexchange.net> Date: Thu, 15 Jan 2015 21:28:57 -0500 Subject: [PATCH 0371/2721] Add megavideoz.eu support. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/megavideozeu.py | 39 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/megavideozeu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9e9e20589..6657695cb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -274,6 +274,7 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .malemotion import MalemotionIE from .mdr import MDRIE +from .megavideozeu import MegavideozeuIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE diff --git a/youtube_dl/extractor/megavideozeu.py b/youtube_dl/extractor/megavideozeu.py new file mode 100644 index 000000000..e77b5f734 --- /dev/null +++ b/youtube_dl/extractor/megavideozeu.py @@ -0,0 +1,39 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_filesize, + unified_strdate, +) + + +class MegavideozeuIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>.*)(?:.*)' + + def 
_real_extract(self, url): + tmp_video_id = self._match_id(url) + + webpage = self._download_webpage(url, tmp_video_id) + + config_php = self._html_search_regex( + r'var cnf = \'([^\']+)\'', webpage, 'config.php url') + + configpage = self._download_webpage(config_php, tmp_video_id) + + video_id = self._html_search_regex( + r'<mediaid>([^<]+)', configpage, 'video id') + video_url = self._html_search_regex( + r'<file>([^<]+)', configpage, 'video URL') + title = self._html_search_regex( + r'<title><!\[CDATA\[([^\]]+)', configpage, 'title') + duration = int_or_none(self._html_search_regex( + r'<duration>([0-9]+)', configpage, 'duration', fatal=False)) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'duration': duration + } From f32cb5cb147e8e6f05625ed6b5880eb566101f03 Mon Sep 17 00:00:00 2001 From: Jeff Buchbinder <jeff@ourexchange.net> Date: Fri, 17 Apr 2015 11:25:01 -0400 Subject: [PATCH 0372/2721] [megavideoez] Add working test --- youtube_dl/extractor/megavideozeu.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/megavideozeu.py b/youtube_dl/extractor/megavideozeu.py index e77b5f734..ee26b0f2e 100644 --- a/youtube_dl/extractor/megavideozeu.py +++ b/youtube_dl/extractor/megavideozeu.py @@ -11,6 +11,18 @@ from ..utils import ( class MegavideozeuIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>.*)(?:.*)' + _TESTS = [ + { + 'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader', + 'info_dict': { + 'id': '48723', + 'ext': 'mp4', + 'duration': '10', + 'title': 'SMPTE Universal Film Leader', + } + } + ] + def _real_extract(self, url): tmp_video_id = self._match_id(url) @@ -29,7 +41,7 @@ class MegavideozeuIE(InfoExtractor): title = self._html_search_regex( r'<title><!\[CDATA\[([^\]]+)', configpage, 'title') duration = int_or_none(self._html_search_regex( - r'<duration>([0-9]+)', configpage, 'duration', fatal=False)) + r'<duration>([0-9\.]+)', 
configpage, 'duration', fatal=False)) return { 'id': video_id, From 31f224008e39b629d793a8fc3f8fafc7a4baf417 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Apr 2015 04:07:45 +0600 Subject: [PATCH 0373/2721] [megavideozeu] Simplify (Closes #5454) --- youtube_dl/extractor/megavideozeu.py | 59 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/megavideozeu.py b/youtube_dl/extractor/megavideozeu.py index ee26b0f2e..f98080caa 100644 --- a/youtube_dl/extractor/megavideozeu.py +++ b/youtube_dl/extractor/megavideozeu.py @@ -1,51 +1,52 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( - int_or_none, - parse_filesize, - unified_strdate, + float_or_none, + xpath_text, ) class MegavideozeuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>.*)(?:.*)' - _TESTS = [ - { - 'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader', - 'info_dict': { - 'id': '48723', - 'ext': 'mp4', - 'duration': '10', - 'title': 'SMPTE Universal Film Leader', - } + _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>[^/]+)(?:/(?P<display_id>[^/]+))?' 
+ _TEST = { + 'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader', + 'info_dict': { + 'id': '48723', + 'display_id': 'SMPTE-Universal-Film-Leader', + 'ext': 'mp4', + 'title': 'SMPTE Universal Film Leader', + 'thumbnail': 're:https?://.*?\.jpg', + 'duration': 10.93, } - ] - + } def _real_extract(self, url): - tmp_video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id - webpage = self._download_webpage(url, tmp_video_id) + webpage = self._download_webpage(url, display_id) - config_php = self._html_search_regex( - r'var cnf = \'([^\']+)\'', webpage, 'config.php url') + config = self._download_xml( + self._search_regex( + r"var\s+cnf\s*=\s*'([^']+)'", webpage, 'cnf url'), + display_id) - configpage = self._download_webpage(config_php, tmp_video_id) - - video_id = self._html_search_regex( - r'<mediaid>([^<]+)', configpage, 'video id') - video_url = self._html_search_regex( - r'<file>([^<]+)', configpage, 'video URL') - title = self._html_search_regex( - r'<title><!\[CDATA\[([^\]]+)', configpage, 'title') - duration = int_or_none(self._html_search_regex( - r'<duration>([0-9\.]+)', configpage, 'duration', fatal=False)) + video_url = xpath_text(config, './file', 'video url', fatal=True) + title = xpath_text(config, './title', 'title', fatal=True) + thumbnail = xpath_text(config, './image', 'thumbnail') + duration = float_or_none(xpath_text(config, './duration', 'duration')) + video_id = xpath_text(config, './mediaid', 'video id') or video_id return { 'id': video_id, + 'display_id': display_id, 'url': video_url, 'title': title, + 'thumbnail': thumbnail, 'duration': duration } From cc9b9df0b680da1cf610da57fbf397d8ebd75e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Apr 2015 04:08:29 +0600 Subject: [PATCH 0374/2721] [megavideozeu] Rename extractor --- youtube_dl/extractor/__init__.py | 2 +- 
youtube_dl/extractor/{megavideozeu.py => megavideoz.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename youtube_dl/extractor/{megavideozeu.py => megavideoz.py} (97%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6657695cb..1dabea7d6 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -274,7 +274,7 @@ from .macgamestore import MacGameStoreIE from .mailru import MailRuIE from .malemotion import MalemotionIE from .mdr import MDRIE -from .megavideozeu import MegavideozeuIE +from .megavideoz import MegaVideozIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE diff --git a/youtube_dl/extractor/megavideozeu.py b/youtube_dl/extractor/megavideoz.py similarity index 97% rename from youtube_dl/extractor/megavideozeu.py rename to youtube_dl/extractor/megavideoz.py index f98080caa..d80f3633e 100644 --- a/youtube_dl/extractor/megavideozeu.py +++ b/youtube_dl/extractor/megavideoz.py @@ -10,7 +10,7 @@ from ..utils import ( ) -class MegavideozeuIE(InfoExtractor): +class MegaVideozIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>[^/]+)(?:/(?P<display_id>[^/]+))?' 
_TEST = { 'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader', From 6e218b3f9a57f42756aa3016fa9dd530fb58c452 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Apr 2015 04:09:01 +0600 Subject: [PATCH 0375/2721] [megavideoz] Check non-existing videos --- youtube_dl/extractor/megavideoz.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/megavideoz.py b/youtube_dl/extractor/megavideoz.py index d80f3633e..a15b6a32a 100644 --- a/youtube_dl/extractor/megavideoz.py +++ b/youtube_dl/extractor/megavideoz.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, float_or_none, xpath_text, ) @@ -31,6 +32,9 @@ class MegaVideozIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + if '>Video Not Found<' in webpage: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + config = self._download_xml( self._search_regex( r"var\s+cnf\s*=\s*'([^']+)'", webpage, 'cnf url'), From 163965d86188e5545caae4b5e33ff25278fee5e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Apr 2015 04:14:58 +0600 Subject: [PATCH 0376/2721] [megavideoz] Improve non-existing videos check --- youtube_dl/extractor/megavideoz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/megavideoz.py b/youtube_dl/extractor/megavideoz.py index a15b6a32a..af7ff07ea 100644 --- a/youtube_dl/extractor/megavideoz.py +++ b/youtube_dl/extractor/megavideoz.py @@ -32,7 +32,7 @@ class MegaVideozIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - if '>Video Not Found<' in webpage: + if any(p in webpage for p in ('>Video Not Found<', '>404 Error<')): raise ExtractorError('Video %s does not exist' % video_id, expected=True) config = self._download_xml( From 880ee801cf8a67643cd0a1130ea46cf3be046bf1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan 
<yan12125@gmail.com> Date: Sun, 19 Apr 2015 19:08:37 +0800 Subject: [PATCH 0377/2721] [tests] Allow multi_video to be tested as playlists --- test/helper.py | 2 +- test/test_download.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/helper.py b/test/helper.py index 12afdf184..e1129e58f 100644 --- a/test/helper.py +++ b/test/helper.py @@ -150,7 +150,7 @@ def expect_info_dict(self, got_dict, expected_dict): 'invalid value for field %s, expected %r, got %r' % (info_field, expected, got)) # Check for the presence of mandatory fields - if got_dict.get('_type') != 'playlist': + if got_dict.get('_type') not in ('playlist', 'multi_video'): for key in ('id', 'url', 'title', 'ext'): self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) # Check for mandatory fields that are automatically set by YoutubeDL diff --git a/test/test_download.py b/test/test_download.py index 6a149ae4f..1110357a7 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -153,7 +153,7 @@ def generator(test_case): break if is_playlist: - self.assertEqual(res_dict['_type'], 'playlist') + self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video']) self.assertTrue('entries' in res_dict) expect_info_dict(self, res_dict, test_case.get('info_dict', {})) From 8b0e8990c241789d7deb1f5e27fc1aea00b4fa5e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 19 Apr 2015 19:12:23 +0800 Subject: [PATCH 0378/2721] [miomio] Replace the slow test case MioMio_1 takes about 25~35 seconds on information retrieval --- youtube_dl/extractor/miomio.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index d41195a96..a784fc5fb 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -31,6 +31,14 @@ class MioMioIE(InfoExtractor): 'title': '《动漫同人插画绘制》', }, 'playlist_mincount': 86, + 'skip': 'This video takes time too long for retrieving the URL', + }, 
{ + 'url': 'http://www.miomio.tv/watch/cc173113/', + 'info_dict': { + 'id': '173113', + 'title': 'The New Macbook 2015 上手试玩与简评' + }, + 'playlist_mincount': 2, }] def _real_extract(self, url): From f158799bbe72e1fe99ec057cc968d3ec874fb1dd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 19 Apr 2015 19:19:44 +0800 Subject: [PATCH 0379/2721] [Sohu] Fix title extraction --- youtube_dl/extractor/sohu.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 11edf616a..f8a4840f7 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -47,6 +47,7 @@ class SohuIE(InfoExtractor): 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml', 'info_dict': { 'id': '78910339', + 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', }, 'playlist': [{ 'md5': 'bdbfb8f39924725e6589c146bc1883ad', @@ -110,7 +111,7 @@ class SohuIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage)) vid = self._html_search_regex( r'var vid ?= ?["\'](\d+)["\']', @@ -172,9 +173,10 @@ class SohuIE(InfoExtractor): info['id'] = video_id else: info = { - '_type': 'playlist', + '_type': 'multi_video', 'entries': playlist, 'id': video_id, + 'title': title, } return info From 5b5fbc0867f0eb73416c10c9d692fceee92b5766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 19 Apr 2015 16:56:22 +0200 Subject: [PATCH 0380/2721] Detect already merged videos Without the '--keep-video' option the two files would be downloaded again and even using the option, ffmpeg would be run again, which for some videos can take a long time. 
We use a temporary file with ffmpeg so that the final file only exists if it success --- youtube_dl/YoutubeDL.py | 25 +++++++++++++++---------- youtube_dl/postprocessor/ffmpeg.py | 4 +++- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 944571881..28cf1662e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1369,16 +1369,21 @@ class YoutubeDL(object): ' The formats won\'t be merged') else: postprocessors = [merger] - for f in info_dict['requested_formats']: - new_info = dict(info_dict) - new_info.update(f) - fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id']) - downloaded.append(fname) - partial_success = dl(fname, new_info) - success = success and partial_success - info_dict['__postprocessors'] = postprocessors - info_dict['__files_to_merge'] = downloaded + if os.path.exists(encodeFilename(filename)): + self.to_screen( + '[download] %s has already been downloaded and ' + 'merged' % filename) + else: + for f in info_dict['requested_formats']: + new_info = dict(info_dict) + new_info.update(f) + fname = self.prepare_filename(new_info) + fname = prepend_extension(fname, 'f%s' % f['format_id']) + downloaded.append(fname) + partial_success = dl(fname, new_info) + success = success and partial_success + info_dict['__postprocessors'] = postprocessors + info_dict['__files_to_merge'] = downloaded else: # Just a single file success = dl(filename, info_dict) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 4d619236e..df6fb6665 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -580,9 +580,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor): class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] 
self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) - self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) + self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return info['__files_to_merge'], info From feccf29c876869f44a9c983977371073b9801a51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Apr 2015 01:14:10 +0600 Subject: [PATCH 0381/2721] [YoutubeDL] Make `bestvideo+bestaudio/best` default format when merger is available --- youtube_dl/YoutubeDL.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 28cf1662e..e5d497b3f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1091,7 +1091,11 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: - req_format = 'best' + req_format_list = [] + if info_dict['extractor'] == 'youtube' and FFmpegMergerPP(self).available: + req_format_list.append('bestvideo+bestaudio') + req_format_list.append('best') + req_format = '/'.join(req_format_list) formats_to_download = [] if req_format == 'all': formats_to_download = formats From 81cd954a512e48fb5cccc3159cd0581088bff0b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Apr 2015 03:00:35 +0600 Subject: [PATCH 0382/2721] [YoutubeDL] Merge incompatible formats into mkv (#5456) --- youtube_dl/YoutubeDL.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e5d497b3f..5dd9d2430 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1373,12 +1373,34 @@ class YoutubeDL(object): ' The formats won\'t be merged') else: postprocessors = [merger] + + def compatible_formats(formats): + video, audio = formats + # Check extension 
+ video_ext, audio_ext = audio.get('ext'), video.get('ext') + if video_ext and audio_ext: + COMPATIBLE_EXTS = ( + ('mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), + ('webm') + ) + for exts in COMPATIBLE_EXTS: + if video_ext in exts and audio_ext in exts: + return True + # TODO: Check acodec/vcodec + return False + + requested_formats = info_dict['requested_formats'] + # Merge incompatible formats into mkv + if not compatible_formats(requested_formats): + filename = os.path.splitext(filename)[0] + '.mkv' + self.report_warning('You have requested formats uncompatible for merge. ' + 'The formats will be merged into mkv') if os.path.exists(encodeFilename(filename)): self.to_screen( '[download] %s has already been downloaded and ' 'merged' % filename) else: - for f in info_dict['requested_formats']: + for f in requested_formats: new_info = dict(info_dict) new_info.update(f) fname = self.prepare_filename(new_info) From 70947ea7b13d22a55756dc2b6d086e058edd91b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 18 Apr 2015 03:07:59 +0600 Subject: [PATCH 0383/2721] [parameters.json] Set default `format` parameter to `best` --- test/parameters.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parameters.json b/test/parameters.json index cbff9bd16..48b5a062e 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -7,7 +7,7 @@ "forcethumbnail": false, "forcetitle": false, "forceurl": false, - "format": null, + "format": "best", "format_limit": null, "ignoreerrors": false, "listformats": null, From c0dea0a782a1035225ad25d556e398e2909f62db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 19 Apr 2015 22:33:52 +0600 Subject: [PATCH 0384/2721] [YoutubeDL] Respect explicit `--merge-format-output` for uncompatible formats as well --- youtube_dl/YoutubeDL.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index 5dd9d2430..3bb350e2a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1390,8 +1390,7 @@ class YoutubeDL(object): return False requested_formats = info_dict['requested_formats'] - # Merge incompatible formats into mkv - if not compatible_formats(requested_formats): + if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): filename = os.path.splitext(filename)[0] + '.mkv' self.report_warning('You have requested formats uncompatible for merge. ' 'The formats will be merged into mkv') From a38050925958c48175bf56a988e9714edef8f12f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 19 Apr 2015 18:37:38 +0200 Subject: [PATCH 0385/2721] Move the documentation for the `--format` option to the manpage It's too big for beeing embedded in the help message and it's easier to edit in the markdown file. --- README.md | 19 +++++++++---------- youtube_dl/options.py | 27 +-------------------------- 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index caa1478d9..68426b685 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ youtube-dl - download videos from youtube.com or other video platforms - [OPTIONS](#options) - [CONFIGURATION](#configuration) - [OUTPUT TEMPLATE](#output-template) +- [FORMAT SELECTION](#format-selection) - [VIDEO SELECTION](#video-selection) - [FAQ](#faq) - [DEVELOPER INSTRUCTIONS](#developer-instructions) @@ -184,16 +185,7 @@ which means you can modify it, redistribute it or use it however you like. --sleep-interval SECONDS Number of seconds to sleep before each download. ## Video Format Options: - -f, --format FORMAT Video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by - extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. 
You can also use the special names "best", "bestvideo", "bestaudio", - "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). - This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, - vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a - question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos - (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. - Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio - of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f - bestvideo+bestaudio. + -f, --format FORMAT Video format code, see the "FORMAT SELECTION" for all the info --all-formats Download all available video formats --prefer-free-formats Prefer free video formats unless a specific one is requested --max-quality FORMAT Highest quality format to download @@ -271,6 +263,13 @@ $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filena youtube-dl_test_video_.mp4 # A simple file name ``` +# FORMAT SELECTION + +By default youtube-dl tries to download the best quality, but sometimes you may want to download other format. +The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`. 
+ +If you are want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. + # VIDEO SELECTION Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`, they accept dates in two formats: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 11603f60d..39c38c980 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -322,32 +322,7 @@ def parseOpts(overrideArguments=None): video_format.add_option( '-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, - help=( - 'Video format code, specify the order of preference using' - ' slashes, as in -f 22/17/18 . ' - ' Instead of format codes, you can select by extension for the ' - 'extensions aac, m4a, mp3, mp4, ogg, wav, webm. ' - 'You can also use the special names "best",' - ' "bestvideo", "bestaudio", "worst". ' - ' You can filter the video results by putting a condition in' - ' brackets, as in -f "best[height=720]"' - ' (or -f "[filesize>10M]"). 
' - ' This works for filesize, height, width, tbr, abr, vbr, asr, and fps' - ' and the comparisons <, <=, >, >=, =, !=' - ' and for ext, acodec, vcodec, container, and protocol' - ' and the comparisons =, != .' - ' Formats for which the value is not known are excluded unless you' - ' put a question mark (?) after the operator.' - ' You can combine format filters, so ' - '-f "[height <=? 720][tbr>500]" ' - 'selects up to 720p videos (or videos where the height is not ' - 'known) with a bitrate of at least 500 KBit/s.' - ' By default, youtube-dl will pick the best quality.' - ' Use commas to download multiple audio formats, such as' - ' -f 136/137/mp4/bestvideo,140/m4a/bestaudio.' - ' You can merge the video and audio of two formats into a single' - ' file using -f <video-format>+<audio-format> (requires ffmpeg or' - ' avconv), for example -f bestvideo+bestaudio.')) + help='Video format code, see the "FORMAT SELECTION" for all the info') video_format.add_option( '--all-formats', action='store_const', dest='format', const='all', From 7b071e317bd79c81fac0c529d021da381801c3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 19 Apr 2015 18:52:01 +0200 Subject: [PATCH 0386/2721] README: document bestvideo+bestaudio/best (#5447) --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 68426b685..7dbe66995 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,8 @@ The simplest case is requesting a specific format, for example `-f 22`. You can If you are want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). 
This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. +youtube-dl uses `-f bestvideo+bestaudio/best` if ffmpeg or avconv are installed (`best` is needed for videos that don't come from YouTube because they don't provide the audio and video in two different files). If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. 
+ # VIDEO SELECTION Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`, they accept dates in two formats: From b524a001d68767298272ef59274c826a6ea6a1b7 Mon Sep 17 00:00:00 2001 From: Quentin Rameau <quinq@quinq.eu.org> Date: Mon, 20 Apr 2015 06:25:55 +0200 Subject: [PATCH 0387/2721] [bandcamp] fix video_id parsing (fixes #4861) --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 869294967..505877b77 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -72,7 +72,7 @@ class BandcampIE(InfoExtractor): download_link = m_download.group(1) video_id = self._search_regex( - r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$', + r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$', webpage, 'video id') download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') From 3ded7bac166c71f99b8d8036c072552c483d2364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 21:13:31 +0600 Subject: [PATCH 0388/2721] [extractor/common] Add ability to specify custom field preference for `_sort_formats` --- youtube_dl/extractor/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7757bf950..71230323c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -708,7 +708,7 @@ class InfoExtractor(object): return self._html_search_meta('twitter:player', html, 'twitter card player') - def _sort_formats(self, formats): + def _sort_formats(self, formats, field_preference=None): if not formats: raise ExtractorError('No video formats found') @@ -718,6 +718,9 @@ class InfoExtractor(object): if not f.get('ext') and 'url' in f: f['ext'] = determine_ext(f['url']) + if 
isinstance(field_preference, (list, tuple)): + return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference) + preference = f.get('preference') if preference is None: proto = f.get('protocol') From 736785ab63104b4d285545e4755fa03ef4496a47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 21:42:20 +0600 Subject: [PATCH 0389/2721] [ted] Clarify audio/video-only formats --- youtube_dl/extractor/ted.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index a2dc14c2b..2c8acfef6 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -194,14 +194,18 @@ class TEDIE(InfoExtractor): 'tbr': int_or_none(resource.get('bitrate')), }) elif format_id == 'hls': - formats.extend(self._extract_m3u8_formats( - resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)) + hls_formats = self._extract_m3u8_formats( + resources.get('stream'), video_name, 'mp4', m3u8_id=format_id) + for f in hls_formats: + f['acodec'] = 'none' + formats.extend(hls_formats) audio_download = talk_info.get('audioDownload') if audio_download: formats.append({ 'url': audio_download, 'format_id': 'audio', + 'vcodec': 'none', }) self._sort_formats(formats) From cfbee8a431fdc22ff98dc115a59e0a48cace2c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 21:42:42 +0600 Subject: [PATCH 0390/2721] [ted] Clarify IE_NAME --- youtube_dl/extractor/ted.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 2c8acfef6..e2cbbc121 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -10,6 +10,7 @@ from ..utils import int_or_none class TEDIE(InfoExtractor): + IE_NAME = 'ted' _VALID_URL = r'''(?x) (?P<proto>https?://) (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ From 
17c8675853f679d3671c1558f6b2f060f2c8e23a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 21:58:29 +0600 Subject: [PATCH 0391/2721] [YoutubeDL] Allow bestvideo+bestaudio/best strategy for ted extractor --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3bb350e2a..b5fddb8e7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1092,7 +1092,7 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: req_format_list = [] - if info_dict['extractor'] == 'youtube' and FFmpegMergerPP(self).available: + if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available: req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) From 6728187ac0b1d5d083e8654e35a18d33485f2614 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 21:58:46 +0600 Subject: [PATCH 0392/2721] [YoutubeDL] mp3 is compatible with mp4 --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b5fddb8e7..5b2c3aa38 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1380,7 +1380,7 @@ class YoutubeDL(object): video_ext, audio_ext = audio.get('ext'), video.get('ext') if video_ext and audio_ext: COMPATIBLE_EXTS = ( - ('mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), + ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'), ('webm') ) for exts in COMPATIBLE_EXTS: From 0f0b5736dab5e45189d1b35e0b7074585edd057b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 22:01:02 +0600 Subject: [PATCH 0393/2721] [ted] Fix hls audio/video-only formats --- youtube_dl/extractor/ted.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index e2cbbc121..c788feb5d 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -198,7 +198,10 @@ class TEDIE(InfoExtractor): hls_formats = self._extract_m3u8_formats( resources.get('stream'), video_name, 'mp4', m3u8_id=format_id) for f in hls_formats: - f['acodec'] = 'none' + if not f.get('height'): + f['vcodec'] = 'none' + else: + f['acodec'] = 'none' formats.extend(hls_formats) audio_download = talk_info.get('audioDownload') From 14f7abfa71bc71f46a5e3306062b1feccc4ccd26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 22:04:17 +0600 Subject: [PATCH 0394/2721] [ted] Lower preference for direct audio since it's mono --- youtube_dl/extractor/ted.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index c788feb5d..8aa27d282 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -210,6 +210,7 @@ class TEDIE(InfoExtractor): 'url': audio_download, 'format_id': 'audio', 'vcodec': 'none', + 'preference': -0.5, }) self._sort_formats(formats) From 6621ca39a3f03e44648b54bd8ff57814362f5dbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 22:04:42 +0600 Subject: [PATCH 0395/2721] [ted] Skip hls quality selection format --- youtube_dl/extractor/ted.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 8aa27d282..a48d77c30 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -198,6 +198,8 @@ class TEDIE(InfoExtractor): hls_formats = self._extract_m3u8_formats( resources.get('stream'), video_name, 'mp4', m3u8_id=format_id) for f in hls_formats: + if f.get('format_id') == 'hls-meta': + continue if not f.get('height'): f['vcodec'] = 'none' else: From bda44f31a1e18d71dc1cc7a2a3a754593ecd63d3 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 22:33:35 +0600 Subject: [PATCH 0396/2721] [bambuser] Modernize --- youtube_dl/extractor/bambuser.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index c193e66ca..12673e766 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -35,12 +35,11 @@ class BambuserIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - info_url = ('http://player-c.api.bambuser.com/getVideo.json?' - '&api_key=%s&vid=%s' % (self._API_KEY, video_id)) - info_json = self._download_webpage(info_url, video_id) - info = json.loads(info_json)['result'] + video_id = self._match_id(url) + + info = self._download_json( + 'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s' + % (self._API_KEY, video_id), video_id)['result'] return { 'id': video_id, From ae8953409e30ef9d7dd6bc40350fccc2a29ffb4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 22:35:53 +0600 Subject: [PATCH 0397/2721] [bambuser] Capture and output error message (#5478) --- youtube_dl/extractor/bambuser.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 12673e766..d52302ebc 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals import re -import json import itertools from .common import InfoExtractor -from ..compat import ( - compat_urllib_request, -) +from ..compat import compat_urllib_request +from ..utils import ExtractorError class BambuserIE(InfoExtractor): @@ -39,17 +37,24 @@ class BambuserIE(InfoExtractor): info = self._download_json( 
'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s' - % (self._API_KEY, video_id), video_id)['result'] + % (self._API_KEY, video_id), video_id) + + error = info.get('error') + if error: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error), expected=True) + + result = info['result'] return { 'id': video_id, - 'title': info['title'], - 'url': info['url'], - 'thumbnail': info.get('preview'), - 'duration': int(info['length']), - 'view_count': int(info['views_total']), - 'uploader': info['username'], - 'uploader_id': info['owner']['uid'], + 'title': result['title'], + 'url': result['url'], + 'thumbnail': result.get('preview'), + 'duration': int(result['length']), + 'view_count': int(result['views_total']), + 'uploader': result['username'], + 'uploader_id': result['owner']['uid'], } From edf421611928e48ac4fda778d6de8ea83585019d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 22:46:01 +0600 Subject: [PATCH 0398/2721] [bambuser] Modernize and extract more metadata --- youtube_dl/extractor/bambuser.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index d52302ebc..93913c3f4 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -4,8 +4,15 @@ import re import itertools from .common import InfoExtractor -from ..compat import compat_urllib_request -from ..utils import ExtractorError +from ..compat import ( + compat_urllib_request, + compat_str, +) +from ..utils import ( + ExtractorError, + int_or_none, + float_or_none, +) class BambuserIE(InfoExtractor): @@ -24,6 +31,9 @@ class BambuserIE(InfoExtractor): 'duration': 3741, 'uploader': 'pixelversity', 'uploader_id': '344706', + 'timestamp': 1382976692, + 'upload_date': '20131028', + 'view_count': int, }, 'params': { # It doesn't respect the 'Range' header, it would download the whole video @@ 
-51,10 +61,13 @@ class BambuserIE(InfoExtractor): 'title': result['title'], 'url': result['url'], 'thumbnail': result.get('preview'), - 'duration': int(result['length']), - 'view_count': int(result['views_total']), - 'uploader': result['username'], - 'uploader_id': result['owner']['uid'], + 'duration': int_or_none(result.get('length')), + 'uploader': result.get('username'), + 'uploader_id': compat_str(result.get('owner', {}).get('uid')), + 'timestamp': int_or_none(result.get('created')), + 'fps': float_or_none(result.get('framerate')), + 'view_count': int_or_none(result.get('views_total')), + 'comment_count': int_or_none(result.get('comment_count')), } From 006ce15a0cfbf79dedd2c59b1e17344b1a130e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 20 Apr 2015 23:00:37 +0600 Subject: [PATCH 0399/2721] [bambuser] Add support for authentication (#5478) --- youtube_dl/extractor/bambuser.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py index 93913c3f4..8dff1d6e3 100644 --- a/youtube_dl/extractor/bambuser.py +++ b/youtube_dl/extractor/bambuser.py @@ -5,6 +5,7 @@ import itertools from .common import InfoExtractor from ..compat import ( + compat_urllib_parse, compat_urllib_request, compat_str, ) @@ -19,6 +20,8 @@ class BambuserIE(InfoExtractor): IE_NAME = 'bambuser' _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)' _API_KEY = '005f64509e19a868399060af746a00aa' + _LOGIN_URL = 'https://bambuser.com/user' + _NETRC_MACHINE = 'bambuser' _TEST = { 'url': 'http://bambuser.com/v/4050584', @@ -42,6 +45,34 @@ class BambuserIE(InfoExtractor): }, } + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_form = { + 'form_id': 'user_login', + 'op': 'Log in', + 'name': username, + 'pass': password, + } + + request = compat_urllib_request.Request( + self._LOGIN_URL, 
compat_urllib_parse.urlencode(login_form).encode('utf-8')) + request.add_header('Referer', self._LOGIN_URL) + response = self._download_webpage( + request, None, 'Logging in as %s' % username) + + login_error = self._html_search_regex( + r'(?s)<div class="messages error">(.+?)</div>', + response, 'login error', default=None) + if login_error: + raise ExtractorError( + 'Unable to login: %s' % login_error, expected=True) + + def _real_initialize(self): + self._login() + def _real_extract(self, url): video_id = self._match_id(url) From c6391cd587a26eb3d9bba7296be804f14612e919 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 21 Apr 2015 02:29:56 +0800 Subject: [PATCH 0400/2721] [Senate] Add new extractor (#5302) --- youtube_dl/downloader/f4m.py | 2 + youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/senateisvp.py | 129 +++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 youtube_dl/extractor/senateisvp.py diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 4ab000d67..b1a858c45 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -389,6 +389,8 @@ class F4mFD(FileDownloader): url = base_url + name if akamai_pv: url += '?' 
+ akamai_pv.strip(';') + if info_dict.get('extra_param_to_segment_url'): + url += info_dict.get('extra_param_to_segment_url') frag_filename = '%s-%s' % (tmpfilename, name) try: success = http_dl.download(frag_filename, {'url': url}) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1dabea7d6..3bd6d1697 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -447,6 +447,7 @@ from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE +from .senateisvp import SenateISVPIE from .servingsys import ServingSysIE from .sexu import SexuIE from .sexykarma import SexyKarmaIE diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py new file mode 100644 index 000000000..807979d13 --- /dev/null +++ b/youtube_dl/extractor/senateisvp.py @@ -0,0 +1,129 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import ExtractorError +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) + + +class SenateISVPIE(InfoExtractor): + _COMM_MAP = [ + ["ag", "76440", "http://ag-f.akamaihd.net"], + ["aging", "76442", "http://aging-f.akamaihd.net"], + ["approps", "76441", "http://approps-f.akamaihd.net"], + ["armed", "76445", "http://armed-f.akamaihd.net"], + ["banking", "76446", "http://banking-f.akamaihd.net"], + ["budget", "76447", "http://budget-f.akamaihd.net"], + ["cecc", "76486", "http://srs-f.akamaihd.net"], + ["commerce", "80177", "http://commerce1-f.akamaihd.net"], + ["csce", "75229", "http://srs-f.akamaihd.net"], + ["dpc", "76590", "http://dpc-f.akamaihd.net"], + ["energy", "76448", "http://energy-f.akamaihd.net"], + ["epw", "76478", "http://epw-f.akamaihd.net"], + ["ethics", "76449", "http://ethics-f.akamaihd.net"], + ["finance", "76450", "http://finance-f.akamaihd.net"], + 
["foreign", "76451", "http://foreign-f.akamaihd.net"], + ["govtaff", "76453", "http://govtaff-f.akamaihd.net"], + ["help", "76452", "http://help-f.akamaihd.net"], + ["indian", "76455", "http://indian-f.akamaihd.net"], + ["intel", "76456", "http://intel-f.akamaihd.net"], + ["intlnarc", "76457", "http://intlnarc-f.akamaihd.net"], + ["jccic", "85180", "http://jccic-f.akamaihd.net"], + ["jec", "76458", "http://jec-f.akamaihd.net"], + ["judiciary", "76459", "http://judiciary-f.akamaihd.net"], + ["rpc", "76591", "http://rpc-f.akamaihd.net"], + ["rules", "76460", "http://rules-f.akamaihd.net"], + ["saa", "76489", "http://srs-f.akamaihd.net"], + ["smbiz", "76461", "http://smbiz-f.akamaihd.net"], + ["srs", "75229", "http://srs-f.akamaihd.net"], + ["uscc", "76487", "http://srs-f.akamaihd.net"], + ["vetaff", "76462", "http://vetaff-f.akamaihd.net"], + ["arch", "", "http://ussenate-f.akamaihd.net/"] + ] + _IE_NAME = 'senate.gov' + _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)' + _TESTS = [{ + 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', + 'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', + 'info_dict': { + 'id': 'judiciary031715', + 'ext': 'flv', + 'title': 'Integrated Senate Video Player', + } + }, { + 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', + 'md5': '2917c827513700aa9b70eaebf25116da', + 'info_dict': { + 'id': 'commerce011514', + 'ext': 'flv', + 'title': 'Integrated Senate Video Player' + } + }, { + 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', + # checksum differs each time + 'info_dict': { + 'id': 'intel090613', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + } + }] + + def _get_info_for_comm(self, committee): + for entry in self._COMM_MAP: + if entry[0] == 
committee: + return entry[1:] + + def _real_extract(self, url): + qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) + if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): + raise ExtractorError('Invalid URL', expected=True) + + video_id = re.sub(r'.mp4$', '', qs['filename'][0]) + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<title>([^<]+)', webpage, video_id) + + video_type = qs['type'][0] + committee = video_type if video_type == 'arch' else qs['comm'][0] + stream_num, domain = self._get_info_for_comm(committee) + + formats = [] + if video_type == 'arch': + filename = video_id if '.' in video_id else video_id + '.mp4' + formats = [{ + # All parameters in the query string are necessary to prevent a 403 error + 'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=', + }] + else: + hdcore_sign = '?hdcore=3.1.0' + url_params = (domain, video_id, stream_num) + f4m_url = '%s/z/%s_1@%s/manifest.f4m' % url_params + hdcore_sign + m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params + for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): + # URLs without the extra param induce an 404 error + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.append(entry) + for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): + mobj = re.search(r'(?P(?:-p|-b)).m3u8', entry['url']) + if mobj: + entry['format_id'] += mobj.group('tag') + formats.append(entry) + + self._sort_formats(formats) + + info_dict = { + 'id': video_id, + 'title': title, + } + + if len(formats) >= 1: + info_dict.update({'formats': formats}) + else: + info_dict.update(formats[0]) + + return info_dict From 24e21613b670efefd2f284c8e7027023d0a64399 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 21 Apr 2015 02:32:10 +0800 Subject: [PATCH 0401/2721] [bilibili] Capture the video-not-exist message --- youtube_dl/extractor/bilibili.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 75d744852..904d9a8b4 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + ExtractorError, ) @@ -30,6 +31,8 @@ class BiliBiliIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None): + raise ExtractorError('The video does not exist or was deleted', expected=True) video_code = self._search_regex( r'(?s)
    <div itemprop="video".*?>(.*?)</div>
    ', webpage, 'video code') From f91e1a8739a59bca1ced0bbc70f8cf9c3a33f778 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 21 Apr 2015 02:57:32 +0800 Subject: [PATCH 0402/2721] [Senate] Try to capture thumbnails --- youtube_dl/extractor/senateisvp.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 807979d13..a93874cad 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -53,6 +53,7 @@ class SenateISVPIE(InfoExtractor): 'id': 'judiciary031715', 'ext': 'flv', 'title': 'Integrated Senate Video Player', + 'thumbnail': 're:^https?://.*\.(?:jpg|png)$', } }, { 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', @@ -87,6 +88,9 @@ class SenateISVPIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'([^<]+)', webpage, video_id) + poster = qs.get('poster') + if poster: + thumbnail = poster[0] video_type = qs['type'][0] committee = video_type if video_type == 'arch' else qs['comm'][0] @@ -119,6 +123,7 @@ class SenateISVPIE(InfoExtractor): info_dict = { 'id': video_id, 'title': title, + 'thumbnail': thumbnail, } if len(formats) >= 1: From 2fe1b5bd2add12d70717878704cd3f811af5d22c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 21 Apr 2015 03:18:38 +0800 Subject: [PATCH 0403/2721] [CSpan] Add detection for Senate ISVP. 
Closes #5302 --- youtube_dl/extractor/cspan.py | 18 +++++++++++++++++- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/senateisvp.py | 20 ++++++++++++++++++-- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 955119d40..7377ac7b9 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -7,7 +7,9 @@ from ..utils import ( int_or_none, unescapeHTML, find_xpath_attr, + smuggle_url, ) +from .senateisvp import SenateISVPIE class CSpanIE(InfoExtractor): @@ -40,6 +42,15 @@ class CSpanIE(InfoExtractor): 'title': 'General Motors Ignition Switch Recall', }, 'playlist_duration_sum': 14855, + }, { + # Video from senate.gov + 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', + 'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', + 'info_dict': { + 'id': 'judiciary031715', + 'ext': 'flv', + 'title': 'Immigration Reforms Needed to Protect Skilled American Workers', + } }] def _real_extract(self, url): @@ -56,7 +67,7 @@ class CSpanIE(InfoExtractor): # present, otherwise this is a stripped version r'

    (.*?)

    ' ], - webpage, 'description', flags=re.DOTALL) + webpage, 'description', flags=re.DOTALL, default=None) info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id data = self._download_json(info_url, video_id) @@ -68,6 +79,11 @@ class CSpanIE(InfoExtractor): title = find_xpath_attr(doc, './/string', 'name', 'title').text thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text + senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) + if senate_isvp_url: + surl = smuggle_url(senate_isvp_url, {'force_title': title}) + return self.url_result(surl, 'SenateISVP', video_id, title) + files = data['video']['files'] entries = [{ diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e645d1bb3..ec4d0c210 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -35,6 +35,7 @@ from .rutv import RUTVIE from .smotri import SmotriIE from .condenast import CondeNastIE from .udn import UDNEmbedIE +from .senateisvp import SenateISVPIE class GenericIE(InfoExtractor): @@ -1365,6 +1366,11 @@ class GenericIE(InfoExtractor): return self.url_result( compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') + # Look for Senate ISVP iframe + senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) + if senate_isvp_url: + return self.url_result(surl, 'SenateISVP') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index a93874cad..23e1cd944 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + unsmuggle_url, +) from ..compat import ( compat_parse_qs, compat_urlparse, @@ -73,12 +76,22 @@ class SenateISVPIE(InfoExtractor): } }] + 
@staticmethod + def _search_iframe_url(webpage): + mobj = re.search( + r"]+src=['\"](?Phttp://www\.senate\.gov/isvp/\?[^'\"]+)['\"]", + webpage) + if mobj: + return mobj.group('url') + def _get_info_for_comm(self, committee): for entry in self._COMM_MAP: if entry[0] == committee: return entry[1:] def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): raise ExtractorError('Invalid URL', expected=True) @@ -87,7 +100,10 @@ class SenateISVPIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'([^<]+)', webpage, video_id) + if smuggled_data.get('force_title'): + title = smuggled_data['force_title'] + else: + title = self._html_search_regex(r'([^<]+)', webpage, video_id) poster = qs.get('poster') if poster: thumbnail = poster[0] From 92dcba1e1cc411eecb64792e78ad8f3b125691c8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 21 Apr 2015 03:30:54 +0800 Subject: [PATCH 0404/2721] [CSpan] Fix test cases CSpan_1 and CSpan_2 --- youtube_dl/extractor/cspan.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7377ac7b9..6bebcc65c 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -37,11 +37,14 @@ class CSpanIE(InfoExtractor): } }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', + 'md5': '446562a736c6bf97118e389433ed88d4', 'info_dict': { 'id': '342759', + 'ext': 'mp4', 'title': 'General Motors Ignition Switch Recall', + 'duration': 14848, + 'description': 'md5:70c7c3b8fa63fa60d42772440596034c' }, - 'playlist_duration_sum': 14855, }, { # Video from senate.gov 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', @@ -97,9 +100,14 @@ class 
CSpanIE(InfoExtractor): 'duration': int_or_none(f.get('length', {}).get('#text')), } for partnum, f in enumerate(files)] - return { - '_type': 'playlist', - 'entries': entries, - 'title': title, - 'id': video_id, - } + if len(entries) == 1: + entry = dict(entries[0]) + entry['id'] = video_id + return entry + else: + return { + '_type': 'playlist', + 'entries': entries, + 'title': title, + 'id': video_id, + } From 13a11b195ff00648ab5631db5b1ae7c46548c7fb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 21 Apr 2015 05:13:25 +0800 Subject: [PATCH 0405/2721] [SenateISVP] Fix tests Remove md5 sums. They differs from my PC and the travis worker. --- youtube_dl/extractor/senateisvp.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index 23e1cd944..fa6610261 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -51,7 +51,6 @@ class SenateISVPIE(InfoExtractor): _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', - 'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', 'info_dict': { 'id': 'judiciary031715', 'ext': 'flv', @@ -60,7 +59,6 @@ class SenateISVPIE(InfoExtractor): } }, { 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', - 'md5': '2917c827513700aa9b70eaebf25116da', 'info_dict': { 'id': 'commerce011514', 'ext': 'flv', @@ -105,8 +103,7 @@ class SenateISVPIE(InfoExtractor): else: title = self._html_search_regex(r'([^<]+)', webpage, video_id) poster = qs.get('poster') - if poster: - thumbnail = poster[0] + thumbnail = poster[0] if poster else None video_type = qs['type'][0] committee = video_type if video_type == 'arch' else qs['comm'][0] From 
da55dac047d63d48fd12247369b2fb858c4210ef Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 21 Apr 2015 05:22:23 +0800 Subject: [PATCH 0406/2721] [CSpan] Removed the md5 sum of CSpan_3 --- youtube_dl/extractor/cspan.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 6bebcc65c..d516b1402 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -48,7 +48,6 @@ class CSpanIE(InfoExtractor): }, { # Video from senate.gov 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers', - 'md5': '7314c4b96dad66dd8e63dc3518ceaa6f', 'info_dict': { 'id': 'judiciary031715', 'ext': 'flv', From 0954cd8aa4421b1844a30e99702c364f1fffc15f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 21 Apr 2015 13:48:02 +0800 Subject: [PATCH 0407/2721] [Cinemassacre] Add detection for videos from blip.tv --- youtube_dl/extractor/bliptv.py | 9 +++++++++ youtube_dl/extractor/generic.py | 10 ++++------ youtube_dl/extractor/screenwavemedia.py | 23 ++++++++++++++++++++++- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index b632ce967..fb56cd78d 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -102,6 +102,15 @@ class BlipTVIE(InfoExtractor): }, ] + @staticmethod + def _extract_url(webpage): + mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) + if mobj: + return 'http://blip.tv/a/a-' + mobj.group(1) + mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage) + if mobj: + return mobj.group(1) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) lookup_id = mobj.group('lookup_id') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ec4d0c210..4946cc132 100644 --- a/youtube_dl/extractor/generic.py +++ 
b/youtube_dl/extractor/generic.py @@ -36,6 +36,7 @@ from .smotri import SmotriIE from .condenast import CondeNastIE from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE +from .bliptv import BlipTVIE class GenericIE(InfoExtractor): @@ -1073,12 +1074,9 @@ class GenericIE(InfoExtractor): } # Look for embedded blip.tv player - mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) - if mobj: - return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV') - mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage) - if mobj: - return self.url_result(mobj.group(1), 'BlipTV') + bliptv_url = BlipTVIE._extract_url(webpage) + if bliptv_url: + return self.url_result(bliptv_url, 'BlipTV') # Look for embedded condenast player matches = re.findall( diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 6c9fdb7c1..b515b11b4 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -7,7 +7,9 @@ from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + ExtractorError ) +from .bliptv import BlipTVIE class ScreenwaveMediaIE(InfoExtractor): @@ -104,6 +106,20 @@ class CinemassacreIE(InfoExtractor): 'upload_date': '20131002', 'title': 'The Mummy’s Hand (1940)', }, + }, + { + 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', + 'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de', + 'info_dict': { + 'id': '4065369', + 'ext': 'flv', + 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', + 'upload_date': '20061207', + 'uploader': 'cinemassacre', + 'uploader_id': '250778', + 'timestamp': 1283233867, + 'description': 'md5:0a108c78d130676b207d0f6d029ecffd', + } } ] @@ -116,7 +132,12 @@ class CinemassacreIE(InfoExtractor): playerdata_url = self._search_regex( 
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', - webpage, 'player data URL') + webpage, 'player data URL', default=None) + if not playerdata_url: + playerdata_url = BlipTVIE._extract_url(webpage) + if not playerdata_url: + raise ExtractorError('Unable to find player data') + video_title = self._html_search_regex( r'(?P<title>.+?)\|', webpage, 'title') video_description = self._html_search_regex( From e94443de80d20a62fccda23fa25abcfa3798243a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 21 Apr 2015 15:10:27 +0800 Subject: [PATCH 0408/2721] [Cinemassacre] Move to a standalone module --- youtube_dl/extractor/__init__.py | 3 +- youtube_dl/extractor/cinemassacre.py | 81 +++++++++++++++++++++++++ youtube_dl/extractor/screenwavemedia.py | 75 ----------------------- 3 files changed, 83 insertions(+), 76 deletions(-) create mode 100644 youtube_dl/extractor/cinemassacre.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3bd6d1697..a64afa1da 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -70,6 +70,7 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE +from .cinemassacre import CinemassacreIE from .clipfish import ClipfishIE from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE @@ -446,7 +447,7 @@ from .sbs import SBSIE from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE -from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE +from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .senateisvp import SenateISVPIE from .servingsys import ServingSysIE from .sexu import SexuIE diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py new file mode 100644 index 000000000..c8bbebe1a --- /dev/null +++ b/youtube_dl/extractor/cinemassacre.py @@ -0,0 +1,81 @@ +# 
encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError +from .bliptv import BlipTVIE + + +class CinemassacreIE(InfoExtractor): + _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' + _TESTS = [ + { + 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', + 'md5': 'fde81fbafaee331785f58cd6c0d46190', + 'info_dict': { + 'id': 'Cinemassacre-19911', + 'ext': 'mp4', + 'upload_date': '20121110', + 'title': '“Angry Video Game Nerd: The Movie” – Trailer', + 'description': 'md5:fb87405fcb42a331742a0dce2708560b', + }, + }, + { + 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', + 'md5': 'd72f10cd39eac4215048f62ab477a511', + 'info_dict': { + 'id': 'Cinemassacre-521be8ef82b16', + 'ext': 'mp4', + 'upload_date': '20131002', + 'title': 'The Mummy’s Hand (1940)', + }, + }, + { + 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', + 'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de', + 'info_dict': { + 'id': '4065369', + 'ext': 'flv', + 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', + 'upload_date': '20061207', + 'uploader': 'cinemassacre', + 'uploader_id': '250778', + 'timestamp': 1283233867, + 'description': 'md5:0a108c78d130676b207d0f6d029ecffd', + } + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') + + webpage = self._download_webpage(url, display_id) + + playerdata_url = self._search_regex( + r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + webpage, 'player data URL', default=None) + if not playerdata_url: + playerdata_url = BlipTVIE._extract_url(webpage) + if not playerdata_url: + raise 
ExtractorError('Unable to find player data') + + video_title = self._html_search_regex( + r'<title>(?P<title>.+?)\|', webpage, 'title') + video_description = self._html_search_regex( + r'<div class="entry-content">(?P<description>.+?)</div>', + webpage, 'description', flags=re.DOTALL, fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + 'url': playerdata_url, + } diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index b515b11b4..74fb1983a 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -7,9 +7,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, - ExtractorError ) -from .bliptv import BlipTVIE class ScreenwaveMediaIE(InfoExtractor): @@ -83,79 +81,6 @@ class ScreenwaveMediaIE(InfoExtractor): } -class CinemassacreIE(InfoExtractor): - _VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)' - _TESTS = [ - { - 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', - 'md5': 'fde81fbafaee331785f58cd6c0d46190', - 'info_dict': { - 'id': 'Cinemassacre-19911', - 'ext': 'mp4', - 'upload_date': '20121110', - 'title': '“Angry Video Game Nerd: The Movie” – Trailer', - 'description': 'md5:fb87405fcb42a331742a0dce2708560b', - }, - }, - { - 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', - 'md5': 'd72f10cd39eac4215048f62ab477a511', - 'info_dict': { - 'id': 'Cinemassacre-521be8ef82b16', - 'ext': 'mp4', - 'upload_date': '20131002', - 'title': 'The Mummy’s Hand (1940)', - }, - }, - { - 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', - 'md5': 
'ca9b3c8dd5a66f9375daeb5135f5a3de', - 'info_dict': { - 'id': '4065369', - 'ext': 'flv', - 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', - 'upload_date': '20061207', - 'uploader': 'cinemassacre', - 'uploader_id': '250778', - 'timestamp': 1283233867, - 'description': 'md5:0a108c78d130676b207d0f6d029ecffd', - } - } - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d') - - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', - webpage, 'player data URL', default=None) - if not playerdata_url: - playerdata_url = BlipTVIE._extract_url(webpage) - if not playerdata_url: - raise ExtractorError('Unable to find player data') - - video_title = self._html_search_regex( - r'<title>(?P<title>.+?)\|', webpage, 'title') - video_description = self._html_search_regex( - r'<div class="entry-content">(?P<description>.+?)</div>', - webpage, 'description', flags=re.DOTALL, fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } - - class TeamFourIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' 
_TEST = { From 757cda0a96e5efd0c1fb86824a99e558a7c797b0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 21 Apr 2015 15:20:40 +0800 Subject: [PATCH 0409/2721] [Cinemassacre] Support Youtube embedded videos (fixes #5131) --- youtube_dl/extractor/cinemassacre.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index c8bbebe1a..cf0a7551b 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -33,6 +33,7 @@ class CinemassacreIE(InfoExtractor): }, }, { + # blip.tv embedded video 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', 'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de', 'info_dict': { @@ -45,6 +46,20 @@ class CinemassacreIE(InfoExtractor): 'timestamp': 1283233867, 'description': 'md5:0a108c78d130676b207d0f6d029ecffd', } + }, + { + # Youtube embedded video + 'url': 'http://cinemassacre.com/2006/09/01/mckids/', + 'md5': '6eb30961fa795fedc750eac4881ad2e1', + 'info_dict': { + 'id': 'FnxsNhuikpo', + 'ext': 'mp4', + 'upload_date': '20060901', + 'uploader': 'Cinemassacre Extras', + 'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53', + 'uploader_id': 'Cinemassacre', + 'title': 'AVGN: McKids', + } } ] @@ -56,7 +71,10 @@ class CinemassacreIE(InfoExtractor): webpage = self._download_webpage(url, display_id) playerdata_url = self._search_regex( - r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + [ + r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', + ], webpage, 'player data URL', default=None) if not playerdata_url: playerdata_url = BlipTVIE._extract_url(webpage) From 5c1e6f69c421c244139b1f4c6d2759dadfbc1abd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= 
<jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 21 Apr 2015 15:04:55 +0200 Subject: [PATCH 0410/2721] [senate] Simplify There isn't any problem if the 'formats' field only has one element --- youtube_dl/extractor/senateisvp.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index fa6610261..d3b8a1be4 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -133,15 +133,9 @@ class SenateISVPIE(InfoExtractor): self._sort_formats(formats) - info_dict = { + return { 'id': video_id, 'title': title, + 'formats': formats, 'thumbnail': thumbnail, } - - if len(formats) >= 1: - info_dict.update({'formats': formats}) - else: - info_dict.update(formats[0]) - - return info_dict From ed553379dfd4d564f8335defc1067eeecd536f04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Apr 2015 20:55:05 +0600 Subject: [PATCH 0411/2721] [youtube:ytsearch] Temporary workaround (#5483) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 52909b0da..4ec39c589 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1517,7 +1517,7 @@ class YoutubeSearchIE(SearchInfoExtractor): while (PAGE_SIZE * pagenum) < limit: result_url = self._API_URL % ( compat_urllib_parse.quote_plus(query.encode('utf-8')), - (PAGE_SIZE * pagenum) + 1) + max((PAGE_SIZE * pagenum) + 1), 2) data_json = self._download_webpage( result_url, video_id='query "%s"' % query, note='Downloading page %s' % (pagenum + 1), From eb0f3e7ec080549c1df6a104fc59400efd9a992a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Apr 2015 22:36:41 +0600 Subject: [PATCH 0412/2721] [youtube:user] Extract in terms of `load_more_widget_html` --- youtube_dl/extractor/youtube.py | 
49 +++------------------------------ 1 file changed, 4 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4ec39c589..0b4038038 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1358,6 +1358,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): class YoutubeChannelIE(InfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' + _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos' IE_NAME = 'youtube:channel' _TESTS = [{ 'note': 'paginated channel', @@ -1386,7 +1387,7 @@ class YoutubeChannelIE(InfoExtractor): def _real_extract(self, url): channel_id = self._match_id(url) - url = 'https://www.youtube.com/channel/%s/videos' % channel_id + url = self._TEMPLATE_URL % channel_id channel_page = self._download_webpage(url, channel_id) autogenerated = re.search(r'''(?x) class="[^"]*?(?: @@ -1429,12 +1430,10 @@ class YoutubeChannelIE(InfoExtractor): return self.playlist_result(_entries(), channel_id) -class YoutubeUserIE(InfoExtractor): +class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)' - _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s' - _GDATA_PAGE_SIZE = 50 - _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' + _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' _TESTS = [{ @@ -1458,46 +1457,6 @@ class YoutubeUserIE(InfoExtractor): else: return super(YoutubeUserIE, cls).suitable(url) - def _real_extract(self, url): - username = self._match_id(url) - - # Download video ids using YouTube Data API. 
Result size per - # query is limited (currently to 50 videos) so we need to query - # page by page until there are no video ids - it means we got - # all of them. - - def download_page(pagenum): - start_index = pagenum * self._GDATA_PAGE_SIZE + 1 - - gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index) - page = self._download_webpage( - gdata_url, username, - 'Downloading video ids from %d to %d' % ( - start_index, start_index + self._GDATA_PAGE_SIZE)) - - try: - response = json.loads(page) - except ValueError as err: - raise ExtractorError('Invalid JSON in API response: ' + compat_str(err)) - if 'entry' not in response['feed']: - return - - # Extract video identifiers - entries = response['feed']['entry'] - for entry in entries: - title = entry['title']['$t'] - video_id = entry['id']['$t'].split('/')[-1] - yield { - '_type': 'url', - 'url': video_id, - 'ie_key': 'Youtube', - 'id': video_id, - 'title': title, - } - url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE) - - return self.playlist_result(url_results, playlist_title=username) - class YoutubeSearchIE(SearchInfoExtractor): IE_DESC = 'YouTube.com searches' From 60bf45c80d377a38b00b9ec1426c4cc1d9003742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Apr 2015 22:37:45 +0600 Subject: [PATCH 0413/2721] [youtube:channel] Specify first page download message --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0b4038038..1469b932f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1388,7 +1388,7 @@ class YoutubeChannelIE(InfoExtractor): channel_id = self._match_id(url) url = self._TEMPLATE_URL % channel_id - channel_page = self._download_webpage(url, channel_id) + channel_page = self._download_webpage(url, channel_id, 'Downloading page #1') autogenerated = 
re.search(r'''(?x) class="[^"]*?(?: channel-header-autogenerated-label| From 6de5dbafeebd2c54670e4cbf2833f9f10f3c2032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Apr 2015 22:42:21 +0600 Subject: [PATCH 0414/2721] [youtube:channel] Make `extract_videos_from_page` static --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1469b932f..8bf54931c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1369,7 +1369,8 @@ class YoutubeChannelIE(InfoExtractor): } }] - def extract_videos_from_page(self, page): + @staticmethod + def extract_videos_from_page(page): ids_in_page = [] titles_in_page = [] for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page): From cc38fa6cfbdab2ca77ecb1155d64574ab0004bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 21 Apr 2015 22:55:59 +0600 Subject: [PATCH 0415/2721] [youtube] Remove unused import --- youtube_dl/extractor/youtube.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8bf54931c..b2ae08418 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -28,7 +28,6 @@ from ..utils import ( get_element_by_attribute, get_element_by_id, int_or_none, - OnDemandPagedList, orderedSet, unescapeHTML, unified_strdate, From b4c08069638896e6c565536d52ab8fc9226c91bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 21 Apr 2015 19:30:31 +0200 Subject: [PATCH 0416/2721] [youtube:ytsearch] Use the same system as the search webpage (fixes #5483) The gdata api V2 was deprecated and according to 
http://youtube-eng.blogspot.com.es/2014/03/committing-to-youtube-data-api-v3_4.html remains available until April 20, 2015. --- youtube_dl/extractor/youtube.py | 51 +++++++++++++++++---------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b2ae08418..f1a5c0077 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1458,54 +1458,55 @@ class YoutubeUserIE(YoutubeChannelIE): return super(YoutubeUserIE, cls).suitable(url) -class YoutubeSearchIE(SearchInfoExtractor): +class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): IE_DESC = 'YouTube.com searches' - _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' - _MAX_RESULTS = 1000 + # there doesn't appear to be a real limit, for example if you search for + # 'python' you get more than 8.000.000 results + _MAX_RESULTS = float('inf') IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' + _EXTRA_QUERY_ARGS = {} def _get_n_results(self, query, n): """Get a specified number of results for a query""" - video_ids = [] - pagenum = 0 + videos = [] limit = n - PAGE_SIZE = 50 - while (PAGE_SIZE * pagenum) < limit: - result_url = self._API_URL % ( - compat_urllib_parse.quote_plus(query.encode('utf-8')), - max((PAGE_SIZE * pagenum) + 1), 2) - data_json = self._download_webpage( + for pagenum in itertools.count(1): + url_query = { + 'search_query': query, + 'page': pagenum, + 'spf': 'navigate', + } + url_query.update(self._EXTRA_QUERY_ARGS) + result_url = 'https://www.youtube.com/results?' 
+ compat_urllib_parse.urlencode(url_query) + data = self._download_json( result_url, video_id='query "%s"' % query, - note='Downloading page %s' % (pagenum + 1), + note='Downloading page %s' % pagenum, errnote='Unable to download API page') - data = json.loads(data_json) - api_response = data['data'] + html_content = data[1]['body']['content'] - if 'items' not in api_response: + if 'class="search-message' in html_content: raise ExtractorError( '[youtube] No video results', expected=True) - new_ids = list(video['id'] for video in api_response['items']) - video_ids += new_ids + new_videos = self._ids_to_results(orderedSet(re.findall( + r'href="/watch\?v=(.{11})', html_content))) + videos += new_videos + if not new_videos or len(videos) > limit: + break - limit = min(n, api_response['totalItems']) - pagenum += 1 - - if len(video_ids) > n: - video_ids = video_ids[:n] - videos = [self.url_result(video_id, 'Youtube', video_id=video_id) - for video_id in video_ids] + if len(videos) > n: + videos = videos[:n] return self.playlist_result(videos, query) class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' - _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' _SEARCH_KEY = 'ytsearchdate' IE_DESC = 'YouTube.com searches, newest videos first' + _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'} class YoutubeSearchURLIE(InfoExtractor): From 8be2bdfabd7a24936697f6681bd417de414a554a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 22 Apr 2015 15:05:35 +0800 Subject: [PATCH 0417/2721] [YoutubeDL] Remove the redundant assignment to old_filename Caused by commmit 592e97e8550389e22b716eb33c30584aa3a8d656 --- youtube_dl/YoutubeDL.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5b2c3aa38..0fdcf1b0b 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1516,7 +1516,6 @@ 
class YoutubeDL(object): pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(self._pps) for pp in pps_chain: - old_filename = info['filepath'] try: files_to_delete, info = pp.run(info) except PostProcessingError as e: From 9dd8e46a2d0860421b4bb4f616f05e5ebd686380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Apr 2015 20:28:33 +0600 Subject: [PATCH 0418/2721] [youtube:search] Cancel out _TESTS --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f1a5c0077..07c0f6ef9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1466,6 +1466,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' _EXTRA_QUERY_ARGS = {} + _TESTS = [] def _get_n_results(self, query, n): """Get a specified number of results for a query""" From bc94bd510bfb8a98f7456912f930c69c21bd6e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Apr 2015 21:01:25 +0600 Subject: [PATCH 0419/2721] [hitbox] Extract all formats (Closes #5494) --- youtube_dl/extractor/hitbox.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index d606429ca..d7cd8bb7e 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -96,13 +96,33 @@ class HitboxIE(InfoExtractor): 'https://www.hitbox.tv/api/player/config/video/%s' % video_id, video_id) - clip = player_config.get('clip') - video_url = clip.get('url') - res = clip.get('bitrates', [])[0].get('label') + formats = [] + for video in player_config['clip']['bitrates']: + label = video.get('label') + if label == 'Auto': + continue + video_url = video.get('url') + if not video_url: + continue + bitrate = int_or_none(video.get('bitrate')) + 
if determine_ext(video_url) == 'm3u8': + if not video_url.startswith('http'): + continue + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'tbr': bitrate, + 'format_note': label, + 'protocol': 'm3u8_native', + }) + else: + formats.append({ + 'url': video_url, + 'tbr': bitrate, + 'format_note': label, + }) - metadata['resolution'] = res - metadata['url'] = video_url - metadata['protocol'] = 'm3u8' + metadata['formats'] = formats return metadata From 29492f33329c6d6fd4be7f2da7dcaff14bc531ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Apr 2015 21:01:52 +0600 Subject: [PATCH 0420/2721] [hitbox] Sort formats --- youtube_dl/extractor/hitbox.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index d7cd8bb7e..6701fa48d 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -121,6 +121,7 @@ class HitboxIE(InfoExtractor): 'tbr': bitrate, 'format_note': label, }) + self._sort_formats(formats) metadata['formats'] = formats From 008bee0f50ea27e4fde4ba4d0cdb9043783299c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Apr 2015 21:03:56 +0600 Subject: [PATCH 0421/2721] [hitbox] Extract formats before metadata --- youtube_dl/extractor/hitbox.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index 6701fa48d..c51bc7ca4 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -88,10 +88,6 @@ class HitboxIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - metadata = self._extract_metadata( - 'https://www.hitbox.tv/api/media/video', - video_id) - player_config = self._download_json( 'https://www.hitbox.tv/api/player/config/video/%s' % video_id, video_id) @@ -123,6 +119,9 @@ class HitboxIE(InfoExtractor): }) self._sort_formats(formats) + 
metadata = self._extract_metadata( + 'https://www.hitbox.tv/api/media/video', + video_id) metadata['formats'] = formats return metadata From 14f41bc2fb01266243e763ddc80840b895acd294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Apr 2015 21:05:08 +0600 Subject: [PATCH 0422/2721] [hitbox:live] Extract formats before metadata --- youtube_dl/extractor/hitbox.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index c51bc7ca4..2c440898e 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -150,10 +150,6 @@ class HitboxLiveIE(HitboxIE): def _real_extract(self, url): video_id = self._match_id(url) - metadata = self._extract_metadata( - 'https://www.hitbox.tv/api/media/live', - video_id) - player_config = self._download_json( 'https://www.hitbox.tv/api/player/config/live/%s' % video_id, video_id) @@ -194,9 +190,13 @@ class HitboxLiveIE(HitboxIE): 'page_url': url, 'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf', }) - self._sort_formats(formats) + + metadata = self._extract_metadata( + 'https://www.hitbox.tv/api/media/live', + video_id) metadata['formats'] = formats metadata['is_live'] = True metadata['title'] = self._live_title(metadata.get('title')) + return metadata From 33b066bda0491bca54cc09d7d117867f885aa5f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 22 Apr 2015 21:09:21 +0600 Subject: [PATCH 0423/2721] [hitbox] Clarify download messages --- youtube_dl/extractor/hitbox.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index 2c440898e..421f55bbe 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -43,7 +43,8 @@ class HitboxIE(InfoExtractor): def _extract_metadata(self, url, video_id): 
thumb_base = 'https://edge.sf.hitbox.tv' metadata = self._download_json( - '%s/%s' % (url, video_id), video_id) + '%s/%s' % (url, video_id), video_id, + 'Downloading metadata JSON') date = 'media_live_since' media_type = 'livestream' @@ -90,7 +91,7 @@ class HitboxIE(InfoExtractor): player_config = self._download_json( 'https://www.hitbox.tv/api/player/config/video/%s' % video_id, - video_id) + video_id, 'Downloading video JSON') formats = [] for video in player_config['clip']['bitrates']: From f8e51f60b302357b43ef15cf479da3ce09643f9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 22 Apr 2015 19:24:14 +0200 Subject: [PATCH 0424/2721] [flickr] Fix extraction (fixes #5501) --- youtube_dl/extractor/flickr.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index 0c858b654..adffe4857 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_request from ..utils import ( ExtractorError, unescapeHTML, @@ -29,9 +30,14 @@ class FlickrIE(InfoExtractor): video_id = mobj.group('id') video_uploader_id = mobj.group('uploader_id') webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id - webpage = self._download_webpage(webpage_url, video_id) + req = compat_urllib_request.Request(webpage_url) + req.add_header( + 'User-Agent', + # it needs a more recent version + 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)') + webpage = self._download_webpage(req, video_id) - secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret') + secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret') first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + 
video_id + '&secret=' + secret + '&bitrate=700&target=_self' first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') From c04c3e334cc7ff0bbd2cbb8167f5cd2794c29d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 22 Apr 2015 19:58:39 +0200 Subject: [PATCH 0425/2721] [flickr] Don't use regex for extracting the info from the xml files --- youtube_dl/extractor/flickr.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py index adffe4857..2fe76d661 100644 --- a/youtube_dl/extractor/flickr.py +++ b/youtube_dl/extractor/flickr.py @@ -6,7 +6,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_request from ..utils import ( ExtractorError, - unescapeHTML, + find_xpath_attr, ) @@ -40,20 +40,21 @@ class FlickrIE(InfoExtractor): secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret') first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self' - first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage') + first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage') - node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>', - first_xml, 'node_id') + node_id = find_xpath_attr( + first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id', + 'id').text second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1' - second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage') + second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage') self.report_extraction(video_id) - mobj = re.search(r'<STREAM APP="(.+?)" 
FULLPATH="(.+?)"', second_xml) - if mobj is None: + stream = second_xml.find('.//STREAM') + if stream is None: raise ExtractorError('Unable to extract video url') - video_url = mobj.group(1) + unescapeHTML(mobj.group(2)) + video_url = stream.attrib['APP'] + stream.attrib['FULLPATH'] return { 'id': video_id, From 7513f298b04eb11ab6c0bd0845b7a3b221bee317 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Wed, 22 Apr 2015 23:50:11 +0200 Subject: [PATCH 0426/2721] [vimeo] Fix login token (fixes #5082) --- youtube_dl/extractor/vimeo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 28bcc89cd..55c1574bb 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -38,7 +38,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): self.report_login() login_url = 'https://vimeo.com/log_in' webpage = self._download_webpage(login_url, None, False) - token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token') + token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') data = urlencode_postdata({ 'email': username, 'password': password, @@ -177,7 +177,7 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token') + token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') data = urlencode_postdata({ 'password': password, 'token': token, From 14a2d6789f24ce6f6565747a48ffe7728d5a2251 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Wed, 22 Apr 2015 23:55:19 +0200 Subject: [PATCH 0427/2721] [vimeo] one token overlooked --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py 
b/youtube_dl/extractor/vimeo.py index 55c1574bb..9027f9dd6 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -439,7 +439,7 @@ class VimeoChannelIE(InfoExtractor): name="([^"]+)"\s+ value="([^"]*)" ''', login_form)) - token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token') + token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') fields['token'] = token fields['password'] = password post = urlencode_postdata(fields) From 8c8826176d4562dce9558d121a955ed71509315a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 23 Apr 2015 13:49:06 +0800 Subject: [PATCH 0428/2721] [xattr] Add version detection for python-pyxattr For more information, see #5498 and changes to convertObj() in iustin/pyxattr@cc84e466f63906d32ec1bf4a4fcae6a7bce9a4c8 --- youtube_dl/postprocessor/xattrpp.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index 0cba99fc3..b74adff43 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( check_executable, hyphenate_date, + version_tuple, ) @@ -36,6 +37,19 @@ class XAttrMetadataPP(PostProcessor): # try the pyxattr module... import xattr + # Unicode arguments are not supported in python-pyxattr until + # version 0.5.0 + # See https://github.com/rg3/youtube-dl/issues/5498 + pyxattr_required_version = '0.5.0' + if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): + self._downloader.report_warning( + 'python-pyxattr is detected but is too old. ' + 'yourube-dl requires %s or above while your version is %s. 
' + 'Falling back to other xattr implementations' % ( + pyxattr_required_version, xattr.__version__)) + + raise ImportError + def write_xattr(path, key, value): return xattr.setxattr(path, key, value) From 3eec9fef302929438c321adfc8ec16adcc6ffb6d Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Thu, 23 Apr 2015 11:41:21 +0200 Subject: [PATCH 0429/2721] [realvid] Add extractor for realvid.net (closes #5504) --- youtube_dl/extractor/gorillavid.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index ae24aff84..ed2623456 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -15,10 +15,10 @@ from ..utils import ( class GorillaVidIE(InfoExtractor): - IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in' + IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net' _VALID_URL = r'''(?x) https?://(?P<host>(?:www\.)? - (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/ + (?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/ (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? 
''' @@ -61,6 +61,15 @@ class GorillaVidIE(InfoExtractor): 'title': 'Man of Steel - Trailer', 'thumbnail': 're:http://.*\.jpg', }, + }, { + 'url': 'http://realvid.net/ctn2y6p2eviw', + 'md5': 'b2166d2cf192efd6b6d764c18fd3710e', + 'info_dict': { + 'id': 'ctn2y6p2eviw', + 'ext': 'flv', + 'title': 'rdx 1955', + 'thumbnail': 're:http://.*\.jpg', + }, }, { 'url': 'http://movpod.in/0wguyyxi1yca', 'only_matching': True, @@ -97,7 +106,7 @@ class GorillaVidIE(InfoExtractor): webpage = self._download_webpage(req, video_id, 'Downloading video page') title = self._search_regex( - r'style="z-index: [0-9]+;">([^<]+)</span>', + [r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '], webpage, 'title', default=None) or self._og_search_title(webpage) video_url = self._search_regex( r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url') From f8610ba1ca4705dcae5c45c5ac66a99b174ebed3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 23 Apr 2015 17:57:49 +0800 Subject: [PATCH 0430/2721] [ustream] Fix extraction (closes #3998) --- youtube_dl/extractor/ustream.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 68d03b999..d559e228a 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor @@ -39,7 +40,15 @@ class UstreamIE(InfoExtractor): desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id return self.url_result(desktop_url, 'Ustream') - video_url = 'http://tcdn.ustream.tv/video/%s' % video_id + params = self._download_json( + 'http://cdngw.ustream.tv/rgwjson/Viewer.getVideo/' + json.dumps({ + 'brandId': 1, + 'videoId': int(video_id), + 'autoplay': False, + }), video_id) + + video_url = params['flv'] + webpage = self._download_webpage(url, video_id) self.report_extraction(video_id) From 
762155cc90754fc29c68bf0f0256988f37862c3b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 23 Apr 2015 18:07:09 +0800 Subject: [PATCH 0431/2721] [ustream] Checking errors --- youtube_dl/extractor/ustream.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index d559e228a..f8893b6cd 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import ( compat_urlparse, ) +from ..utils import ExtractorError class UstreamIE(InfoExtractor): @@ -47,6 +48,9 @@ class UstreamIE(InfoExtractor): 'autoplay': False, }), video_id) + if 'error' in params: + raise ExtractorError(params['error']['message'], expected=True) + video_url = params['flv'] webpage = self._download_webpage(url, video_id) From 2a8137272d7d463648ccf53d25d78660209ad928 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 23 Apr 2015 18:24:44 +0800 Subject: [PATCH 0432/2721] [ustream] Add an alternative approach to extract title (fixes #5128) --- youtube_dl/extractor/ustream.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index f8893b6cd..a395dc5da 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -58,7 +58,16 @@ class UstreamIE(InfoExtractor): self.report_extraction(video_id) video_title = self._html_search_regex(r'data-title="(?P<title>.+)"', - webpage, 'title') + webpage, 'title', default=None) + + if not video_title: + try: + video_title = params['moduleConfig']['meta']['title'] + except KeyError: + pass + + if not video_title: + video_title = 'Ustream video ' + video_id uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', webpage, 'uploader', fatal=False, flags=re.DOTALL) From cd9fdccde04799910fc3ec36fb1e8baa26a5b3e0 Mon Sep 17 
00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 23 Apr 2015 18:33:25 +0800 Subject: [PATCH 0433/2721] [ustream] Try to extract uploader from JSON data (#5128) --- youtube_dl/extractor/ustream.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index a395dc5da..c39c278ab 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -13,7 +13,7 @@ from ..utils import ExtractorError class UstreamIE(InfoExtractor): _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<videoID>\d+)' IE_NAME = 'ustream' - _TEST = { + _TESTS = [{ 'url': 'http://www.ustream.tv/recorded/20274954', 'md5': '088f151799e8f572f84eb62f17d73e5c', 'info_dict': { @@ -22,7 +22,18 @@ class UstreamIE(InfoExtractor): 'uploader': 'Young Americans for Liberty', 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM', }, - } + }, { + # From http://sportscanada.tv/canadagames/index.php/week2/figure-skating/444 + # Title and uploader available only from params JSON + 'url': 'http://www.ustream.tv/embed/recorded/59307601?ub=ff0000&lc=ff0000&oc=ffffff&uc=ffffff&v=3&wmode=direct', + 'md5': '5a2abf40babeac9812ed20ae12d34e10', + 'info_dict': { + 'id': '59307601', + 'ext': 'flv', + 'title': '-CG11- Canada Games Figure Skating', + 'uploader': 'sportscanadatv', + } + }] def _real_extract(self, url): m = re.match(self._VALID_URL, url) @@ -70,7 +81,13 @@ class UstreamIE(InfoExtractor): video_title = 'Ustream video ' + video_id uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>', - webpage, 'uploader', fatal=False, flags=re.DOTALL) + webpage, 'uploader', fatal=False, flags=re.DOTALL, default=None) + + if not uploader: + try: + uploader = params['moduleConfig']['meta']['userName'] + except KeyError: + uploader = None thumbnail = self._html_search_regex(r'<link rel="image_src" 
href="(?P<thumb>.*?)"', webpage, 'thumbnail', fatal=False) From b46ed49996669a5e602042ae4d357f2ad952af58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Apr 2015 21:44:51 +0600 Subject: [PATCH 0434/2721] [cracked] Fix extraction --- youtube_dl/extractor/cracked.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index cf763ee7e..e579863df 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -33,17 +33,25 @@ class CrackedIE(InfoExtractor): video_url = self._html_search_regex( [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL') - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) + title = self._search_regex( + [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'], + webpage, 'title') - timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False) + description = self._search_regex( + r'name="?(?:og:)?description"?\s+content="([^"]+)"', + webpage, 'description', default=None) + + timestamp = self._html_search_regex( + r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False) if timestamp: timestamp = parse_iso8601(timestamp[:-6]) view_count = str_to_int(self._html_search_regex( - r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False)) + r'<span\s+class="?views"? 
id="?viewCounts"?>([\d,\.]+) Views</span>', + webpage, 'view count', fatal=False)) comment_count = str_to_int(self._html_search_regex( - r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False)) + r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>', + webpage, 'comment count', fatal=False)) m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url) if m: From 6447353f52dc4b2b5926d3d9a3a28d3a7a242b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Apr 2015 21:49:54 +0600 Subject: [PATCH 0435/2721] [cracked] Add support for youtube embeds --- youtube_dl/extractor/cracked.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index e579863df..8b381dd3a 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -30,8 +30,15 @@ class CrackedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + youtube_url = self._search_regex( + r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', + webpage, 'youtube url', default=None) + if youtube_url: + return self.url_result(youtube_url) + video_url = self._html_search_regex( - [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL') + [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], + webpage, 'video URL') title = self._search_regex( [r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'], From c610f38ba9586f21f632e115cbc15e172d60ee40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Apr 2015 21:58:50 +0600 Subject: [PATCH 0436/2721] [cracked] Update tests --- youtube_dl/extractor/cracked.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index 8b381dd3a..b2b151558 100644 --- 
a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -11,18 +11,30 @@ from ..utils import ( class CrackedIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html' - _TEST = { - 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html', - 'md5': '4b29a5eeec292cd5eca6388c7558db9e', + _TESTS = [{ + 'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html', + 'md5': '89b90b9824e3806ca95072c4d78f13f7', 'info_dict': { - 'id': '19006', + 'id': '19070', 'ext': 'mp4', - 'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies', - 'description': 'md5:3b909e752661db86007d10e5ec2df769', - 'timestamp': 1405659600, - 'upload_date': '20140718', + 'title': 'If Animal Actors Got E! True Hollywood Stories', + 'timestamp': 1404954000, + 'upload_date': '20140710', } - } + }, { + # youtube embed + 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html', + 'md5': 'ccd52866b50bde63a6ef3b35016ba8c7', + 'info_dict': { + 'id': 'EjI00A3rZD0', + 'ext': 'mp4', + 'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take", + 'description': 'md5:c603708c718b796fe6079e2b3351ffc7', + 'upload_date': '20140725', + 'uploader_id': 'Cracked', + 'uploader': 'Cracked', + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -34,7 +46,7 @@ class CrackedIE(InfoExtractor): r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', webpage, 'youtube url', default=None) if youtube_url: - return self.url_result(youtube_url) + return self.url_result(youtube_url, 'Youtube') video_url = self._html_search_regex( [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], From d7403332248a3893810f0461a682229552e1fd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Apr 2015 21:59:18 +0600 Subject: [PATCH 
0437/2721] [cracked] Modernize --- youtube_dl/extractor/cracked.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index b2b151558..94d03ce2a 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -37,8 +37,7 @@ class CrackedIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) From 4515cb43cadd583de40b38021202595248080fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 23 Apr 2015 22:11:09 +0600 Subject: [PATCH 0438/2721] [xattrpp] Fix typo --- youtube_dl/postprocessor/xattrpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index b74adff43..93d0abcf6 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -44,7 +44,7 @@ class XAttrMetadataPP(PostProcessor): if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): self._downloader.report_warning( 'python-pyxattr is detected but is too old. ' - 'yourube-dl requires %s or above while your version is %s. ' + 'youtube-dl requires %s or above while your version is %s. 
' 'Falling back to other xattr implementations' % ( pyxattr_required_version, xattr.__version__)) From 02f502f435f01d82b71d52e8b5fdb9369d76873d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2015 01:34:57 +0600 Subject: [PATCH 0439/2721] [README] Document on how to enable old format selection behavior (#5510, #5511) --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7dbe66995..3ea69b637 100644 --- a/README.md +++ b/README.md @@ -270,7 +270,9 @@ The simplest case is requesting a specific format, for example `-f 22`. You can If you are want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. -youtube-dl uses `-f bestvideo+bestaudio/best` if ffmpeg or avconv are installed (`best` is needed for videos that don't come from YouTube because they don't provide the audio and video in two different files). 
If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. +Since the end of April 2015 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. + +If you want to preserve the old format selection behavior (pre-April 2015), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](https://github.com/rg3/youtube-dl/blob/master/README.md#configuration) in order not to type it every time you run youtube-dl. 
# VIDEO SELECTION From 7fb993e1f4105d573a5e42f71a2b3c51c0794a9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2015 01:38:02 +0600 Subject: [PATCH 0440/2721] [README] Fix configuration file link and typo --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3ea69b637..2b3bdde22 100644 --- a/README.md +++ b/README.md @@ -268,11 +268,11 @@ youtube-dl_test_video_.mp4 # A simple file name By default youtube-dl tries to download the best quality, but sometimes you may want to download other format. The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`. -If you are want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. 
+If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. Since the end of April 2015 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. -If you want to preserve the old format selection behavior (pre-April 2015), i.e. 
you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](https://github.com/rg3/youtube-dl/blob/master/README.md#configuration) in order not to type it every time you run youtube-dl. +If you want to preserve the old format selection behavior (pre-April 2015), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](https://github.com/rg3/youtube-dl#configuration) in order not to type it every time you run youtube-dl. # VIDEO SELECTION From 4d6a3ff411d494e2f3168bca0331317718b5d9a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2015 01:41:36 +0600 Subject: [PATCH 0441/2721] [README] Finally fix configuration file link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2b3bdde22..7040be1ec 100644 --- a/README.md +++ b/README.md @@ -272,7 +272,7 @@ If you want to download multiple videos and they don't have the same formats ava Since the end of April 2015 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. 
-If you want to preserve the old format selection behavior (pre-April 2015), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](https://github.com/rg3/youtube-dl#configuration) in order not to type it every time you run youtube-dl. +If you want to preserve the old format selection behavior (pre-April 2015), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. # VIDEO SELECTION From a9b0d4e1f4936d93f277f098495eb4d5b770056f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 24 Apr 2015 14:09:35 +0800 Subject: [PATCH 0442/2721] [Crunchyroll] Fix extraction on Python 2.6 XPath with recursive children selection not supported --- youtube_dl/extractor/crunchyroll.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 6ded723c9..1c77df47e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -263,8 +263,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text streamdata = self._download_xml( streamdata_req, video_id, note='Downloading media info for %s' % video_format) - video_url = streamdata.find('.//host').text - video_play_path = streamdata.find('.//file').text + video_url = streamdata.find('./host').text + video_play_path = streamdata.find('./file').text formats.append({ 'url': video_url, 'play_path': video_play_path, From ddbed36455f8b3053f38d84b2e62e2fb5cd66eac Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 24 Apr 2015 08:48:49 +0200 Subject: [PATCH 0443/2721] [embedthumbnail] Add support for mp3 cover embedding --- 
youtube_dl/__init__.py | 4 +- youtube_dl/postprocessor/__init__.py | 4 +- youtube_dl/postprocessor/atomicparsley.py | 62 --------------- youtube_dl/postprocessor/embedthumbnail.py | 87 ++++++++++++++++++++++ youtube_dl/postprocessor/ffmpeg.py | 2 + 5 files changed, 92 insertions(+), 67 deletions(-) delete mode 100644 youtube_dl/postprocessor/atomicparsley.py create mode 100644 youtube_dl/postprocessor/embedthumbnail.py diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1c8b411b7..d7759db68 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -240,9 +240,7 @@ def _real_main(argv=None): if opts.xattrs: postprocessors.append({'key': 'XAttrMetadata'}) if opts.embedthumbnail: - if not opts.addmetadata: - postprocessors.append({'key': 'FFmpegAudioFix'}) - postprocessors.append({'key': 'AtomicParsley'}) + postprocessors.append({'key': 'EmbedThumbnail'}) # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. 
if opts.exec_cmd: diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index f39acadce..ab7f1a29a 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals -from .atomicparsley import AtomicParsleyPP +from .embedthumbnail import EmbedThumbnailPP from .ffmpeg import ( FFmpegPostProcessor, FFmpegAudioFixPP, @@ -23,7 +23,7 @@ def get_postprocessor(key): __all__ = [ - 'AtomicParsleyPP', + 'EmbedThumbnailPP', 'ExecAfterDownloadPP', 'FFmpegAudioFixPP', 'FFmpegEmbedSubtitlePP', diff --git a/youtube_dl/postprocessor/atomicparsley.py b/youtube_dl/postprocessor/atomicparsley.py deleted file mode 100644 index e4e198695..000000000 --- a/youtube_dl/postprocessor/atomicparsley.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import unicode_literals - - -import os -import subprocess - -from .common import PostProcessor -from ..compat import ( - compat_urlretrieve, -) -from ..utils import ( - check_executable, - encodeFilename, - PostProcessingError, - prepend_extension, - shell_quote -) - - -class AtomicParsleyPPError(PostProcessingError): - pass - - -class AtomicParsleyPP(PostProcessor): - def run(self, info): - if not check_executable('AtomicParsley', ['-v']): - raise AtomicParsleyPPError('AtomicParsley was not found. Please install.') - - filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') - temp_thumbnail = prepend_extension(filename, 'thumb') - - if not info.get('thumbnail'): - raise AtomicParsleyPPError('Thumbnail was not found. 
Nothing to do.') - - compat_urlretrieve(info['thumbnail'], temp_thumbnail) - - cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename] - - self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename) - - if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd)) - - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() - - if p.returncode != 0: - msg = stderr.decode('utf-8', 'replace').strip() - raise AtomicParsleyPPError(msg) - - os.remove(encodeFilename(temp_thumbnail)) - # for formats that don't support thumbnails (like 3gp) AtomicParsley - # won't create to the temporary file - if b'No changes' in stdout: - self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail') - else: - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - - return [], info diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py new file mode 100644 index 000000000..b6507db27 --- /dev/null +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + + +import os +import subprocess + +from .common import PostProcessor +from ..compat import ( + compat_urlretrieve, +) +from ..utils import ( + check_executable, + encodeFilename, + PostProcessingError, + prepend_extension, + shell_quote +) + + +class EmbedThumbnailPPError(PostProcessingError): + pass + + +class EmbedThumbnailPP(PostProcessor): + def run(self, info): + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + temp_thumbnail = prepend_extension(filename, 'thumb') + + if not info.get('thumbnail'): + raise EmbedThumbnailPPError('Thumbnail was not found. 
Nothing to do.') + + compat_urlretrieve(info['thumbnail'], temp_thumbnail) + + if info['ext'] == 'mp3': + if not check_executable('ffmpeg', ['-version']): + raise AtomicParsleyPPError('FFmpeg was not found. Please install.') + + cmd = ['ffmpeg', '-i', filename, '-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"', temp_filename] + + self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename) + + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] FFmpeg command line: %s' % shell_quote(cmd)) + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + + if p.returncode != 0: + msg = stderr.decode('utf-8', 'replace').strip() + raise EmbedThumbnailPPError(msg) + + os.remove(encodeFilename(temp_thumbnail)) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + elif info['ext'] == 'm4a': + if not check_executable('AtomicParsley', ['-v']): + raise EmbedThumbnailPPError('AtomicParsley was not found. 
Please install.') + + cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename] + + self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename) + + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd)) + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + + if p.returncode != 0: + msg = stderr.decode('utf-8', 'replace').strip() + raise EmbedThumbnailPPError(msg) + + os.remove(encodeFilename(temp_thumbnail)) + # for formats that don't support thumbnails (like 3gp) AtomicParsley + # won't create to the temporary file + if b'No changes' in stdout: + self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail') + else: + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + else: + raise EmbedThumbnailPPError('Only mp3 and m4a are supported for thumbnail embedding for now.') + + return [], info diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index df6fb6665..c01f2eedd 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -280,6 +280,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): errnote='Cannot update utime of audio file') information['filepath'] = new_path + information['ext'] = extension + return [path], information From 31fd9c76013faaa86b0c515d9305de548856ef84 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 24 Apr 2015 09:08:57 +0200 Subject: [PATCH 0444/2721] [embedthumbnail] use FFmpegPostProcessor for mp3 --- youtube_dl/postprocessor/embedthumbnail.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index b6507db27..a2d6b14db 100644 --- 
a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -5,7 +5,8 @@ from __future__ import unicode_literals import os import subprocess -from .common import PostProcessor +from .ffmpeg import FFmpegPostProcessor + from ..compat import ( compat_urlretrieve, ) @@ -22,7 +23,7 @@ class EmbedThumbnailPPError(PostProcessingError): pass -class EmbedThumbnailPP(PostProcessor): +class EmbedThumbnailPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -34,22 +35,12 @@ class EmbedThumbnailPP(PostProcessor): compat_urlretrieve(info['thumbnail'], temp_thumbnail) if info['ext'] == 'mp3': - if not check_executable('ffmpeg', ['-version']): - raise AtomicParsleyPPError('FFmpeg was not found. Please install.') - - cmd = ['ffmpeg', '-i', filename, '-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"', temp_filename] + options = ['-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1', + '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"'] self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename) - if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] FFmpeg command line: %s' % shell_quote(cmd)) - - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate() - - if p.returncode != 0: - msg = stderr.decode('utf-8', 'replace').strip() - raise EmbedThumbnailPPError(msg) + self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(temp_thumbnail)) os.remove(encodeFilename(filename)) From c0ea8ebb9bc628e3787e739dbf58bd0e59c83f87 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 24 Apr 2015 09:11:39 +0200 Subject: [PATCH 0445/2721] [ffmpeg] Remove unneeded class --- youtube_dl/postprocessor/ffmpeg.py | 15 
--------------- 1 file changed, 15 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index c01f2eedd..7a952963e 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -590,21 +590,6 @@ class FFmpegMergerPP(FFmpegPostProcessor): return info['__files_to_merge'], info -class FFmpegAudioFixPP(FFmpegPostProcessor): - def run(self, info): - filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') - - options = ['-vn', '-acodec', 'copy'] - self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) - - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - - return [], info - - class FFmpegFixupStretchedPP(FFmpegPostProcessor): def run(self, info): stretched_ratio = info.get('stretched_ratio') From 10fb7710e85a240cc4bd065b98fd3f5a0c3d10ca Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 24 Apr 2015 09:17:46 +0200 Subject: [PATCH 0446/2721] Forgot to clean the remains of class --- youtube_dl/postprocessor/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index ab7f1a29a..0d8ef6ca2 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals from .embedthumbnail import EmbedThumbnailPP from .ffmpeg import ( FFmpegPostProcessor, - FFmpegAudioFixPP, FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, FFmpegFixupStretchedPP, @@ -25,7 +24,6 @@ def get_postprocessor(key): __all__ = [ 'EmbedThumbnailPP', 'ExecAfterDownloadPP', - 'FFmpegAudioFixPP', 'FFmpegEmbedSubtitlePP', 'FFmpegExtractAudioPP', 'FFmpegFixupM4aPP', From 25f7d1beba61ad9a5d013d423f169eafb05d76a5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 24 Apr 2015 22:25:34 +0800 Subject: 
[PATCH 0447/2721] [gdcvault] Extend _VALID_URL (fixes #5236) --- youtube_dl/extractor/gdcvault.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 51796f3a4..f634e6285 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -11,7 +11,7 @@ from ..utils import remove_end class GDCVaultIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)' + _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?' _NETRC_MACHINE = 'gdcvault' _TESTS = [ { @@ -43,6 +43,10 @@ class GDCVaultIE(InfoExtractor): 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment', }, 'skip': 'Requires login', + }, + { + 'url': 'http://gdcvault.com/play/1020791/', + 'only_matching': True, } ] From c8ff645766aa56742045adcf0c64b92617334eb5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 24 Apr 2015 22:43:33 +0800 Subject: [PATCH 0448/2721] [gdcvault] Add display_id --- youtube_dl/extractor/gdcvault.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index f634e6285..43f916412 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -19,6 +19,7 @@ class GDCVaultIE(InfoExtractor): 'md5': '7ce8388f544c88b7ac11c7ab1b593704', 'info_dict': { 'id': '1019721', + 'display_id': 'Doki-Doki-Universe-Sweet-Simple', 'ext': 'mp4', 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' } @@ -27,6 +28,7 @@ class GDCVaultIE(InfoExtractor): 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', 'info_dict': { 'id': '1015683', + 'display_id': 'Embracing-the-Dark-Art-of', 'ext': 'flv', 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' }, @@ -39,6 +41,7 @@ class 
GDCVaultIE(InfoExtractor): 'md5': 'a5eb77996ef82118afbbe8e48731b98e', 'info_dict': { 'id': '1015301', + 'display_id': 'Thexder-Meets-Windows-95-or', 'ext': 'flv', 'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment', }, @@ -94,7 +97,7 @@ class GDCVaultIE(InfoExtractor): }) return video_formats - def _login(self, webpage_url, video_id): + def _login(self, webpage_url, display_id): (username, password) = self._get_login_info() if username is None or password is None: self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.') @@ -111,9 +114,9 @@ class GDCVaultIE(InfoExtractor): request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') - self._download_webpage(request, video_id, 'Logging in') - start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page') - self._download_webpage(logout_url, video_id, 'Logging out') + self._download_webpage(request, display_id, 'Logging in') + start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') + self._download_webpage(logout_url, display_id, 'Logging out') return start_page @@ -121,8 +124,10 @@ class GDCVaultIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + display_id = mobj.group('name') or video_id + webpage_url = 'http://www.gdcvault.com/play/' + video_id - start_page = self._download_webpage(webpage_url, video_id) + start_page = self._download_webpage(webpage_url, display_id) direct_url = self._search_regex( r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);', @@ -135,6 +140,7 @@ class GDCVaultIE(InfoExtractor): return { 'id': video_id, + 'display_id': display_id, 'url': video_url, 'ext': 'flv', 'title': title, @@ -145,7 +151,7 @@ class GDCVaultIE(InfoExtractor): start_page, 'xml root', 
default=None) if xml_root is None: # Probably need to authenticate - login_res = self._login(webpage_url, video_id) + login_res = self._login(webpage_url, display_id) if login_res is None: self.report_warning('Could not login.') else: @@ -163,7 +169,7 @@ class GDCVaultIE(InfoExtractor): xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename') xml_decription_url = xml_root + 'xml/' + xml_name - xml_description = self._download_xml(xml_decription_url, video_id) + xml_description = self._download_xml(xml_decription_url, display_id) video_title = xml_description.find('./metadata/title').text video_formats = self._parse_mp4(xml_description) @@ -172,6 +178,7 @@ class GDCVaultIE(InfoExtractor): return { 'id': video_id, + 'display_id': display_id, 'title': video_title, 'formats': video_formats, } From 5090d93f2c7e5d40cd6d7a8c9eda789f67bd1eb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2015 21:47:13 +0600 Subject: [PATCH 0449/2721] [dotsub] Fix extraction --- youtube_dl/extractor/dotsub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dl/extractor/dotsub.py index f51d88a98..e9ca236d4 100644 --- a/youtube_dl/extractor/dotsub.py +++ b/youtube_dl/extractor/dotsub.py @@ -36,7 +36,8 @@ class DotsubIE(InfoExtractor): if not video_url: webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'"file"\s*:\s*\'([^\']+)', webpage, 'video url') + [r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'], + webpage, 'video url') return { 'id': video_id, From 2ad978532bd0a94fb7529f429a26e9b9966b2e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2015 22:03:14 +0600 Subject: [PATCH 0450/2721] [ellentv] Fix extraction --- youtube_dl/extractor/ellentv.py | 28 ++++++++++++---------------- 1 file changed, 12 
insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 5154bbd7f..93affaa8f 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -39,24 +39,20 @@ class EllenTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://widgets.ellentube.com/videos/%s' % video_id, + video_id) - video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True) - title = self._og_search_title(webpage, default=None) or self._search_regex( - r'pageName\s*=\s*"([^"]+)"', webpage, 'title') - description = self._html_search_meta( - 'description', webpage, 'description') or self._og_search_description(webpage) - timestamp = parse_iso8601(self._search_regex( - r'<span class="publish-date"><time datetime="([^"]+)">', - webpage, 'timestamp', fatal=False)) + partner_id = self._search_regex( + r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id') - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'description': description, - 'timestamp': timestamp, - } + kaltura_id = self._search_regex( + [r'id="kaltura_player_([^"]+)"', + r"_wb_entry_id\s*:\s*'([^']+)", + r'data-kaltura-entry-id="([^"]+)'], + webpage, 'kaltura id') + + return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') class EllenTVClipsIE(InfoExtractor): From 870744ce8f03bdd50a1d4d4ba87913aecba35716 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2015 22:07:15 +0600 Subject: [PATCH 0451/2721] [ellentv] Fix tests --- youtube_dl/extractor/ellentv.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 93affaa8f..464aeabdb 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -12,29 +12,19 @@ 
from ..utils import ( class EllenTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' - _TESTS = [{ + _TEST = { 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c', 'info_dict': { - 'id': '0-ipq1gsai', + 'id': '0_ipq1gsai', 'ext': 'mp4', 'title': 'Fast Fingers of Fate', - 'description': 'md5:686114ced0a032926935e9015ee794ac', - 'timestamp': 1428033600, + 'description': 'md5:587e79fbbd0d73b148bc596d99ce48e6', + 'timestamp': 1428035648, 'upload_date': '20150403', + 'uploader_id': 'batchUser', } - }, { - 'url': 'http://ellentube.com/videos/0-dvzmabd5/', - 'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb', - 'info_dict': { - 'id': '0-dvzmabd5', - 'ext': 'mp4', - 'title': '1 year old twin sister makes her brother laugh', - 'description': '1 year old twin sister makes her brother laugh', - 'timestamp': 1419542075, - 'upload_date': '20141225', - } - }] + } def _real_extract(self, url): video_id = self._match_id(url) From 66be4b89d73adcfeb9cd31cd0c903ecca40e2152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 24 Apr 2015 22:09:54 +0600 Subject: [PATCH 0452/2721] [ellentv:clips] Fix extraction --- youtube_dl/extractor/ellentv.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 464aeabdb..54601786a 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -78,4 +78,8 @@ class EllenTVClipsIE(InfoExtractor): raise ExtractorError('Failed to download JSON', cause=ve) def _extract_entries(self, playlist): - return [self.url_result(item['url'], 'EllenTV') for item in playlist] + return [ + self.url_result( + 'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']), + 'Kaltura') + for item in playlist] From d0aefec99aa7bf2cd307b700f7c0d2c268d6762d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Fri, 24 Apr 2015 22:10:27 +0600 Subject: [PATCH 0453/2721] [ellentv:clips] Fix test --- youtube_dl/extractor/ellentv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 54601786a..74b50bca2 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -54,7 +54,7 @@ class EllenTVClipsIE(InfoExtractor): 'id': 'meryl-streep-vanessa-hudgens', 'title': 'Meryl Streep, Vanessa Hudgens', }, - 'playlist_mincount': 9, + 'playlist_mincount': 7, } def _real_extract(self, url): From db37e0c273d9d139d1d6a8541146d929b659610d Mon Sep 17 00:00:00 2001 From: mrkrossxdx <mrkrossxdx@libero.it> Date: Fri, 24 Apr 2015 20:50:34 +0200 Subject: [PATCH 0454/2721] Added support for mpv if mplayer is not available --- youtube_dl/downloader/mplayer.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 72cef30ea..551c4ae94 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -16,12 +16,19 @@ class MplayerFD(FileDownloader): self.report_destination(filename) tmpfilename = self.temp_name(filename) - args = [ - 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', - '-dumpstream', '-dumpfile', tmpfilename, url] + args = [] # Check for mplayer first - if not check_executable('mplayer', ['-h']): - self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0]) + if check_executable('mplayer', ['-h']): + args = [ + 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', + '-dumpstream', '-dumpfile', tmpfilename, url] + + # Check for mpv + elif check_executable('mpv', ['-h']): + args = [ + 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] + else: + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run') return False # Download 
using mplayer. From a4196c3ea57cf06b6d1f961b9c40f65e173c9876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 24 Apr 2015 22:06:22 +0200 Subject: [PATCH 0455/2721] [ellentv] Remove unused import --- youtube_dl/extractor/ellentv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index 74b50bca2..02c6a4615 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -6,7 +6,6 @@ import json from .common import InfoExtractor from ..utils import ( ExtractorError, - parse_iso8601, ) From 92995e6265d2287ee67af869e36a56a05fb86a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 24 Apr 2015 22:08:00 +0200 Subject: [PATCH 0456/2721] [postprocessor/embedthumbnail] Style fix --- youtube_dl/postprocessor/embedthumbnail.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index a2d6b14db..7ba98a0ea 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -35,7 +35,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor): compat_urlretrieve(info['thumbnail'], temp_thumbnail) if info['ext'] == 'mp3': - options = ['-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1', + options = [ + '-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"'] self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename) From 9f3fa89f7c6300c8a7372cf7f694fd8a659785cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 25 Apr 2015 11:59:54 +0200 Subject: [PATCH 0457/2721] Remove the --max-quality option It 
doesn't work well with 'bestvideo' and 'bestaudio' because they are usually before the max quality. Format filters should be used instead, they are more flexible and don't require the requested quality to exist for each video. --- test/parameters.json | 1 - test/test_YoutubeDL.py | 33 --------------------------------- youtube_dl/YoutubeDL.py | 8 -------- youtube_dl/__init__.py | 1 - youtube_dl/options.py | 4 ---- youtube_dl/utils.py | 9 --------- 6 files changed, 56 deletions(-) diff --git a/test/parameters.json b/test/parameters.json index 48b5a062e..7bf59c25f 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -8,7 +8,6 @@ "forcetitle": false, "forceurl": false, "format": "best", - "format_limit": null, "ignoreerrors": false, "listformats": null, "logtostderr": false, diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 820e55ec2..bb4a65ee1 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -101,39 +101,6 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['ext'], 'flv') - def test_format_limit(self): - formats = [ - {'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1}, - {'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2}, - {'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3}, - {'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4}, - ] - info_dict = _make_result(formats) - - ydl = YDL() - ydl.process_ie_result(info_dict) - downloaded = ydl.downloaded_info_dicts[0] - self.assertEqual(downloaded['format_id'], 'excellent') - - ydl = YDL({'format_limit': 'good'}) - assert ydl.params['format_limit'] == 'good' - ydl.process_ie_result(info_dict.copy()) - downloaded = ydl.downloaded_info_dicts[0] - self.assertEqual(downloaded['format_id'], 'good') - - ydl = YDL({'format_limit': 'great', 'format': 'all'}) - ydl.process_ie_result(info_dict.copy()) - 
self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh') - self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good') - self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great') - self.assertTrue('3' in ydl.msgs[0]) - - ydl = YDL() - ydl.params['format_limit'] = 'excellent' - ydl.process_ie_result(info_dict.copy()) - downloaded = ydl.downloaded_info_dicts[0] - self.assertEqual(downloaded['format_id'], 'excellent') - def test_format_selection(self): formats = [ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0fdcf1b0b..977141881 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -64,7 +64,6 @@ from .utils import ( sanitize_path, std_headers, subtitles_filename, - takewhile_inclusive, UnavailableVideoError, url_basename, version_tuple, @@ -135,7 +134,6 @@ class YoutubeDL(object): (or video) as a single JSON line. simulate: Do not download the video files. format: Video format code. See options.py for more information. - format_limit: Highest quality format to try. outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names ignoreerrors: Do not stop on download errors. 
@@ -1068,12 +1066,6 @@ class YoutubeDL(object): full_format_info.update(format) format['http_headers'] = self._calc_headers(full_format_info) - format_limit = self.params.get('format_limit', None) - if format_limit: - formats = list(takewhile_inclusive( - lambda f: f['format_id'] != format_limit, formats - )) - # TODO Central sorting goes here if formats[0] is not info_dict: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index d7759db68..c88489f29 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -283,7 +283,6 @@ def _real_main(argv=None): 'simulate': opts.simulate or any_getting, 'skip_download': opts.skip_download, 'format': opts.format, - 'format_limit': opts.format_limit, 'listformats': opts.listformats, 'outtmpl': outtmpl, 'autonumber_size': opts.autonumber_size, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 39c38c980..4c9d39d9a 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -331,10 +331,6 @@ def parseOpts(overrideArguments=None): '--prefer-free-formats', action='store_true', dest='prefer_free_formats', default=False, help='Prefer free video formats unless a specific one is requested') - video_format.add_option( - '--max-quality', - action='store', dest='format_limit', metavar='FORMAT', - help='Highest quality format to download') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index edeee1853..c69d3e165 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1109,15 +1109,6 @@ def shell_quote(args): return ' '.join(quoted_args) -def takewhile_inclusive(pred, seq): - """ Like itertools.takewhile, but include the latest evaluated element - (the first element so that Not pred(e)) """ - for e in seq: - yield e - if not pred(e): - return - - def smuggle_url(url, data): """ Pass additional data in a URL for internal use. 
""" From 0d1bd5d62f29cc1a02c4dd5cea55fe018f9fea97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 25 Apr 2015 15:14:16 +0200 Subject: [PATCH 0458/2721] README: remove --max-quality --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7040be1ec..948e0a4b9 100644 --- a/README.md +++ b/README.md @@ -188,7 +188,6 @@ which means you can modify it, redistribute it or use it however you like. -f, --format FORMAT Video format code, see the "FORMAT SELECTION" for all the info --all-formats Download all available video formats --prefer-free-formats Prefer free video formats unless a specific one is requested - --max-quality FORMAT Highest quality format to download -F, --list-formats List all available formats --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no @@ -324,9 +323,9 @@ YouTube changed their playlist format in March 2014 and later on, so you'll need If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update. -### Do I always have to pass in `--max-quality FORMAT`, or `-citw`? +### Do I always have to pass `-citw`? 
-By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`. +By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`. ### Can you please put the -b option back? From a542e372ab3dc36d2320ba588d5bfa8011e1df5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Apr 2015 20:22:20 +0600 Subject: [PATCH 0459/2721] [mtv] Stuff lang into info URL when available --- youtube_dl/extractor/mtv.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 4430b3416..b48fac5e3 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -25,6 +25,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): _MOBILE_TEMPLATE = None + _LANG = None @staticmethod def _id_from_uri(uri): @@ -169,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) data = compat_urllib_parse.urlencode({'uri': uri}) + info_url = feed_url + '?' + if self._LANG: + info_url += 'lang=%s&' % self._LANG + info_url += data idoc = self._download_xml( - feed_url + '?' 
+ data, video_id, + info_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return self.playlist_result( [self._get_video_info(item) for item in idoc.findall('.//item')]) From e4a5e772f2e691a2f462796ad08dea6e04938697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Apr 2015 20:23:42 +0600 Subject: [PATCH 0460/2721] [southpark:espanol] Add extractor (Closes #5525) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/southpark.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a64afa1da..ab80fd5e0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -478,6 +478,7 @@ from .soundgasm import ( ) from .southpark import ( SouthParkIE, + SouthParkEsIE, SouthparkDeIE, ) from .space import SpaceIE diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index c20397b3d..5c653c8bc 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -20,6 +20,17 @@ class SouthParkIE(MTVServicesInfoExtractor): }] +class SouthParkEsIE(SouthParkIE): + IE_NAME = 'southpark.cc.com:espanol' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' + _LANG = 'es' + + _TESTS = [{ + 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', + 'playlist_count': 4, + }] + + class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' From 857f00ed94ea04575a19155d8813b7b599bbd6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Apr 2015 20:24:15 +0600 Subject: [PATCH 0461/2721] [southpark] Improve some _VALID_URL's --- youtube_dl/extractor/southpark.py 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 5c653c8bc..77758bbed 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' @@ -33,7 +33,7 @@ class SouthParkEsIE(SouthParkIE): class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' _TESTS = [{ From 529d26c3e191a3fc24b9c7618310c47929a0c11f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Apr 2015 21:06:27 +0600 Subject: [PATCH 0462/2721] [orf:iptv] Update test --- youtube_dl/extractor/orf.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index ca1a5bb3c..2e6c9872b 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -210,16 +210,16 @@ class ORFIPTVIE(InfoExtractor): _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' _TEST = { - 'url': 'http://iptv.orf.at/stories/2267952', - 'md5': '26ffa4bab6dbce1eee78bbc7021016cd', + 'url': 'http://iptv.orf.at/stories/2275236/', + 'md5': 'c8b22af4718a4b4af58342529453e3e5', 'info_dict': { - 'id': '339775', + 'id': '350612', 'ext': 'flv', - 'title': 'Kreml-Kritiker Nawalny wieder frei', - 'description': 
'md5:6f24e7f546d364dacd0e616a9e409236', - 'duration': 84.729, + 'title': 'Weitere Evakuierungen um Vulkan Calbuco', + 'description': 'md5:d689c959bdbcf04efeddedbf2299d633', + 'duration': 68.197, 'thumbnail': 're:^https?://.*\.jpg$', - 'upload_date': '20150306', + 'upload_date': '20150425', }, } From 672f1bd8497f43179dcd01f8b4831564f0b42356 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 24 Apr 2015 23:46:51 +0800 Subject: [PATCH 0463/2721] [cspan] Extract subtitles --- youtube_dl/extractor/cspan.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index d516b1402..fbefd37d0 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -8,6 +8,7 @@ from ..utils import ( unescapeHTML, find_xpath_attr, smuggle_url, + determine_ext, ) from .senateisvp import SenateISVPIE @@ -87,6 +88,10 @@ class CSpanIE(InfoExtractor): return self.url_result(surl, 'SenateISVP', video_id, title) files = data['video']['files'] + try: + capfile = data['video']['capfile']['#text'] + except KeyError: + capfile = None entries = [{ 'id': '%s_%d' % (video_id, partnum + 1), @@ -97,6 +102,12 @@ class CSpanIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'duration': int_or_none(f.get('length', {}).get('#text')), + 'subtitles': { + 'en': [{ + 'url': capfile, + 'ext': determine_ext(capfile, 'dfxp') + }], + } if capfile else None, } for partnum, f in enumerate(files)] if len(entries) == 1: From bf6427d2fbcbd95cd1cb640e8b894c18782a2a12 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 25 Apr 2015 23:15:05 +0800 Subject: [PATCH 0464/2721] [ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146) --- test/test_utils.py | 38 +++++++++++++++++++++ youtube_dl/postprocessor/ffmpeg.py | 25 ++++++++++++++ youtube_dl/utils.py | 53 ++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) diff --git a/test/test_utils.py 
b/test/test_utils.py index 2e3a6480c..17017a8c0 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -58,6 +58,8 @@ from youtube_dl.utils import ( xpath_text, render_table, match_str, + parse_dfxp_time_expr, + dfxp2srt, ) @@ -581,6 +583,42 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 190, 'dislike_count': 10})) + def test_parse_dfxp_time_expr(self): + self.assertEqual(parse_dfxp_time_expr(None), 0.0) + self.assertEqual(parse_dfxp_time_expr(''), 0.0) + self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) + self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) + self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) + self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1) + + def test_dfxp2srt(self): + dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?> + <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> + <body> + <div xml:lang="en"> + <p begin="0" end="1">The following line contains Chinese characters and special symbols</p> + <p begin="1" end="2">第二行<br/>♪♪</p> + <p begin="2" end="3"><span>Third<br/>Line</span></p> + </div> + </body> + </tt>''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The following line contains Chinese characters and special symbols + +2 +00:00:01,000 --> 00:00:02,000 +第二行 +♪♪ + +3 +00:00:02,000 --> 00:00:03,000 +Third +Line + +''' + self.assertEqual(dfxp2srt(dfxp_data), srt_data) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 7a952963e..1765f4969 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -20,6 +20,7 @@ from ..utils import ( prepend_extension, shell_quote, subtitles_filename, + dfxp2srt, ) @@ -651,6 +652,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): 'format' % new_ext) continue new_file = subtitles_filename(filename, lang, 
new_ext) + + if ext == 'dfxp' or ext == 'ttml': + self._downloader.report_warning( + 'You have requested to convert dfxp (TTML) subtitles into another format, ' + 'which results in style information loss') + + dfxp_file = subtitles_filename(filename, lang, ext) + srt_file = subtitles_filename(filename, lang, 'srt') + + with io.open(dfxp_file, 'rt', encoding='utf-8') as f: + srt_data = dfxp2srt(f.read()) + + with io.open(srt_file, 'wt', encoding='utf-8') as f: + f.write(srt_data) + + ext = 'srt' + subs[lang] = { + 'ext': 'srt', + 'data': srt_data + } + + if new_ext == 'srt': + continue + self.run_ffmpeg( subtitles_filename(filename, lang, ext), new_file, ['-f', new_format]) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index edeee1853..5e1c4525d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1800,6 +1800,59 @@ def match_filter_func(filter_str): return _match_func +def parse_dfxp_time_expr(time_expr): + if not time_expr: + return 0.0 + + mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr) + if mobj: + return float(mobj.group('time_offset')) + + mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr) + if mobj: + return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3)) + + +def format_srt_time(seconds): + (mins, secs) = divmod(seconds, 60) + (hours, mins) = divmod(mins, 60) + millisecs = (secs - int(secs)) * 1000 + secs = int(secs) + return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs) + + +def dfxp2srt(dfxp_data): + _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'}) + + def parse_node(node): + str_or_empty = functools.partial(str_or_none, default='') + + out = str_or_empty(node.text) + + for child in node: + if child.tag == _x('ttml:br'): + out += '\n' + str_or_empty(child.tail) + elif child.tag == _x('ttml:span'): + out += str_or_empty(parse_node(child)) + else: + out += str_or_empty(xml.etree.ElementTree.tostring(child)) + + return out + + dfxp = 
xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) + out = [] + paras = dfxp.findall(_x('.//ttml:p')) + + for para, index in zip(paras, itertools.count(1)): + out.append('%d\n%s --> %s\n%s\n\n' % ( + index, + format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))), + format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))), + parse_node(para))) + + return ''.join(out) + + class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): def __init__(self, proxies=None): # Set default handlers From f54bab4d6777535390d64502bd20bb5a81b5468a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Apr 2015 22:39:50 +0600 Subject: [PATCH 0465/2721] [instagram] Improve _VALID_URL --- youtube_dl/extractor/instagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index b020e2621..d219ceb55 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -9,7 +9,7 @@ from ..utils import ( class InstagramIE(InfoExtractor): - _VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/' + _VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)' _TEST = { 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', From d2d8248f68c029472bb81eacf4aa15987729977f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 25 Apr 2015 22:42:15 +0600 Subject: [PATCH 0466/2721] [instagram] Modernize --- youtube_dl/extractor/instagram.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index d219ceb55..65f6ca103 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,9 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - int_or_none, -) +from ..utils 
import int_or_none class InstagramIE(InfoExtractor): @@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"', webpage, 'uploader id', fatal=False) From b860f5dfd45f58b2c0a92cecd28c7eb05a7a2ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2015 00:22:13 +0600 Subject: [PATCH 0467/2721] [mplayer] Clarify error message --- youtube_dl/downloader/mplayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index 551c4ae94..b1ea8daa6 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -28,7 +28,7 @@ class MplayerFD(FileDownloader): args = [ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] else: - self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run') + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') return False # Download using mplayer. 
From b874495b1fa339126c306df54aa316043ac0047b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2015 00:23:16 +0600 Subject: [PATCH 0468/2721] [mplayer] Simplify --- youtube_dl/downloader/mplayer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/mplayer.py index b1ea8daa6..0cad5a148 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/mplayer.py @@ -16,14 +16,10 @@ class MplayerFD(FileDownloader): self.report_destination(filename) tmpfilename = self.temp_name(filename) - args = [] - # Check for mplayer first if check_executable('mplayer', ['-h']): args = [ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] - - # Check for mpv elif check_executable('mpv', ['-h']): args = [ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] @@ -31,7 +27,6 @@ class MplayerFD(FileDownloader): self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') return False - # Download using mplayer. 
retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) @@ -46,5 +41,5 @@ class MplayerFD(FileDownloader): return True else: self.to_stderr('\n') - self.report_error('mplayer exited with code %d' % retval) + self.report_error('%s exited with code %d' % (args[0], retval)) return False From a5ebf77d877ededed377db920faf4f31644573cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2015 00:25:51 +0600 Subject: [PATCH 0469/2721] [mplayer] Rename to RTSP --- youtube_dl/downloader/__init__.py | 6 +++--- youtube_dl/downloader/{mplayer.py => rtsp.py} | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename youtube_dl/downloader/{mplayer.py => rtsp.py} (97%) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 9fb66e2f7..f110830c4 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -6,7 +6,7 @@ from .f4m import F4mFD from .hls import HlsFD from .hls import NativeHlsFD from .http import HttpFD -from .mplayer import MplayerFD +from .rtsp import RtspFD from .rtmp import RtmpFD from ..utils import ( @@ -17,8 +17,8 @@ PROTOCOL_MAP = { 'rtmp': RtmpFD, 'm3u8_native': NativeHlsFD, 'm3u8': HlsFD, - 'mms': MplayerFD, - 'rtsp': MplayerFD, + 'mms': RtspFD, + 'rtsp': RtspFD, 'f4m': F4mFD, } diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/rtsp.py similarity index 97% rename from youtube_dl/downloader/mplayer.py rename to youtube_dl/downloader/rtsp.py index 0cad5a148..3eb29526c 100644 --- a/youtube_dl/downloader/mplayer.py +++ b/youtube_dl/downloader/rtsp.py @@ -10,7 +10,7 @@ from ..utils import ( ) -class MplayerFD(FileDownloader): +class RtspFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] self.report_destination(filename) From 2a09c1b8ab348c43ece3cf246645e7335a802ca9 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Sat, 25 Apr 2015 20:41:15 
+0200 Subject: [PATCH 0470/2721] [postprocessor/embedthumbnail] Fix mp3 embedding with avconv (fixes #5526) --- youtube_dl/postprocessor/embedthumbnail.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 7ba98a0ea..4868a42fd 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -11,6 +11,7 @@ from ..compat import ( compat_urlretrieve, ) from ..utils import ( + determine_ext, check_executable, encodeFilename, PostProcessingError, @@ -27,7 +28,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - temp_thumbnail = prepend_extension(filename, 'thumb') + temp_thumbnail = filename + '.' + determine_ext(info['thumbnail']) if not info.get('thumbnail'): raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.') From 642f23bd81ae31bb9b5647ff515db66e3023c35f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 25 Apr 2015 22:34:28 +0200 Subject: [PATCH 0471/2721] =?UTF-8?q?[southpark]=20Use=20'=C3=B1'=20in=20t?= =?UTF-8?q?he=20spanish=20extractor=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IE_NAME can contain unicode characters, so it shouldn't be a problem. 
--- youtube_dl/extractor/southpark.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 77758bbed..e3b73295c 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -1,3 +1,4 @@ +# encoding: utf-8 from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor @@ -21,7 +22,7 @@ class SouthParkIE(MTVServicesInfoExtractor): class SouthParkEsIE(SouthParkIE): - IE_NAME = 'southpark.cc.com:espanol' + IE_NAME = 'southpark.cc.com:español' _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' _LANG = 'es' From aa49acd15a92faa5cfc1d2876821743f86440c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2015 04:29:41 +0600 Subject: [PATCH 0472/2721] [utils] Add `get_subprocess_encoding` and filename/argument decode counterparts --- youtube_dl/utils.py | 49 +++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5e1c4525d..7de7742e3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -371,6 +371,18 @@ def unescapeHTML(s): r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s) +def get_subprocess_encoding(): + if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: + # For subprocess calls, encode with locale encoding + # Refer to http://stackoverflow.com/a/9951851/35070 + encoding = preferredencoding() + else: + encoding = sys.getfilesystemencoding() + if encoding is None: + encoding = 'utf-8' + return encoding + + def encodeFilename(s, for_subprocess=False): """ @param s The name of the file @@ -382,21 +394,24 @@ def encodeFilename(s, for_subprocess=False): if sys.version_info >= (3, 0): return s - if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - # Pass '' directly to use Unicode APIs on Windows 
2000 and up - # (Detecting Windows NT 4 is tricky because 'major >= 4' would - # match Windows 9x series as well. Besides, NT 4 is obsolete.) - if not for_subprocess: - return s - else: - # For subprocess calls, encode with locale encoding - # Refer to http://stackoverflow.com/a/9951851/35070 - encoding = preferredencoding() - else: - encoding = sys.getfilesystemencoding() - if encoding is None: - encoding = 'utf-8' - return s.encode(encoding, 'ignore') + # Pass '' directly to use Unicode APIs on Windows 2000 and up + # (Detecting Windows NT 4 is tricky because 'major >= 4' would + # match Windows 9x series as well. Besides, NT 4 is obsolete.) + if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: + return s + + return s.encode(get_subprocess_encoding(), 'ignore') + + +def decodeFilename(b, for_subprocess=False): + + if sys.version_info >= (3, 0): + return b + + if not isinstance(b, bytes): + return b + + return b.decode(get_subprocess_encoding(), 'ignore') def encodeArgument(s): @@ -408,6 +423,10 @@ def encodeArgument(s): return encodeFilename(s, True) +def decodeArgument(b): + return decodeFilename(b, True) + + def decodeOption(optval): if optval is None: return optval From cd8a07a7649cbd0feea476c89882c5c85130724f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2015 04:30:45 +0600 Subject: [PATCH 0473/2721] [downloader/common] Use decodeArgument --- youtube_dl/downloader/common.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index a0fc5ead0..97e755d4b 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -8,6 +8,7 @@ import time from ..compat import compat_str from ..utils import ( encodeFilename, + decodeArgument, format_bytes, timeconvert, ) @@ -353,19 +354,15 @@ class FileDownloader(object): # this interface self._progress_hooks.append(ph) - 
def _debug_cmd(self, args, subprocess_encoding, exe=None): + def _debug_cmd(self, args, exe=None): if not self.params.get('verbose', False): return - if exe is None: - exe = os.path.basename(args[0]) + str_args = [decodeArgument(a) for a in args] + + if exe is None: + exe = os.path.basename(str_args[0]) - if subprocess_encoding: - str_args = [ - a.decode(subprocess_encoding) if isinstance(a, bytes) else a - for a in args] - else: - str_args = args try: import pipes shell_quote = lambda args: ' '.join(map(pipes.quote, str_args)) From 9e105a858c2aa0089764326cc3fca808c51a9ffd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2015 04:32:54 +0600 Subject: [PATCH 0474/2721] [downloader/rtmp] Fix arguments encoding and simplify retry logic (Closes #5528) --- youtube_dl/downloader/rtmp.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index ddf5724ae..6865b5e2f 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import os import re import subprocess -import sys import time from .common import FileDownloader @@ -11,6 +10,7 @@ from ..compat import compat_str from ..utils import ( check_executable, encodeFilename, + encodeArgument, get_exe_version, ) @@ -121,7 +121,7 @@ class RtmpFD(FileDownloader): # possible. This is part of rtmpdump's normal usage, AFAIK. 
basic_args = [ 'rtmpdump', '--verbose', '-r', url, - '-o', encodeFilename(tmpfilename, True)] + '-o', tmpfilename] if player_url is not None: basic_args += ['--swfVfy', player_url] if page_url is not None: @@ -154,16 +154,9 @@ class RtmpFD(FileDownloader): if not live and continue_dl: args += ['--skip', '1'] - if sys.platform == 'win32' and sys.version_info < (3, 0): - # Windows subprocess module does not actually support Unicode - # on Python 2.x - # See http://stackoverflow.com/a/9951851/35070 - subprocess_encoding = sys.getfilesystemencoding() - args = [a.encode(subprocess_encoding, 'ignore') for a in args] - else: - subprocess_encoding = None + args = [encodeArgument(a) for a in args] - self._debug_cmd(args, subprocess_encoding, exe='rtmpdump') + self._debug_cmd(args, exe='rtmpdump') RD_SUCCESS = 0 RD_FAILED = 1 @@ -180,7 +173,11 @@ class RtmpFD(FileDownloader): prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('[rtmpdump] %s bytes' % prevsize) time.sleep(5.0) # This seems to be needed - retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED]) + args = basic_args + ['--resume'] + if retval == RD_FAILED: + args += ['--skip', '1'] + args = [encodeArgument(a) for a in args] + retval = run_rtmpdump(args) cursize = os.path.getsize(encodeFilename(tmpfilename)) if prevsize == cursize and retval == RD_FAILED: break From 74f8654a53ed56dd813ace704151ea575963cabb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 26 Apr 2015 04:33:43 +0600 Subject: [PATCH 0475/2721] [downloader/external] Use encodeArgument --- youtube_dl/downloader/external.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 1673b2382..7ca2d3143 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals import os.path import 
subprocess -import sys from .common import FileDownloader from ..utils import ( encodeFilename, + encodeArgument, ) @@ -60,17 +60,9 @@ class ExternalFD(FileDownloader): def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ - cmd = self._make_cmd(tmpfilename, info_dict) + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] - if sys.platform == 'win32' and sys.version_info < (3, 0): - # Windows subprocess module does not actually support Unicode - # on Python 2.x - # See http://stackoverflow.com/a/9951851/35070 - subprocess_encoding = sys.getfilesystemencoding() - cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd] - else: - subprocess_encoding = None - self._debug_cmd(cmd, subprocess_encoding) + self._debug_cmd(cmd) p = subprocess.Popen( cmd, stderr=subprocess.PIPE) From 06d07c400088d35096df1406a3434c996795250d Mon Sep 17 00:00:00 2001 From: felix <m.p.isaev@yandex.com> Date: Sun, 26 Apr 2015 14:15:29 +0200 Subject: [PATCH 0476/2721] New extractor: live.philharmoniedeparis.fr --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/philharmoniedeparis.py | 77 +++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 youtube_dl/extractor/philharmoniedeparis.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index ab80fd5e0..641c45f43 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -377,6 +377,7 @@ from .orf import ( from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .planetaplay import PlanetaPlayIE diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py new file mode 100644 index 000000000..7fc2f32ab --- /dev/null +++ b/youtube_dl/extractor/philharmoniedeparis.py 
@@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + parse_iso8601, + unified_strdate, +) + +class PhilharmonieDeParisIE(InfoExtractor): + _VALID_URL = r'http://live\.philharmoniedeparis\.fr/concert/(?P<id>\d+)(?:/|\.html)' + _TESTS = [{ + 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', + 'info_dict': { + 'id': '1032066', + 'ext': 'mp4', + 'title': "Week-end Bach. Passion selon saint Jean. Akademie für alte Musik Berlin, Rias Kammerchor, René Jacobs", + 'upload_date': '20150404', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + fichier_nom = self._html_search_regex(r'\sflashvars\s*:\s*\{\s*fichier_nom\s*:\s*\'(.*?)\'\s*,', webpage, 'fichier_nom') + + playlist = self._download_xml('http://live.philharmoniedeparis.fr' + fichier_nom, video_id) + + concert = playlist.find('.//concert') + + formats = [] + info_dict = { + 'id': video_id, + 'title': concert.find('./titre').text, + 'formats': formats, + } + + if concert.attrib.get('heure'): + info_dict['timestamp'] = parse_iso8601(('%s-%s-%s%s') % ( + concert.attrib['date'][0:4], + concert.attrib['date'][4:6], + concert.attrib['date'][6:8], + concert.attrib['heure'] + )) + else: + info_dict['upload_date'] = concert.attrib['date'] + + fichiers = concert.find('./fichiers') + for fichier in fichiers.findall('./fichier'): + # Sometimes <ficher>s have no attributes at all. Skip them. 
+ if 'url' not in fichier.attrib: + continue + + formats.append({ + 'format_id': 'lq', + 'url': fichiers.attrib['serveurstream'], + 'ext': determine_ext(fichier.attrib['url']), + 'play_path': fichier.attrib['url'], + 'width': int_or_none(concert.attrib['largeur']), + 'height': int_or_none(concert.attrib['hauteur']), + 'quality': 1, + }) + + formats.append({ + 'format_id': 'hq', + 'url': fichiers.attrib['serveurstream'], + 'ext': determine_ext(fichier.attrib['url_hd']), + 'play_path': fichier.attrib['url_hd'], + 'width': int_or_none(concert.attrib['largeur_hd']), + 'height': int_or_none(concert.attrib['hauteur_hd']), + 'quality': 2, + }) + + return info_dict From 4eb5c65beec671e534ad4859c073adb362e537ec Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 26 Apr 2015 22:45:20 +0200 Subject: [PATCH 0477/2721] release 2015.04.26 --- docs/supportedsites.md | 10 +++++++--- youtube_dl/version.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 80e86c1b6..cc1564b7b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -174,6 +174,7 @@ - **Gazeta** - **GDCVault** - **generic**: Generic downloader that works on some sites + - **Gfycat** - **GiantBomb** - **Giga** - **Glide**: Glide mobile video messages (glide.me) @@ -181,7 +182,7 @@ - **GodTube** - **GoldenMoustache** - **Golem** - - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in + - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net - **Goshgay** - **Grooveshark** - **Groupon** @@ -252,6 +253,7 @@ - **Malemotion** - **MDR** - **media.ccc.de** + - **MegaVideoz** - **metacafe** - **Metacritic** - **Mgoon** @@ -405,6 +407,7 @@ - **Screencast** - **ScreencastOMatic** - **ScreenwaveMedia** + - **SenateISVP** - **ServingSys** - **Sexu** - **SexyKarma**: Sexy Karma and Watch Indian Porn @@ -427,6 +430,7 @@ - **soundgasm** - **soundgasm:profile** - 
**southpark.cc.com** + - **southpark.cc.com:español** - **southpark.de** - **Space** - **SpankBang** @@ -461,7 +465,7 @@ - **TeamFour** - **TechTalks** - **techtv.mit.edu** - - **TED** + - **ted** - **tegenlicht.vpro.nl** - **TeleBruxelles** - **telecinco.es** @@ -551,7 +555,7 @@ - **vimeo:review**: Review pages on vimeo - **vimeo:user** - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication) - - **Vimple**: Vimple.ru + - **Vimple**: Vimple - one-click video hosting - **Vine** - **vine:user** - **vk.com** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3fd0e7e56..dc7d666dd 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.17' +__version__ = '2015.04.26' From a01cfc2951745c36cb3e8664bacff3ff8a61d4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 27 Apr 2015 03:36:32 +0600 Subject: [PATCH 0478/2721] [philharmoniedeparis] Fix extraction and tests, improve, simplify --- youtube_dl/extractor/philharmoniedeparis.py | 89 +++++++++++---------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py index 7fc2f32ab..6e60e5fe9 100644 --- a/youtube_dl/extractor/philharmoniedeparis.py +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -3,75 +3,76 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - determine_ext, + float_or_none, int_or_none, parse_iso8601, - unified_strdate, + xpath_text, ) + class PhilharmonieDeParisIE(InfoExtractor): - _VALID_URL = r'http://live\.philharmoniedeparis\.fr/concert/(?P<id>\d+)(?:/|\.html)' + IE_DESC = 'Philharmonie de Paris' + _VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' _TESTS = [{ 'url': 
'http://live.philharmoniedeparis.fr/concert/1032066.html', 'info_dict': { 'id': '1032066', - 'ext': 'mp4', - 'title': "Week-end Bach. Passion selon saint Jean. Akademie für alte Musik Berlin, Rias Kammerchor, René Jacobs", + 'ext': 'flv', + 'title': 'md5:d1f5585d87d041d07ce9434804bc8425', + 'timestamp': 1428179400, 'upload_date': '20150404', + 'duration': 6592.278, + }, + 'params': { + # rtmp download + 'skip_download': True, } + }, { + 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', + 'only_matching': True, + }, { + 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - fichier_nom = self._html_search_regex(r'\sflashvars\s*:\s*\{\s*fichier_nom\s*:\s*\'(.*?)\'\s*,', webpage, 'fichier_nom') - - playlist = self._download_xml('http://live.philharmoniedeparis.fr' + fichier_nom, video_id) - - concert = playlist.find('.//concert') + concert = self._download_xml( + 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id, + video_id).find('./concert') formats = [] info_dict = { 'id': video_id, - 'title': concert.find('./titre').text, + 'title': xpath_text(concert, './titre', 'title', fatal=True), 'formats': formats, } - if concert.attrib.get('heure'): - info_dict['timestamp'] = parse_iso8601(('%s-%s-%s%s') % ( - concert.attrib['date'][0:4], - concert.attrib['date'][4:6], - concert.attrib['date'][6:8], - concert.attrib['heure'] - )) - else: - info_dict['upload_date'] = concert.attrib['date'] - fichiers = concert.find('./fichiers') + stream = fichiers.attrib['serveurstream'] for fichier in fichiers.findall('./fichier'): - # Sometimes <ficher>s have no attributes at all. Skip them. 
- if 'url' not in fichier.attrib: - continue + info_dict['duration'] = float_or_none(fichier.get('timecodefin')) + for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]): + format_url = fichier.get('url%s' % suffix) + if not format_url: + continue + formats.append({ + 'url': stream, + 'play_path': format_url, + 'ext': 'flv', + 'format_id': format_id, + 'width': int_or_none(concert.get('largeur%s' % suffix)), + 'height': int_or_none(concert.get('hauteur%s' % suffix)), + 'quality': quality, + }) + self._sort_formats(formats) - formats.append({ - 'format_id': 'lq', - 'url': fichiers.attrib['serveurstream'], - 'ext': determine_ext(fichier.attrib['url']), - 'play_path': fichier.attrib['url'], - 'width': int_or_none(concert.attrib['largeur']), - 'height': int_or_none(concert.attrib['hauteur']), - 'quality': 1, - }) - - formats.append({ - 'format_id': 'hq', - 'url': fichiers.attrib['serveurstream'], - 'ext': determine_ext(fichier.attrib['url_hd']), - 'play_path': fichier.attrib['url_hd'], - 'width': int_or_none(concert.attrib['largeur_hd']), - 'height': int_or_none(concert.attrib['hauteur_hd']), - 'quality': 2, - }) + date, hour = concert.get('date'), concert.get('heure') + if date and hour: + info_dict['timestamp'] = parse_iso8601( + '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour)) + elif date: + info_dict['upload_date'] = date return info_dict From 053c94f1b358a44789c38efd916904ce2733fd51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 27 Apr 2015 15:21:51 +0600 Subject: [PATCH 0479/2721] [README] Clarify youtube-dl version when format selection changed to bestvideo+bestaudio/best --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7040be1ec..b4e4b5db6 100644 --- a/README.md +++ b/README.md @@ -270,9 +270,9 @@ The simplest case is requesting a specific format, for example `-f 22`. 
You can If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. -Since the end of April 2015 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. +Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). 
If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. -If you want to preserve the old format selection behavior (pre-April 2015), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. +If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. # VIDEO SELECTION From c8183e661d629bd7d86d38eda9e0013e2aad682e Mon Sep 17 00:00:00 2001 From: "Sergey M." <dstftw@gmail.com> Date: Mon, 27 Apr 2015 16:01:30 +0600 Subject: [PATCH 0480/2721] [README] Document special characters escaping (#5538) --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index b4e4b5db6..5fb992241 100644 --- a/README.md +++ b/README.md @@ -358,6 +358,22 @@ YouTube has switched to a new video info format in July 2011 which is not suppor YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. 
See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. +### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command` ### + +That's actually the output from your shell. Since ampersand is one of the special shell characters it's interpreted by shell preventing you from passing the whole URL to youtube-dl. To disable your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach will work depends on your shell). + +For example if your URL is https://www.youtube.com/watch?t=48&v=e2CXaJv0cMw you should end up with following command: + +```youtube-dl 'https://www.youtube.com/watch?t=48&v=e2CXaJv0cMw'``` + +or + +```youtube-dl https://www.youtube.com/watch?t=48\&v=e2CXaJv0cMw``` + +For Windows you have to use the double quotes: + +```youtube-dl "https://www.youtube.com/watch?t=48&v=e2CXaJv0cMw"``` + ### ExtractorError: Could not find JS function u'OF' In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. From 189ba90996d810ea00b165d20d320869efe14518 Mon Sep 17 00:00:00 2001 From: "Sergey M." <dstftw@gmail.com> Date: Mon, 27 Apr 2015 16:05:01 +0600 Subject: [PATCH 0481/2721] [README] Use youtube-dl test video URL --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5fb992241..f67744f0f 100644 --- a/README.md +++ b/README.md @@ -362,17 +362,17 @@ YouTube requires an additional signature since September 2012 which is not suppo That's actually the output from your shell. Since ampersand is one of the special shell characters it's interpreted by shell preventing you from passing the whole URL to youtube-dl. 
To disable your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach will work depends on your shell). -For example if your URL is https://www.youtube.com/watch?t=48&v=e2CXaJv0cMw you should end up with following command: +For example if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with following command: -```youtube-dl 'https://www.youtube.com/watch?t=48&v=e2CXaJv0cMw'``` +```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'``` or -```youtube-dl https://www.youtube.com/watch?t=48\&v=e2CXaJv0cMw``` +```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc``` For Windows you have to use the double quotes: -```youtube-dl "https://www.youtube.com/watch?t=48&v=e2CXaJv0cMw"``` +```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"``` ### ExtractorError: Could not find JS function u'OF' From c86b61428b127ee29793f005c5e45e321696d7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 27 Apr 2015 20:00:18 +0600 Subject: [PATCH 0482/2721] [utils] Fix another old python 2.6 kwargs issue (Closes #5539) --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7d15eab64..f07679c76 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -37,6 +37,7 @@ from .compat import ( compat_chr, compat_html_entities, compat_http_client, + compat_kwargs, compat_parse_qs, compat_socket_create_connection, compat_str, @@ -114,7 +115,7 @@ def write_json_file(obj, fn): 'encoding': 'utf-8', }) - tf = tempfile.NamedTemporaryFile(**args) + tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) try: with tf: From e2dc351d25d1b05b26cba654a22b0d10ed47dec9 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Mon, 27 Apr 2015 17:44:13 +0200 Subject: [PATCH 0483/2721] [escapist] Fix extractor 
(fixes #5090) --- youtube_dl/extractor/escapist.py | 156 +++++++++++++------------------ 1 file changed, 65 insertions(+), 91 deletions(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index e47f3e27a..a01a05b06 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -1,128 +1,102 @@ from __future__ import unicode_literals +import json + from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, - compat_urllib_request, -) +from ..compat import compat_urllib_request + from ..utils import ( - ExtractorError, - js_to_json, - parse_duration, + determine_ext, + clean_html, + qualities, ) +def _decrypt_config(key, string): + a = '' + i = '' + r = '' + + while len(a) < (len(string) / 2): + a += key + + a = a[0:int(len(string) / 2)] + + t = 0 + while t < len(string): + i += chr(int(string[t] + string[t + 1], 16)) + t += 2 + + icko = [s for s in i] + + for t, c in enumerate(a): + r += chr(ord(c) ^ ord(icko[t])) + + return r + + class EscapistIE(InfoExtractor): _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' - _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko' - _TEST = { + _TESTS = [{ 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', - 'md5': 'ab3a706c681efca53f0a35f1415cf0d1', + 'md5': 'c6793dbda81388f4264c1ba18684a74d', 'info_dict': { 'id': '6618', 'ext': 'mp4', 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? 
I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", - 'uploader_id': 'the-escapist-presents', - 'uploader': 'The Escapist Presents', 'title': "Breaking Down Baldur's Gate", 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 264, } - } + }, { + 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer', + 'md5': 'cf8842a8a46444d241f9a9980d7874f2', + 'info_dict': { + 'id': '10044', + 'ext': 'mp4', + 'description': 'This week, Zero Punctuation reviews Evolve.', + 'title': 'Evolve - One vs Multiplayer', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 304, + } + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage_req = compat_urllib_request.Request(url) - webpage_req.add_header('User-Agent', self._USER_AGENT) - webpage = self._download_webpage(webpage_req, video_id) + webpage = self._download_webpage(url, video_id) - uploader_id = self._html_search_regex( - r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'", - webpage, 'uploader ID', fatal=False) - uploader = self._html_search_regex( - r"<h1\s+class='headline'>(.*?)</a>", - webpage, 'uploader', fatal=False) - description = self._html_search_meta('description', webpage) - duration = parse_duration(self._html_search_meta('duration', webpage)) + imsVideo = json.loads(self._search_regex(r'imsVideo\.play\(([^\)]+)\);', webpage, 'imsVideo')) + video_id = imsVideo['videoID'] + key = imsVideo['hash'] - raw_title = self._html_search_meta('title', webpage, fatal=True) - title = raw_title.partition(' : ')[2] - - config_url = compat_urllib_parse.unquote(self._html_search_regex( - r'''(?x) - (?: - <param\s+name="flashvars".*?\s+value="config=| - flashvars="config= - ) - (https?://[^"&]+) - ''', - webpage, 'config URL')) + quality = qualities(['lq', 'hq', 'hd']) formats = [] - ad_formats = [] + for q in ['lq', 'hq', 'hd']: + config_req = compat_urllib_request.Request('http://www.escapistmagazine.com/videos/' + 
'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q)) + config_req.add_header('Referer', url) + config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper()) - def _add_format(name, cfg_url, quality): - cfg_req = compat_urllib_request.Request(cfg_url) - cfg_req.add_header('User-Agent', self._USER_AGENT) - config = self._download_json( - cfg_req, video_id, - 'Downloading ' + name + ' configuration', - 'Unable to download ' + name + ' configuration', - transform_source=js_to_json) + data = json.loads(_decrypt_config(key, config)) - playlist = config['playlist'] - for p in playlist: - if p.get('eventCategory') == 'Video': - ar = formats - elif p.get('eventCategory') == 'Video Postroll': - ar = ad_formats - else: - continue + title = clean_html(data['videoData']['title']) + duration = data['videoData']['duration'] / 1000 - ar.append({ - 'url': p['url'], - 'format_id': name, - 'quality': quality, - 'http_headers': { - 'User-Agent': self._USER_AGENT, - }, - }) + for i, v in enumerate(data['files']['videos']): - _add_format('normal', config_url, quality=0) - hq_url = (config_url + - ('&hq=1' if '?' 
in config_url else config_url + '?hq=1')) - try: - _add_format('hq', hq_url, quality=1) - except ExtractorError: - pass # That's fine, we'll just use normal quality - self._sort_formats(formats) + formats.append({ + 'url': v, + 'format_id': determine_ext(v) + '_' + q + str(i), + 'quality': quality(q), + }) - if '/escapist/sales-marketing/' in formats[-1]['url']: - raise ExtractorError('This IP address has been blocked by The Escapist', expected=True) - res = { + return { 'id': video_id, 'formats': formats, - 'uploader': uploader, - 'uploader_id': uploader_id, 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), - 'description': description, + 'description': self._og_search_description(webpage), 'duration': duration, } - - if self._downloader.params.get('include_ads') and ad_formats: - self._sort_formats(ad_formats) - ad_res = { - 'id': '%s-ad' % video_id, - 'title': '%s (Postroll)' % title, - 'formats': ad_formats, - } - return { - '_type': 'playlist', - 'entries': [res, ad_res], - 'title': title, - 'id': video_id, - } - - return res From 290a5a8d85e8f7514099fa305c8f0b26d1b35a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 27 Apr 2015 22:17:51 +0600 Subject: [PATCH 0484/2721] [escapist] Fix imsVideo regex (#5090) --- youtube_dl/extractor/escapist.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index a01a05b06..600ebf078 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -65,7 +65,10 @@ class EscapistIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - imsVideo = json.loads(self._search_regex(r'imsVideo\.play\(([^\)]+)\);', webpage, 'imsVideo')) + imsVideo = self._parse_json( + self._search_regex( + r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'), + video_id) video_id = imsVideo['videoID'] key = imsVideo['hash'] From 
e206740fd7454bb03e2819365a257bce59f76162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 27 Apr 2015 23:44:05 +0600 Subject: [PATCH 0485/2721] [moniker] Capture and output error message (#5541) --- youtube_dl/extractor/moniker.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 5de719bdc..801abe0e1 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -9,6 +9,7 @@ from ..compat import ( compat_urllib_parse, compat_urllib_request, ) +from ..utils import ExtractorError class MonikerIE(InfoExtractor): @@ -40,6 +41,12 @@ class MonikerIE(InfoExtractor): video_id = self._match_id(url) orig_webpage = self._download_webpage(url, video_id) + error = self._search_regex( + r'class="err">([^<]+)<', orig_webpage, 'error', default=None) + if error: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error), expected=True) + fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage) data = dict(fields) From 2419a376b911df61bec6dae7178fc342a4e218f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 27 Apr 2015 23:46:16 +0600 Subject: [PATCH 0486/2721] [moniker] Check not found error (#5541) --- youtube_dl/extractor/moniker.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 801abe0e1..88dcd4f73 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -41,6 +41,9 @@ class MonikerIE(InfoExtractor): video_id = self._match_id(url) orig_webpage = self._download_webpage(url, video_id) + if '>File Not Found<' in orig_webpage: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + error = self._search_regex( r'class="err">([^<]+)<', orig_webpage, 'error', default=None) if error: From 3a0f0c263a9c6e2a3e81fc97f59a5960fd5ee994 Mon 
Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 28 Apr 2015 09:11:18 +0200 Subject: [PATCH 0487/2721] release 2015.04.28 --- docs/supportedsites.md | 1 + youtube_dl/version.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cc1564b7b..d1d676cdd 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -340,6 +340,7 @@ - **parliamentlive.tv**: UK parliament videos - **Patreon** - **PBS** + - **PhilharmonieDeParis**: Philharmonie de Paris - **Phoenix** - **Photobucket** - **Pladform** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dc7d666dd..b88ea85e8 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.26' +__version__ = '2015.04.28' From 10831b5ec9158f2de5b16b66520d9dde02b97d77 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Tue, 28 Apr 2015 14:56:48 +0200 Subject: [PATCH 0488/2721] [vimeo] Fix redirection --- youtube_dl/extractor/vimeo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 9027f9dd6..398ca67c1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -223,6 +223,8 @@ class VimeoIE(VimeoBaseInfoExtractor): orig_url = url if mobj.group('pro') or mobj.group('player'): url = 'https://player.vimeo.com/video/' + video_id + else: + url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, headers) From 2edce52584ecd14ee626c6a1b6bbaeabb3985cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 28 Apr 2015 15:05:41 +0200 Subject: [PATCH 0489/2721] [vimeo] Fix password protected videos again (#5082) Since they have changed again to the previous format, I've modified 
the regex to match both formats. --- youtube_dl/extractor/vimeo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 398ca67c1..f300c7ca4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -177,7 +177,7 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') + token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token') data = urlencode_postdata({ 'password': password, 'token': token, @@ -441,7 +441,7 @@ class VimeoChannelIE(InfoExtractor): name="([^"]+)"\s+ value="([^"]*)" ''', login_form)) - token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') + token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token') fields['token'] = token fields['password'] = password post = urlencode_postdata(fields) From 39b62db1160f5a4770348f1d01daeb0ce049c28c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 28 Apr 2015 23:07:56 +0800 Subject: [PATCH 0490/2721] [youtube] Catch more alert messages (closes #5074) --- youtube_dl/extractor/youtube.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 07c0f6ef9..0869c9fd4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1291,12 +1291,22 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): page = self._download_webpage(url, playlist_id) more_widget_html = content_html = page - # Check if the playlist exists or is private - if re.search(r'<div class="yt-alert-message">[^<]*?(The|This) playlist (does not exist|is private)[^<]*?</div>', page) is not 
None: - raise ExtractorError( - 'The playlist doesn\'t exist or is private, use --username or ' - '--netrc to access it.', - expected=True) + for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page): + match = match.strip() + # Check if the playlist exists or is private + if re.match(r'[^<]*(The|This) playlist (does not exist|is private)[^<]*', match): + raise ExtractorError( + 'The playlist doesn\'t exist or is private, use --username or ' + '--netrc to access it.', + expected=True) + elif re.match(r'[^<]*Invalid parameters[^<]*', match): + raise ExtractorError( + 'Invalid parameters. Maybe URL is incorrect.', + expected=True) + elif re.match(r'[^<]*Choose your language[^<]*', match): + continue + else: + self.report_warning('Youtube gives an alert message: ' + match) # Extract the video ids from the playlist pages ids = [] From e70c7568c0226449190b57b988c1df10bf6508db Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 29 Apr 2015 00:01:09 +0800 Subject: [PATCH 0491/2721] [testtube] Detect Youtube iframes (fixes #4867) --- youtube_dl/extractor/testtube.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py index 6a7b5e49d..cbada136c 100644 --- a/youtube_dl/extractor/testtube.py +++ b/youtube_dl/extractor/testtube.py @@ -22,12 +22,30 @@ class TestTubeIE(InfoExtractor): 'uploader': 'DNews', 'uploader_id': 'dnews', }, + }, { + 'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping', + 'info_dict': { + 'id': 'fAGfJ4YjVus', + 'ext': 'mp4', + 'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science', + 'uploader': 'Science Channel', + 'uploader_id': 'ScienceChannel', + 'upload_date': '20150203', + 'description': 'md5:e61374030015bae1d2e22f096d4769d6', + } }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) + + youtube_url = self._html_search_regex( + 
r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', + webpage, 'youtube iframe', default=None) + if youtube_url: + return self.url_result(youtube_url, 'Youtube', video_id=display_id) + video_id = self._search_regex( r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);", webpage, 'video ID') From ec7c1e85e0b26e67752bbd3d25b8d5ed23ae0992 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 29 Apr 2015 00:24:58 +0800 Subject: [PATCH 0492/2721] [testtube] Fix test case 1 Seems the site now provides webm with higher bitrates --- youtube_dl/extractor/testtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py index cbada136c..26655d690 100644 --- a/youtube_dl/extractor/testtube.py +++ b/youtube_dl/extractor/testtube.py @@ -15,7 +15,7 @@ class TestTubeIE(InfoExtractor): 'id': '60163', 'display_id': '5-weird-ways-plants-can-eat-animals', 'duration': 275, - 'ext': 'mp4', + 'ext': 'webm', 'title': '5 Weird Ways Plants Can Eat Animals', 'description': 'Why have some plants evolved to eat meat?', 'thumbnail': 're:^https?://.*\.jpg$', From 9d8ba307ef18e737efc0be5164b53e0546288a4d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 29 Apr 2015 04:03:07 +0800 Subject: [PATCH 0493/2721] [yourupload] Fix extraction --- youtube_dl/extractor/yourupload.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py index 40fc4165f..2064f4a69 100644 --- a/youtube_dl/extractor/yourupload.py +++ b/youtube_dl/extractor/yourupload.py @@ -16,7 +16,7 @@ class YourUploadIE(InfoExtractor): _TESTS = [ { 'url': 'http://yourupload.com/watch/14i14h', - 'md5': 'bf5c2f95c4c917536e80936af7bc51e1', + 'md5': '5e2c63385454c557f97c4c4131a393cd', 'info_dict': { 'id': '14i14h', 'ext': 'mp4', @@ -38,16 +38,16 @@ class YourUploadIE(InfoExtractor): mobj = 
re.match(self._VALID_URL, url) video_id = mobj.group('id') - url = 'http://embed.yucache.net/{0:}'.format(video_id) - webpage = self._download_webpage(url, video_id) + embed_url = 'http://embed.yucache.net/{0:}'.format(video_id) + webpage = self._download_webpage(embed_url, video_id) title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) - url = self._og_search_video_url(webpage) + video_url = self._og_search_video_url(webpage) formats = [{ 'format_id': 'sd', - 'url': url, + 'url': video_url, }] return { @@ -55,4 +55,7 @@ class YourUploadIE(InfoExtractor): 'title': title, 'formats': formats, 'thumbnail': thumbnail, + 'http_headers': { + 'Referer': embed_url, + }, } From cbbece96a25443dbf5cb66819883efa2e9b66de4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 29 Apr 2015 04:05:14 +0800 Subject: [PATCH 0494/2721] [yourupload] Simplify --- youtube_dl/extractor/yourupload.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dl/extractor/yourupload.py index 2064f4a69..4e25d6f22 100644 --- a/youtube_dl/extractor/yourupload.py +++ b/youtube_dl/extractor/yourupload.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -35,25 +33,19 @@ class YourUploadIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) embed_url = 'http://embed.yucache.net/{0:}'.format(video_id) webpage = self._download_webpage(embed_url, video_id) title = self._og_search_title(webpage) - thumbnail = self._og_search_thumbnail(webpage) video_url = self._og_search_video_url(webpage) - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - }] + thumbnail = self._og_search_thumbnail(webpage, default=None) return { 'id': video_id, 'title': title, - 'formats': formats, + 'url': video_url, 'thumbnail': 
thumbnail, 'http_headers': { 'Referer': embed_url, From 5456d78f0c177bbdd3ad0b6221ff95b706104441 Mon Sep 17 00:00:00 2001 From: zouhair <zouhair@users.noreply.github.com> Date: Wed, 29 Apr 2015 10:07:49 -0400 Subject: [PATCH 0495/2721] Typo "incompatible" instead of "uncompatible" --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 977141881..827c88e0d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1384,7 +1384,7 @@ class YoutubeDL(object): requested_formats = info_dict['requested_formats'] if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): filename = os.path.splitext(filename)[0] + '.mkv' - self.report_warning('You have requested formats uncompatible for merge. ' + self.report_warning('You have requested formats incompatible for merge. ' 'The formats will be merged into mkv') if os.path.exists(encodeFilename(filename)): self.to_screen( From cf0649f8b76879f9d32ceab91e87d659ea8ec398 Mon Sep 17 00:00:00 2001 From: zouhair <zouhair@users.noreply.github.com> Date: Wed, 29 Apr 2015 11:03:10 -0400 Subject: [PATCH 0496/2721] Typo: twice "the the" to "the" --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 71230323c..48f58aae3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -47,7 +47,7 @@ class InfoExtractor(object): information possibly downloading the video to the file system, among other possible outcomes. - The type field determines the the type of the result. + The type field determines the type of the result. By far the most common value (and the default if _type is missing) is "video", which indicates a single video. 
@@ -572,7 +572,7 @@ class InfoExtractor(object): def _get_login_info(self): """ - Get the the login info as (username, password) + Get the login info as (username, password) It will look in the netrc file using the _NETRC_MACHINE value If there's no info available, return (None, None) """ From 9ee53a49f0f040f07e0f675e2077f22ace2acefe Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 29 Apr 2015 04:18:53 +0800 Subject: [PATCH 0497/2721] [YouPorn] Fix extractor --- youtube_dl/extractor/youporn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 6abe72f73..4ba7c36db 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -47,7 +47,7 @@ class YouPornIE(InfoExtractor): # Get JSON parameters json_params = self._search_regex( - [r'var\s+videoJa?son\s*=\s*({.+?});', + [r'videoJa?son\s*=\s*({.+})', r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'], webpage, 'JSON parameters') try: From bb865f3a5e7290f0da43d180f068f8715c39e3ae Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 30 Apr 2015 00:24:24 +0800 Subject: [PATCH 0498/2721] [niconico] Fix extraction and update tests (closes #5511) --- youtube_dl/extractor/niconico.py | 72 +++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index ddec7b338..0ca046ac2 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -14,7 +14,9 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, - unified_strdate, + parse_iso8601, + xpath_text, + determine_ext, ) @@ -32,6 +34,7 @@ class NiconicoIE(InfoExtractor): 'uploader': 'takuya0301', 'uploader_id': '2698420', 'upload_date': '20131123', + 'timestamp': 1385182762, 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, }, 
@@ -46,16 +49,31 @@ class NiconicoIE(InfoExtractor): 'id': 'nm14296458', 'ext': 'swf', 'title': '【鏡音リン】Dance on media【オリジナル】take2!', - 'description': 'md5:', + 'description': 'md5:689f066d74610b3b22e0f1739add0f58', 'uploader': 'りょうた', 'uploader_id': '18822557', 'upload_date': '20110429', + 'timestamp': 1304065916, 'duration': 209, }, 'params': { 'username': 'ydl.niconico@gmail.com', 'password': 'youtube-dl', }, + }, { + # 'video exists but is marked as "deleted" + 'url': 'http://www.nicovideo.jp/watch/sm10000', + 'md5': '38e53c9aad548f3ecf01ca7680b59b08', + 'info_dict': { + 'id': 'sm10000', + 'ext': 'unknown_video', + 'description': 'deleted', + 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', + }, + 'params': { + 'username': 'ydl.niconico@gmail.com', + 'password': 'youtube-dl', + } }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)' @@ -95,9 +113,10 @@ class NiconicoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - # Get video webpage. We are not actually interested in it, but need - # the cookies in order to be able to download the info webpage - self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) + # Get video webpage. 
We are not actually interested in it for normal + # cases, but need the cookies in order to be able to download the + # info webpage + webpage = self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) video_info = self._download_xml( 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, @@ -127,22 +146,34 @@ class NiconicoIE(InfoExtractor): flv_info_request, video_id, note='Downloading flv info', errnote='Unable to download flv info') - if 'deleted=' in flv_info_webpage: - raise ExtractorError('The video has been deleted.', - expected=True) - video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] + flv_info = compat_urlparse.parse_qs(flv_info_webpage) + if 'url' not in flv_info: + if 'deleted' in flv_info: + raise ExtractorError('The video has been deleted.', + expected=True) + else: + raise ExtractorError('Unable to find video URL') + + video_real_url = flv_info['url'][0] # Start extracting information - title = video_info.find('.//title').text - extension = video_info.find('.//movie_type').text + title = xpath_text(video_info, './/title') + if not title: + title = self._html_search_regex( + r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>', + webpage, 'video title') + + extension = xpath_text(video_info, './/movie_type') + if not extension: + extension = determine_ext(video_real_url) video_format = extension.upper() - thumbnail = video_info.find('.//thumbnail_url').text - description = video_info.find('.//description').text - upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) - view_count = int_or_none(video_info.find('.//view_counter').text) - comment_count = int_or_none(video_info.find('.//comment_num').text) - duration = parse_duration(video_info.find('.//length').text) - webpage_url = video_info.find('.//watch_url').text + thumbnail = xpath_text(video_info, './/thumbnail_url') + description = xpath_text(video_info, './/description') + timestamp = 
parse_iso8601(xpath_text(video_info, './/first_retrieve')) + view_count = int_or_none(xpath_text(video_info, './/view_counter')) + comment_count = int_or_none(xpath_text(video_info, './/comment_num')) + duration = parse_duration(xpath_text(video_info, './/length')) + webpage_url = xpath_text(video_info, './/watch_url') if video_info.find('.//ch_id') is not None: uploader_id = video_info.find('.//ch_id').text @@ -153,7 +184,7 @@ class NiconicoIE(InfoExtractor): else: uploader_id = uploader = None - return { + ret = { 'id': video_id, 'url': video_real_url, 'title': title, @@ -162,13 +193,14 @@ class NiconicoIE(InfoExtractor): 'thumbnail': thumbnail, 'description': description, 'uploader': uploader, - 'upload_date': upload_date, + 'timestamp': timestamp, 'uploader_id': uploader_id, 'view_count': view_count, 'comment_count': comment_count, 'duration': duration, 'webpage_url': webpage_url, } + return dict((k, v) for k, v in ret.items() if v is not None) class NiconicoPlaylistIE(InfoExtractor): From 59d814f79341341e6390392a09e628ee12a6f18d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 30 Apr 2015 00:47:52 +0800 Subject: [PATCH 0499/2721] [niconico] Remove credentials from tests and enhance title extraction All test videos can be downloaded without username and password now. 
--- youtube_dl/extractor/niconico.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 0ca046ac2..a9b770cb3 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -38,13 +38,10 @@ class NiconicoIE(InfoExtractor): 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', 'duration': 33, }, - 'params': { - 'username': 'ydl.niconico@gmail.com', - 'password': 'youtube-dl', - }, }, { + # File downloaded with and without credentials are different, so omit + # the md5 field 'url': 'http://www.nicovideo.jp/watch/nm14296458', - 'md5': '8db08e0158457cf852a31519fceea5bc', 'info_dict': { 'id': 'nm14296458', 'ext': 'swf', @@ -56,10 +53,6 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1304065916, 'duration': 209, }, - 'params': { - 'username': 'ydl.niconico@gmail.com', - 'password': 'youtube-dl', - }, }, { # 'video exists but is marked as "deleted" 'url': 'http://www.nicovideo.jp/watch/sm10000', @@ -70,10 +63,6 @@ class NiconicoIE(InfoExtractor): 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', }, - 'params': { - 'username': 'ydl.niconico@gmail.com', - 'password': 'youtube-dl', - } }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)' @@ -158,6 +147,8 @@ class NiconicoIE(InfoExtractor): # Start extracting information title = xpath_text(video_info, './/title') + if not title: + title = self._og_search_title(webpage, default=None) if not title: title = self._html_search_regex( r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>', From b2e8e7dab567ed9b27817c5dd0cf173bc7fb8cfa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 30 Apr 2015 02:24:05 +0800 Subject: [PATCH 0500/2721] [niconico] Try to extract all optional fields from various sources --- youtube_dl/extractor/niconico.py | 57 ++++++++++++++++++++++++++++---- 1 file 
changed, 51 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index a9b770cb3..dd16d0042 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re import json +import datetime from .common import InfoExtractor from ..compat import ( @@ -55,13 +56,16 @@ class NiconicoIE(InfoExtractor): }, }, { # 'video exists but is marked as "deleted" + # md5 is unstable 'url': 'http://www.nicovideo.jp/watch/sm10000', - 'md5': '38e53c9aad548f3ecf01ca7680b59b08', 'info_dict': { 'id': 'sm10000', 'ext': 'unknown_video', 'description': 'deleted', 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>', + 'upload_date': '20071224', + 'timestamp': 1198527840, # timestamp field has different value if logged in + 'duration': 304, }, }] @@ -154,17 +158,59 @@ class NiconicoIE(InfoExtractor): r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>', webpage, 'video title') + watch_api_data_string = self._html_search_regex( + r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>', + webpage, 'watch api data', default=None) + watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {} + video_detail = watch_api_data.get('videoDetail', {}) + extension = xpath_text(video_info, './/movie_type') if not extension: extension = determine_ext(video_real_url) video_format = extension.upper() - thumbnail = xpath_text(video_info, './/thumbnail_url') + + thumbnail = ( + xpath_text(video_info, './/thumbnail_url') or + self._html_search_meta('image', webpage, 'thumbnail', default=None) or + video_detail.get('thumbnail')) + description = xpath_text(video_info, './/description') + timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve')) + if not timestamp: + match = self._html_search_meta('datePublished', webpage, 'date published', default=None) + if match: + timestamp = parse_iso8601(match.replace('+', ':00+')) + if 
not timestamp and video_detail.get('postedAt'): + timestamp = parse_iso8601( + video_detail['postedAt'].replace('/', '-'), + delimiter=' ', timezone=datetime.timedelta(hours=9)) + view_count = int_or_none(xpath_text(video_info, './/view_counter')) + if not view_count: + match = self._html_search_regex( + r'>Views: <strong[^>]*>([^<]+)</strong>', + webpage, 'view count', default=None) + if match: + view_count = int_or_none(match.replace(',', '')) + view_count = view_count or video_detail.get('viewCount') + comment_count = int_or_none(xpath_text(video_info, './/comment_num')) - duration = parse_duration(xpath_text(video_info, './/length')) - webpage_url = xpath_text(video_info, './/watch_url') + if not comment_count: + match = self._html_search_regex( + r'>Comments: <strong[^>]*>([^<]+)</strong>', + webpage, 'comment count', default=None) + if match: + comment_count = int_or_none(match.replace(',', '')) + comment_count = comment_count or video_detail.get('commentCount') + + duration = (parse_duration( + xpath_text(video_info, './/length') or + self._html_search_meta( + 'video:duration', webpage, 'video duration', default=None)) or + video_detail.get('length')) + + webpage_url = xpath_text(video_info, './/watch_url') or url if video_info.find('.//ch_id') is not None: uploader_id = video_info.find('.//ch_id').text @@ -175,7 +221,7 @@ class NiconicoIE(InfoExtractor): else: uploader_id = uploader = None - ret = { + return { 'id': video_id, 'url': video_real_url, 'title': title, @@ -191,7 +237,6 @@ class NiconicoIE(InfoExtractor): 'duration': duration, 'webpage_url': webpage_url, } - return dict((k, v) for k, v in ret.items() if v is not None) class NiconicoPlaylistIE(InfoExtractor): From 965cb8d530e4ced61a9bc42530f7f91b67c709e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 29 Apr 2015 22:46:19 +0200 Subject: [PATCH 0501/2721] [escapist] pep8 fixes --- youtube_dl/extractor/escapist.py 
| 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 600ebf078..8facf1185 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -76,7 +76,8 @@ class EscapistIE(InfoExtractor): formats = [] for q in ['lq', 'hq', 'hd']: - config_req = compat_urllib_request.Request('http://www.escapistmagazine.com/videos/' + config_req = compat_urllib_request.Request( + 'http://www.escapistmagazine.com/videos/' 'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q)) config_req.add_header('Referer', url) config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper()) @@ -92,8 +93,7 @@ class EscapistIE(InfoExtractor): 'url': v, 'format_id': determine_ext(v) + '_' + q + str(i), 'quality': quality(q), - }) - + }) return { 'id': video_id, From 8dd5418803a25de89d08cdb9d32f80f71c5d6c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 29 Apr 2015 22:53:18 +0200 Subject: [PATCH 0502/2721] Make 'best' format only match non-DASH formats (closes #5554) Otherwise it's impossible to only download non-DASH formats, for example `best[height=?480]/best` would download a DASH video if it's the only one with height=480, instead for falling back to the second format specifier. For audio only urls (soundcloud, bandcamp ...), the best audio will be downloaded as before. 
--- test/test_YoutubeDL.py | 4 ++-- youtube_dl/YoutubeDL.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index bb4a65ee1..82b827536 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -237,7 +237,7 @@ class TestFormatSelection(unittest.TestCase): f2['url'] = 'url:' + f2id info_dict = _make_result([f1, f2], extractor='youtube') - ydl = YDL() + ydl = YDL({'format': 'best/bestvideo'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) @@ -245,7 +245,7 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], f1id) info_dict = _make_result([f2, f1], extractor='youtube') - ydl = YDL() + ydl = YDL({'format': 'best/bestvideo'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 827c88e0d..eee9c0154 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -915,7 +915,14 @@ class YoutubeDL(object): return None if format_spec == 'best' or format_spec is None: - return available_formats[-1] + audiovideo_formats = [ + f for f in available_formats + if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] + if audiovideo_formats: + return audiovideo_formats[-1] + # for audio only urls, 'best' selects the best audio format + elif all(f.get('acodec') != 'none' for f in available_formats): + return available_formats[-1] elif format_spec == 'worst': audiovideo_formats = [ f for f in available_formats From 621ffe7bf420aa1a227e823edf2f1acbc67660d0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 30 Apr 2015 17:05:02 +0800 Subject: [PATCH 0503/2721] [niconico] Fix so* video extraction (fixes #4874) (#2087) --- youtube_dl/extractor/niconico.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/niconico.py 
b/youtube_dl/extractor/niconico.py index dd16d0042..3cecebf95 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -67,6 +67,18 @@ class NiconicoIE(InfoExtractor): 'timestamp': 1198527840, # timestamp field has different value if logged in 'duration': 304, }, + }, { + 'url': 'http://www.nicovideo.jp/watch/so22543406', + 'info_dict': { + 'id': '1388129933', + 'ext': 'mp4', + 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~', + 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1', + 'timestamp': 1388851200, + 'upload_date': '20140104', + 'uploader': 'アニメロチャンネル', + 'uploader_id': '312', + } }] _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)' @@ -109,7 +121,10 @@ class NiconicoIE(InfoExtractor): # Get video webpage. We are not actually interested in it for normal # cases, but need the cookies in order to be able to download the # info webpage - webpage = self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id) + webpage, handle = self._download_webpage_handle( + 'http://www.nicovideo.jp/watch/' + video_id, video_id) + if video_id.startswith('so'): + video_id = self._match_id(handle.geturl()) video_info = self._download_xml( 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, From c4a21bc9db1868e8be114f496899f6786b9982ec Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 30 Apr 2015 18:23:35 +0800 Subject: [PATCH 0504/2721] [bilibili] Extract multipart videos (closes #3250) --- youtube_dl/extractor/bilibili.py | 74 +++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 904d9a8b4..7ca835e31 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import itertools from .common import InfoExtractor from ..utils import ( @@ 
-14,18 +15,25 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '2c301e4dab317596e837c3e7633e7d86', 'info_dict': { - 'id': '1074402', + 'id': '1074402_part1', 'ext': 'flv', 'title': '【金坷垃】金泡沫', 'duration': 308, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', }, - } + }, { + 'url': 'http://www.bilibili.com/video/av1041170/', + 'info_dict': { + 'id': '1041170', + 'title': '【BD1080P】刀语【诸神&异域】', + }, + 'playlist_count': 9, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -57,19 +65,14 @@ class BiliBiliIE(InfoExtractor): cid = self._search_regex(r'cid=(\d+)', webpage, 'cid') + entries = [] + lq_doc = self._download_xml( 'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid, video_id, note='Downloading LQ video info' ) - lq_durl = lq_doc.find('./durl') - formats = [{ - 'format_id': 'lq', - 'quality': 1, - 'url': lq_durl.find('./url').text, - 'filesize': int_or_none( - lq_durl.find('./size'), get_attr='text'), - }] + lq_durls = lq_doc.findall('./durl') hq_doc = self._download_xml( 'http://interface.bilibili.com/playurl?appkey=1&cid=%s' % cid, @@ -77,23 +80,44 @@ class BiliBiliIE(InfoExtractor): note='Downloading HQ video info', fatal=False, ) - if hq_doc is not False: - hq_durl = hq_doc.find('./durl') - formats.append({ - 'format_id': 'hq', - 'quality': 2, - 'ext': 'flv', - 'url': hq_durl.find('./url').text, + hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None) + + assert len(lq_durls) == len(hq_durls) + + i = 1 + for lq_durl, hq_durl in zip(lq_durls, hq_durls): + formats = [{ + 'format_id': 'lq', + 'quality': 1, + 'url': lq_durl.find('./url').text, 'filesize': int_or_none( - hq_durl.find('./size'), get_attr='text'), + lq_durl.find('./size'), get_attr='text'), + }] + if hq_durl: + formats.append({ + 'format_id': 'hq', + 'quality': 
2, + 'ext': 'flv', + 'url': hq_durl.find('./url').text, + 'filesize': int_or_none( + hq_durl.find('./size'), get_attr='text'), + }) + self._sort_formats(formats) + + entries.append({ + 'id': '%s_part%d' % (video_id, i), + 'title': title, + 'formats': formats, + 'duration': duration, + 'upload_date': upload_date, + 'thumbnail': thumbnail, }) - self._sort_formats(formats) + i += 1 + return { + '_type': 'multi_video', + 'entries': entries, 'id': video_id, - 'title': title, - 'formats': formats, - 'duration': duration, - 'upload_date': upload_date, - 'thumbnail': thumbnail, + 'title': title } From f7f1df1d82d556c0726898b9de2f7f3824c1be5a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 30 Apr 2015 22:37:41 +0800 Subject: [PATCH 0505/2721] [VeeHD] Enhance extraction and fix tests (fixes #4965) --- youtube_dl/extractor/veehd.py | 36 +++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 96353f525..7fdeb784d 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -17,7 +17,9 @@ from ..utils import ( class VeeHDIE(InfoExtractor): _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)' - _TEST = { + # Seems VeeHD videos have multiple copies on several servers, all of + # whom have different MD5 checksums, so omit md5 field in all tests + _TESTS = [{ 'url': 'http://veehd.com/video/4639434_Solar-Sinter', 'info_dict': { 'id': '4639434', @@ -26,7 +28,26 @@ class VeeHDIE(InfoExtractor): 'uploader_id': 'VideoEyes', 'description': 'md5:46a840e8692ddbaffb5f81d9885cb457', }, - } + 'skip': 'Video deleted', + }, { + 'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling', + 'info_dict': { + 'id': '4905758', + 'ext': 'mp4', + 'title': 'Elysian Fields - Channeling', + 'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b', + 'uploader_id': 'spotted', + } + }, { + 'url': 
'http://veehd.com/video/4665804_Tell-No-One-Ne-le-dis-a-personne-2006-French-EngSoftSubs-Re-Up', + 'info_dict': { + 'id': '4665804', + 'ext': 'avi', + 'title': 'Tell No One (Ne le dis a personne) 2006 French(EngSoftSubs) Re-Up', + 'description': 'md5:d660cca685549776f37165e9a10b60ba', + 'uploader_id': 'belial2549', + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -48,13 +69,21 @@ class VeeHDIE(InfoExtractor): player_page = self._download_webpage( player_url, video_id, 'Downloading player page') + video_url = None + config_json = self._search_regex( r'value=\'config=({.+?})\'', player_page, 'config json', default=None) if config_json: config = json.loads(config_json) video_url = compat_urlparse.unquote(config['clip']['url']) - else: + + if not video_url: + video_url = self._html_search_regex( + r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"', + player_page, 'video url', default=None) + + if not video_url: iframe_src = self._search_regex( r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url') iframe_url = 'http://veehd.com/%s' % iframe_src @@ -82,7 +111,6 @@ class VeeHDIE(InfoExtractor): 'id': video_id, 'title': title, 'url': video_url, - 'ext': 'mp4', 'uploader_id': uploader_id, 'thumbnail': thumbnail, 'description': description, From e01c56f9e12d7abb2e3b548818a689146092806e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 30 Apr 2015 21:06:51 +0600 Subject: [PATCH 0506/2721] [YoutubeDL] Generalize best/worst format match behavior --- youtube_dl/YoutubeDL.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eee9c0154..9d4a2dce8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -914,22 +914,16 @@ class YoutubeDL(object): if not available_formats: return None - if format_spec == 'best' or format_spec is None: + if format_spec in ['best', 'worst', None]: + format_idx = 0 if format_spec == 
'worst' else -1 audiovideo_formats = [ f for f in available_formats if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] if audiovideo_formats: - return audiovideo_formats[-1] - # for audio only urls, 'best' selects the best audio format + return audiovideo_formats[format_idx] + # for audio only urls, select the best/worst audio format elif all(f.get('acodec') != 'none' for f in available_formats): - return available_formats[-1] - elif format_spec == 'worst': - audiovideo_formats = [ - f for f in available_formats - if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] - if audiovideo_formats: - return audiovideo_formats[0] - return available_formats[0] + return available_formats[format_idx] elif format_spec == 'bestaudio': audio_formats = [ f for f in available_formats From cd298882cd6f8ed2571dd372f684ec17e992fd9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 30 Apr 2015 21:25:17 +0600 Subject: [PATCH 0507/2721] [vporn] Fix metadata extraction (#5560) --- youtube_dl/extractor/vporn.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 2d23effcc..04ed0b381 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -64,29 +64,29 @@ class VpornIE(InfoExtractor): title = self._html_search_regex( r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() description = self._html_search_regex( - r'<div class="description_txt">(.*?)</div>', webpage, 'description', fatal=False) + r'class="(?:descr|description_txt)">(.*?)</div>', + webpage, 'description', fatal=False) thumbnail = self._html_search_regex( r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None) if thumbnail: thumbnail = 'http://www.vporn.com' + thumbnail uploader = self._html_search_regex( - r'(?s)UPLOADED BY.*?<a href="/user/[^"]+">([^<]+)</a>', + r'(?s)Uploaded by:.*?<a 
href="/user/[^"]+">([^<]+)</a>', webpage, 'uploader', fatal=False) categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage) duration = parse_duration(self._search_regex( - r'duration (\d+ min \d+ sec)', webpage, 'duration', fatal=False)) + r'Runtime:\s*</span>\s*(\d+ min \d+ sec)', + webpage, 'duration', fatal=False)) - view_count = str_to_int(self._html_search_regex( - r'<span>([\d,\.]+) VIEWS</span>', webpage, 'view count', fatal=False)) - like_count = str_to_int(self._html_search_regex( - r'<span id="like" class="n">([\d,\.]+)</span>', webpage, 'like count', fatal=False)) - dislike_count = str_to_int(self._html_search_regex( - r'<span id="dislike" class="n">([\d,\.]+)</span>', webpage, 'dislike count', fatal=False)) + view_count = str_to_int(self._search_regex( + r'class="views">([\d,\.]+) [Vv]iews<', + webpage, 'view count', fatal=False)) comment_count = str_to_int(self._html_search_regex( - r'<h4>Comments \(<b>([\d,\.]+)</b>\)</h4>', webpage, 'comment count', fatal=False)) + r"'Comments \(([\d,\.]+)\)'", + webpage, 'comment count', default=None)) formats = [] @@ -117,8 +117,6 @@ class VpornIE(InfoExtractor): 'categories': categories, 'duration': duration, 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, 'comment_count': comment_count, 'age_limit': 18, 'formats': formats, From 482a1258de6af0a15b6e7859d244f9125cadef47 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 30 Apr 2015 22:58:03 +0800 Subject: [PATCH 0508/2721] [VeeHD] Replace the third test case due to copyright issues --- youtube_dl/extractor/veehd.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 7fdeb784d..346edf485 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -39,13 +39,13 @@ class VeeHDIE(InfoExtractor): 'uploader_id': 'spotted', } }, { - 'url': 
'http://veehd.com/video/4665804_Tell-No-One-Ne-le-dis-a-personne-2006-French-EngSoftSubs-Re-Up', + 'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer', 'info_dict': { - 'id': '4665804', + 'id': '2046729', 'ext': 'avi', - 'title': 'Tell No One (Ne le dis a personne) 2006 French(EngSoftSubs) Re-Up', - 'description': 'md5:d660cca685549776f37165e9a10b60ba', - 'uploader_id': 'belial2549', + 'title': '2012 (2009) DivX Trailer', + 'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b', + 'uploader_id': 'Movie_Trailers', } }] From 7a03280df4555998fc99399907062b62383db2c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 30 Apr 2015 21:31:38 +0600 Subject: [PATCH 0509/2721] [vporn] More metadata extraction fixes and tests update (#5560) --- youtube_dl/extractor/vporn.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 04ed0b381..92c90e517 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -27,9 +27,6 @@ class VpornIE(InfoExtractor): 'duration': 393, 'age_limit': 18, 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, } }, { @@ -47,9 +44,6 @@ class VpornIE(InfoExtractor): 'duration': 588, 'age_limit': 18, 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, } }, ] @@ -72,10 +66,10 @@ class VpornIE(InfoExtractor): thumbnail = 'http://www.vporn.com' + thumbnail uploader = self._html_search_regex( - r'(?s)Uploaded by:.*?<a href="/user/[^"]+">([^<]+)</a>', + r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>', webpage, 'uploader', fatal=False) - categories = re.findall(r'<a href="/cat/[^"]+">([^<]+)</a>', webpage) + categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage) duration = parse_duration(self._search_regex( r'Runtime:\s*</span>\s*(\d+ min \d+ sec)', From 4070b458ece46a29dad9be2312a7daa48bb2f1d7 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 30 Apr 2015 23:55:05 +0600 Subject: [PATCH 0510/2721] [YoutubeDL] Do not write requested info in info JSON file (Closes #5562, closes #5564) --- youtube_dl/YoutubeDL.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 9d4a2dce8..e747c6892 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1337,8 +1337,11 @@ class YoutubeDL(object): self.to_screen('[info] Video description metadata is already present') else: self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) + filtered_info_dict = dict( + (k, v) for k, v in info_dict.items() + if not k in ['requested_formats', 'requested_subtitles']) try: - write_json_file(info_dict, infofn) + write_json_file(filtered_info_dict, infofn) except (OSError, IOError): self.report_error('Cannot write metadata to JSON file ' + infofn) return From df8301fef55f9144f06337c10b8570b6560caa24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 30 Apr 2015 20:18:42 +0200 Subject: [PATCH 0511/2721] [YoutubeDL] pep8: use 'k not in' instead of 'not k in' --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e747c6892..584dbf8a6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1339,7 +1339,7 @@ class YoutubeDL(object): self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) filtered_info_dict = dict( (k, v) for k, v in info_dict.items() - if not k in ['requested_formats', 'requested_subtitles']) + if k not in ['requested_formats', 'requested_subtitles']) try: write_json_file(filtered_info_dict, infofn) except (OSError, IOError): From 67fc8ecd53b4ffe5375a741bf0b1282f7a44587d Mon Sep 17 00:00:00 2001 From: Naglis 
Jonaitis <njonaitis@gmail.com> Date: Thu, 30 Apr 2015 21:26:55 +0300 Subject: [PATCH 0512/2721] [dreisat] Extend _VALID_URL (Closes #5548) --- youtube_dl/extractor/dreisat.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 05bb22ddf..8ac8587be 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -11,19 +11,25 @@ from ..utils import ( class DreiSatIE(InfoExtractor): IE_NAME = '3sat' - _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' - _TEST = { - 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', - 'md5': 'be37228896d30a88f315b638900a026e', - 'info_dict': { - 'id': '45918', - 'ext': 'mp4', - 'title': 'Waidmannsheil', - 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', - 'uploader': '3sat', - 'upload_date': '20140913' - } - } + _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' + _TESTS = [ + { + 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', + 'md5': 'be37228896d30a88f315b638900a026e', + 'info_dict': { + 'id': '45918', + 'ext': 'mp4', + 'title': 'Waidmannsheil', + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', + 'uploader': '3sat', + 'upload_date': '20140913' + } + }, + { + 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066', + 'only_matching': True, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From cb202fd28635bf82836a025c631339665ba610af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2015 00:44:34 +0600 Subject: [PATCH 0513/2721] [YoutubeDL] Filter requested info fields on `--load-info` as well In order to properly handle JSON info files generated by youtube-dl versions prior to 
4070b458ece46a29dad9be2312a7daa48bb2f1d7 --- youtube_dl/YoutubeDL.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 584dbf8a6..55b429f31 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1337,11 +1337,8 @@ class YoutubeDL(object): self.to_screen('[info] Video description metadata is already present') else: self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn) - filtered_info_dict = dict( - (k, v) for k, v in info_dict.items() - if k not in ['requested_formats', 'requested_subtitles']) try: - write_json_file(filtered_info_dict, infofn) + write_json_file(self.filter_requested_info(info_dict), infofn) except (OSError, IOError): self.report_error('Cannot write metadata to JSON file ' + infofn) return @@ -1491,7 +1488,7 @@ class YoutubeDL(object): [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load - info = json.loads('\n'.join(f)) + info = self.filter_requested_info(json.loads('\n'.join(f))) try: self.process_ie_result(info, download=True) except DownloadError: @@ -1503,6 +1500,12 @@ class YoutubeDL(object): raise return self._download_retcode + @staticmethod + def filter_requested_info(info_dict): + return dict( + (k, v) for k, v in info_dict.items() + if k not in ['requested_formats', 'requested_subtitles']) + def post_process(self, filename, ie_info): """Run all the postprocessors on the given file.""" info = dict(ie_info) From 6a8422b942f5140238106c43e27d869d70126446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2015 02:49:06 +0600 Subject: [PATCH 0514/2721] [foxsports] Add extractor (Closes #5517) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/foxsports.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 
youtube_dl/extractor/foxsports.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 641c45f43..fced42bd9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -161,6 +161,7 @@ from .footyroom import FootyRoomIE from .fourtube import FourTubeIE from .foxgay import FoxgayIE from .foxnews import FoxNewsIE +from .foxsports import FoxSportsIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE from .francetv import ( diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py new file mode 100644 index 000000000..363866b64 --- /dev/null +++ b/youtube_dl/extractor/foxsports.py @@ -0,0 +1,32 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class FoxSportsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxsports\.com/video\?vid=(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.foxsports.com/video?vid=432609859715', + 'info_dict': { + 'id': 'gA0bHB3Ladz3', + 'ext': 'flv', + 'title': 'Courtney Lee on going up 2-0 in series vs. 
Blazers', + 'description': 'Courtney Lee talks about Memphis being focused.', + }, + 'add_ie': ['ThePlatform'], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + config = self._parse_json( + self._search_regex( + r"data-player-config='([^']+)'", webpage, 'data player config'), + video_id) + + return self.url_result(smuggle_url( + config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True})) From 1dbd717eb49d075fa1efabc674e8074fd165eb0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2015 02:51:55 +0600 Subject: [PATCH 0515/2721] [theplaform] Fix FutureWarning --- youtube_dl/extractor/theplatform.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 6a006b2d2..92731ad3d 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -129,7 +129,9 @@ class ThePlatformIE(InfoExtractor): head = meta.find(_x('smil:head')) body = meta.find(_x('smil:body')) - f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video')) + f4m_node = body.find(_x('smil:seq//smil:video')) + if f4m_node is None: + f4m_node = body.find(_x('smil:seq/smil:video')) if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' 
not in f4m_url: @@ -142,7 +144,9 @@ class ThePlatformIE(InfoExtractor): formats = [] switch = body.find(_x('smil:switch')) if switch is None: - switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch')) + switch = body.find(_x('smil:par//smil:switch')) + if switch is None: + switch = body.find(_x('smil:par/smil:switch')) if switch is None: switch = body.find(_x('smil:par')) if switch is not None: @@ -163,7 +167,9 @@ class ThePlatformIE(InfoExtractor): 'vbr': vbr, }) else: - switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch')) + switch = body.find(_x('smil:seq//smil:switch')) + if switch is None: + switch = body.find(_x('smil:seq/smil:switch')) for f in switch.findall(_x('smil:video')): attr = f.attrib vbr = int_or_none(attr.get('system-bitrate'), 1000) From 8683b4d8d91a7c6b72ca4a12bf6b538cbb4b2a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2015 03:59:13 +0600 Subject: [PATCH 0516/2721] [bbccouk] Improve extraction (Closes #5530) --- youtube_dl/extractor/bbccouk.py | 35 ++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index abc34a576..22c2843be 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + int_or_none, +) from ..compat import compat_HTTPError @@ -326,16 +329,29 @@ class BBCCoUkIE(InfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') - programme_id = self._search_regex( - r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None) + thumbnail = self._og_search_thumbnail(webpage) + + programme_id = None + + tviplayer = self._search_regex( + 
r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById', + webpage, 'player', default=None) + + if tviplayer: + player = self._parse_json(tviplayer, group_id).get('player', {}) + duration = int_or_none(player.get('duration')) + programme_id = player.get('vpid') + + if not programme_id: + programme_id = self._search_regex( + r'"vpid"\s*:\s*"([\da-z]{8})"', webpage, 'vpid', fatal=False, default=None) + if programme_id: - player = self._download_json( - 'http://www.bbc.co.uk/iplayer/episode/%s.json' % group_id, - group_id)['jsConf']['player'] - title = player['title'] - description = player['subtitle'] - duration = player['duration'] formats, subtitles = self._download_media_selector(programme_id) + title = self._og_search_title(webpage) + description = self._search_regex( + r'<p class="medium-description">([^<]+)</p>', + webpage, 'description', fatal=False) else: programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) @@ -345,6 +361,7 @@ class BBCCoUkIE(InfoExtractor): 'id': programme_id, 'title': title, 'description': description, + 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, 'subtitles': subtitles, From e68ae99a417f39db269dcffb5011cfcc8341552d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2015 04:02:56 +0600 Subject: [PATCH 0517/2721] [bbccouk] Add test for #5530 --- youtube_dl/extractor/bbccouk.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index 22c2843be..dbfbbb5ca 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -115,6 +115,20 @@ class BBCCoUkIE(InfoExtractor): # rtmp download 'skip_download': True, } + }, { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', + 'info_dict': { + 'id': 'p02n76xf', + 'ext': 'flv', + 'title': 'Natural World, 2015-2016: 2. 
Super Powered Owls', + 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d', + 'duration': 3540, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'geolocation', }, { 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', 'only_matching': True, From 650cfd0cb0e330c8e6b1a5cc43a5a20d54b4714c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2015 04:07:30 +0600 Subject: [PATCH 0518/2721] [bbccouk] Mute thumbnail --- youtube_dl/extractor/bbccouk.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py index dbfbbb5ca..249bc6bbd 100644 --- a/youtube_dl/extractor/bbccouk.py +++ b/youtube_dl/extractor/bbccouk.py @@ -343,8 +343,6 @@ class BBCCoUkIE(InfoExtractor): webpage = self._download_webpage(url, group_id, 'Downloading video page') - thumbnail = self._og_search_thumbnail(webpage) - programme_id = None tviplayer = self._search_regex( @@ -375,7 +373,7 @@ class BBCCoUkIE(InfoExtractor): 'id': programme_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, 'formats': formats, 'subtitles': subtitles, From 861e65eb0573c824cf82e1f31b7169df2efa74ab Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 1 May 2015 12:31:31 +0800 Subject: [PATCH 0519/2721] [yahoo] Extend _VALID_URL --- youtube_dl/extractor/yahoo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b777159c5..bf4e659ac 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -22,7 +22,7 @@ from .nbc import NBCSportsVPlayerIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = 
r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+?)-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)' _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -140,12 +140,15 @@ class YahooIE(InfoExtractor): 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', } + }, { + 'url': 'https://tw.news.yahoo.com/-100120367.html', + 'only_matching': True, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or self._match_id(url) page_id = mobj.group('id') url = mobj.group('url') host = mobj.group('host') From 083c1bb960715031aed63dfb834a5bdc5ac6ff9a Mon Sep 17 00:00:00 2001 From: Nikoli <nikoli@gmx.us> Date: Wed, 15 Apr 2015 20:27:40 +0300 Subject: [PATCH 0520/2721] Add ability to embed subtitles in mkv files (closes #5434) --- README.md | 2 +- youtube_dl/options.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e1f30ca47..3432546fc 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,7 @@ which means you can modify it, redistribute it or use it however you like. 
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default - --embed-subs Embed subtitles in the video (only for mp4 videos) + --embed-subs Embed subtitles in the video (only for mkv and mp4 videos) --embed-thumbnail Embed thumbnail in the audio as cover art --add-metadata Write metadata to the video file --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 4c9d39d9a..d0aa8296d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -698,7 +698,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='Embed subtitles in the video (only for mp4 videos)') + help='Embed subtitles in the video (only for mkv and mp4 videos)') postproc.add_option( '--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 1765f4969..214de39f9 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -501,8 +501,8 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): return cls._lang_map.get(code[:2]) def run(self, information): - if information['ext'] != 'mp4': - self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files') + if information['ext'] not in ['mp4', 'mkv']: + self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files') return [], information subtitles = information.get('requested_subtitles') if not subtitles: @@ -520,8 +520,9 @@ class 
FFmpegEmbedSubtitlePP(FFmpegPostProcessor): # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', - '-c:s', 'mov_text', ] + if information['ext'] == 'mp4': + opts += ['-c:s', 'mov_text'] for (i, lang) in enumerate(sub_langs): opts.extend(['-map', '%d:0' % (i + 1)]) lang_code = self._conver_lang_code(lang) From 5890eef6b021845cb68882107364f1b04d773913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 May 2015 17:43:06 +0600 Subject: [PATCH 0521/2721] [pbs] Add support for HD (Closes #3564, closes #5390) --- youtube_dl/extractor/pbs.py | 56 ++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index afce732e1..761bd6d8d 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -5,6 +5,8 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, + determine_ext, + int_or_none, unified_strdate, US_RATINGS, ) @@ -149,21 +151,44 @@ class PBSIE(InfoExtractor): for vid_id in video_id] return self.playlist_result(entries, display_id) - info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id - info = self._download_json(info_url, display_id) + info = self._download_json( + 'http://video.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, + display_id) - redirect_url = info['alternate_encoding']['url'] - redirect_info = self._download_json( - redirect_url + '?format=json', display_id, - 'Downloading video url info') - if redirect_info['status'] == 'error': - if redirect_info['http_code'] == 403: - message = ( - 'The video is not available in your region due to ' - 'right restrictions') + formats = [] + for encoding_name in ('recommended_encoding', 'alternate_encoding'): + redirect = info.get(encoding_name) + if not redirect: + continue + redirect_url = redirect.get('url') + if not redirect_url: + continue + + 
redirect_info = self._download_json( + redirect_url + '?format=json', display_id, + 'Downloading %s video url info' % encoding_name) + + if redirect_info['status'] == 'error': + if redirect_info['http_code'] == 403: + message = ( + 'The video is not available in your region due to ' + 'right restrictions') + else: + message = redirect_info['message'] + raise ExtractorError(message, expected=True) + + format_url = redirect_info.get('url') + if not format_url: + continue + + if determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, display_id, 'mp4', preference=1, m3u8_id='hls')) else: - message = redirect_info['message'] - raise ExtractorError(message, expected=True) + formats.append({ + 'url': format_url, + }) + self._sort_formats(formats) rating_str = info.get('rating') if rating_str is not None: @@ -174,11 +199,10 @@ class PBSIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'title': info['title'], - 'url': redirect_info['url'], - 'ext': 'mp4', 'description': info['program'].get('description'), 'thumbnail': info.get('image_url'), - 'duration': info.get('duration'), + 'duration': int_or_none(info.get('duration')), 'age_limit': age_limit, 'upload_date': upload_date, + 'formats': formats, } From 8e3df9dfeef8503e9a8c01fcf42008d376d8d64d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 May 2015 00:08:38 +0800 Subject: [PATCH 0522/2721] [viki] Fix extractor and add a global availble test case --- youtube_dl/extractor/viki.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 6816dacb6..957e3c01e 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals import re -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_urllib_request, +) from ..utils import ( 
ExtractorError, unescapeHTML, @@ -15,8 +18,11 @@ from .common import InfoExtractor class VikiIE(InfoExtractor): IE_NAME = 'viki' + # iPad2 + _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5' + _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' - _TEST = { + _TESTS = [{ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 'info_dict': { 'id': '1023585v', @@ -28,7 +34,17 @@ class VikiIE(InfoExtractor): 'age_limit': 13, }, 'skip': 'Blocked in the US', - } + }, { + 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', + 'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c', + 'info_dict': { + 'id': '1067139v', + 'ext': 'mp4', + 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea', + 'upload_date': '20150430', + 'title': '\'The Avengers: Age of Ultron\' Press Conference', + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -50,9 +66,11 @@ class VikiIE(InfoExtractor): 'rating information', default='').strip() age_limit = US_RATINGS.get(rating_str) - info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id + req = compat_urllib_request.Request( + 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id) + req.add_header('User-Agent', self._USER_AGENT) info_webpage = self._download_webpage( - info_url, video_id, note='Downloading info page') + req, video_id, note='Downloading info page') if re.match(r'\s*<div\s+class="video-error', info_webpage): raise ExtractorError( 'Video %s is blocked from your location.' 
% video_id, From 89966a5aeacb70cc19b0a87a0514be824d2409af Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 May 2015 00:32:46 +0800 Subject: [PATCH 0523/2721] [viki] Enhance error message handling (#3774) --- youtube_dl/extractor/viki.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 957e3c01e..0fc1ceb19 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -11,6 +11,7 @@ from ..utils import ( unescapeHTML, unified_strdate, US_RATINGS, + clean_html, ) from .common import InfoExtractor @@ -71,10 +72,15 @@ class VikiIE(InfoExtractor): req.add_header('User-Agent', self._USER_AGENT) info_webpage = self._download_webpage( req, video_id, note='Downloading info page') - if re.match(r'\s*<div\s+class="video-error', info_webpage): - raise ExtractorError( - 'Video %s is blocked from your location.' % video_id, - expected=True) + err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None) + if err_msg: + err_msg = clean_html(err_msg) + if 'not available in your region' in err_msg: + raise ExtractorError( + 'Video %s is blocked from your location.' 
% video_id, + expected=True) + else: + raise ExtractorError('Viki said: ' + err_msg) video_url = self._html_search_regex( r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL') From d948e09b6174179adb43ac10cdb95dffeb4854dd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 May 2015 01:19:06 +0800 Subject: [PATCH 0524/2721] [viki] Extract m3u8 videos (#4855) --- youtube_dl/extractor/viki.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 0fc1ceb19..f85e43042 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -12,6 +12,8 @@ from ..utils import ( unified_strdate, US_RATINGS, clean_html, + determine_ext, + mimetype2ext, ) from .common import InfoExtractor @@ -45,6 +47,19 @@ class VikiIE(InfoExtractor): 'upload_date': '20150430', 'title': '\'The Avengers: Age of Ultron\' Press Conference', } + }, { + 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi', + 'info_dict': { + 'id': '1048879v', + 'ext': 'mp4', + 'upload_date': '20140820', + 'description': 'md5:54ff56d51bdfc7a30441ec967394e91c', + 'title': 'Ankhon Dekhi', + }, + 'params': { + # requires ffmpeg + 'skip_download': True, + } }] def _real_extract(self, url): @@ -81,8 +96,21 @@ class VikiIE(InfoExtractor): expected=True) else: raise ExtractorError('Viki said: ' + err_msg) - video_url = self._html_search_regex( - r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL') + mobj = re.search( + r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"', info_webpage) + if not mobj: + raise ExtractorError('Unable to find video URL') + video_url = unescapeHTML(mobj.group('url')) + video_ext = mimetype2ext(mobj.group('mime_type')) + + if determine_ext(video_url) == 'm3u8': + formats = self._extract_m3u8_formats( + video_url, video_id, ext=video_ext) + else: + formats = [{ + 'url': video_url, + 'ext': video_ext, + }] upload_date_str 
= self._html_search_regex( r'"created_at":"([^"]+)"', info_webpage, 'upload date') @@ -98,7 +126,7 @@ class VikiIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'url': video_url, + 'formats': formats, 'description': description, 'thumbnail': thumbnail, 'age_limit': age_limit, From 2eb0192155d53d70ae89ab8553ead49feb860ea4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 May 2015 01:35:46 +0800 Subject: [PATCH 0525/2721] [viki] Remove clean_html call --- youtube_dl/extractor/viki.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index f85e43042..cf6af1e5c 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -11,7 +11,6 @@ from ..utils import ( unescapeHTML, unified_strdate, US_RATINGS, - clean_html, determine_ext, mimetype2ext, ) @@ -89,7 +88,6 @@ class VikiIE(InfoExtractor): req, video_id, note='Downloading info page') err_msg = self._html_search_regex(r'<div[^>]+class="video-error[^>]+>(.+)</div>', info_webpage, 'error message', default=None) if err_msg: - err_msg = clean_html(err_msg) if 'not available in your region' in err_msg: raise ExtractorError( 'Video %s is blocked from your location.' 
% video_id, From c938c35f957ea069eed824131ca908608853abe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 07:18:22 +0600 Subject: [PATCH 0526/2721] [iconosquare] Fix extraction --- youtube_dl/extractor/iconosquare.py | 57 ++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py index 370e86e5a..70e4c0d41 100644 --- a/youtube_dl/extractor/iconosquare.py +++ b/youtube_dl/extractor/iconosquare.py @@ -1,36 +1,75 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class IconosquareIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)' _TEST = { 'url': 'http://statigr.am/p/522207370455279102_24101272', 'md5': '6eb93b882a3ded7c378ee1d6884b1814', 'info_dict': { 'id': '522207370455279102_24101272', 'ext': 'mp4', - 'uploader_id': 'aguynamedpatrick', - 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', + 'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)', 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', + 'timestamp': 1376471991, + 'upload_date': '20130814', + 'uploader': 'aguynamedpatrick', + 'uploader_id': '24101272', + 'comment_count': int, + 'like_count': int, }, } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + media = self._parse_json( + self._search_regex( + r'window\.media\s*=\s*({.+?});\n', webpage, 'media'), + video_id) + + formats = [{ + 'url': f['url'], + 'format_id': format_id, + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')) + } for format_id, f in media['videos'].items()] + self._sort_formats(formats) + title = self._html_search_regex( r'<title>(.+?)(?: *\(Videos?\))? 
\| (?:Iconosquare|Statigram)', webpage, 'title') - uploader_id = self._html_search_regex( - r'@([^ ]+)', title, 'uploader name', fatal=False) + + timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time')) + description = media.get('caption', {}).get('text') + + uploader = media.get('user', {}).get('username') + uploader_id = media.get('user', {}).get('id') + + comment_count = int_or_none(media.get('comments', {}).get('count')) + like_count = int_or_none(media.get('likes', {}).get('count')) + + thumbnails = [{ + 'url': t['url'], + 'id': thumbnail_id, + 'width': int_or_none(t.get('width')), + 'height': int_or_none(t.get('height')) + } for thumbnail_id, t in media.get('images', {}).items()] return { 'id': video_id, - 'url': self._og_search_video_url(webpage), 'title': title, - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'uploader_id': uploader_id + 'description': description, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'comment_count': comment_count, + 'like_count': like_count, + 'formats': formats, } From dd8920653c7249545bbcf5a690ffca702c9f29f2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 2 May 2015 21:45:25 +0800 Subject: [PATCH 0527/2721] [Grooveshark] Remove the extractor grooveshark.com was shut down on 2015/04/30 --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/grooveshark.py | 191 ---------------------------- 2 files changed, 192 deletions(-) delete mode 100644 youtube_dl/extractor/grooveshark.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fced42bd9..43f3e6808 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -199,7 +199,6 @@ from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE from .goshgay import GoshgayIE -from .grooveshark 
import GroovesharkIE from .groupon import GrouponIE from .hark import HarkIE from .hearthisat import HearThisAtIE diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py deleted file mode 100644 index 36ad4915c..000000000 --- a/youtube_dl/extractor/grooveshark.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import time -import math -import os.path -import re - - -from .common import InfoExtractor -from ..compat import ( - compat_html_parser, - compat_urllib_parse, - compat_urllib_request, - compat_urlparse, -) -from ..utils import ExtractorError - - -class GroovesharkHtmlParser(compat_html_parser.HTMLParser): - def __init__(self): - self._current_object = None - self.objects = [] - compat_html_parser.HTMLParser.__init__(self) - - def handle_starttag(self, tag, attrs): - attrs = dict((k, v) for k, v in attrs) - if tag == 'object': - self._current_object = {'attrs': attrs, 'params': []} - elif tag == 'param': - self._current_object['params'].append(attrs) - - def handle_endtag(self, tag): - if tag == 'object': - self.objects.append(self._current_object) - self._current_object = None - - @classmethod - def extract_object_tags(cls, html): - p = cls() - p.feed(html) - p.close() - return p.objects - - -class GroovesharkIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?grooveshark\.com/#!/s/([^/]+)/([^/]+)' - _TEST = { - 'url': 'http://grooveshark.com/#!/s/Jolene+Tenth+Key+Remix+Ft+Will+Sessions/6SS1DW?src=5', - 'md5': '7ecf8aefa59d6b2098517e1baa530023', - 'info_dict': { - 'id': '6SS1DW', - 'title': 'Jolene (Tenth Key Remix ft. 
Will Sessions)', - 'ext': 'mp3', - 'duration': 227, - } - } - - do_playerpage_request = True - do_bootstrap_request = True - - def _parse_target(self, target): - uri = compat_urlparse.urlparse(target) - hash = uri.fragment[1:].split('?')[0] - token = os.path.basename(hash.rstrip('/')) - return (uri, hash, token) - - def _build_bootstrap_url(self, target): - (uri, hash, token) = self._parse_target(target) - query = 'getCommunicationToken=1&hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts) - return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token) - - def _build_meta_url(self, target): - (uri, hash, token) = self._parse_target(target) - query = 'hash=%s&%d' % (compat_urllib_parse.quote(hash, safe=''), self.ts) - return (compat_urlparse.urlunparse((uri.scheme, uri.netloc, '/preload.php', None, query, None)), token) - - def _build_stream_url(self, meta): - return compat_urlparse.urlunparse(('http', meta['streamKey']['ip'], '/stream.php', None, None, None)) - - def _build_swf_referer(self, target, obj): - (uri, _, _) = self._parse_target(target) - return compat_urlparse.urlunparse((uri.scheme, uri.netloc, obj['attrs']['data'], None, None, None)) - - def _transform_bootstrap(self, js): - return re.split('(?m)^\s*try\s*\{', js)[0] \ - .split(' = ', 1)[1].strip().rstrip(';') - - def _transform_meta(self, js): - return js.split('\n')[0].split('=')[1].rstrip(';') - - def _get_meta(self, target): - (meta_url, token) = self._build_meta_url(target) - self.to_screen('Metadata URL: %s' % meta_url) - - headers = {'Referer': compat_urlparse.urldefrag(target)[0]} - req = compat_urllib_request.Request(meta_url, headers=headers) - res = self._download_json(req, token, - transform_source=self._transform_meta) - - if 'getStreamKeyWithSong' not in res: - raise ExtractorError( - 'Metadata not found. 
URL may be malformed, or Grooveshark API may have changed.') - - if res['getStreamKeyWithSong'] is None: - raise ExtractorError( - 'Metadata download failed, probably due to Grooveshark anti-abuse throttling. Wait at least an hour before retrying from this IP.', - expected=True) - - return res['getStreamKeyWithSong'] - - def _get_bootstrap(self, target): - (bootstrap_url, token) = self._build_bootstrap_url(target) - - headers = {'Referer': compat_urlparse.urldefrag(target)[0]} - req = compat_urllib_request.Request(bootstrap_url, headers=headers) - res = self._download_json(req, token, fatal=False, - note='Downloading player bootstrap data', - errnote='Unable to download player bootstrap data', - transform_source=self._transform_bootstrap) - return res - - def _get_playerpage(self, target): - (_, _, token) = self._parse_target(target) - - webpage = self._download_webpage( - target, token, - note='Downloading player page', - errnote='Unable to download player page', - fatal=False) - - if webpage is not None: - # Search (for example German) error message - error_msg = self._html_search_regex( - r'
    \s*

    (.*?)

    ', webpage, - 'error message', default=None) - if error_msg is not None: - error_msg = error_msg.replace('\n', ' ') - raise ExtractorError('Grooveshark said: %s' % error_msg) - - if webpage is not None: - o = GroovesharkHtmlParser.extract_object_tags(webpage) - return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'] - - return webpage, None - - def _real_initialize(self): - self.ts = int(time.time() * 1000) # timestamp in millis - - def _real_extract(self, url): - (target_uri, _, token) = self._parse_target(url) - - # 1. Fill cookiejar by making a request to the player page - swf_referer = None - if self.do_playerpage_request: - (_, player_objs) = self._get_playerpage(url) - if player_objs: - swf_referer = self._build_swf_referer(url, player_objs[0]) - self.to_screen('SWF Referer: %s' % swf_referer) - - # 2. Ask preload.php for swf bootstrap data to better mimic webapp - if self.do_bootstrap_request: - bootstrap = self._get_bootstrap(url) - self.to_screen('CommunicationToken: %s' % bootstrap['getCommunicationToken']) - - # 3. Ask preload.php for track metadata. - meta = self._get_meta(url) - - # 4. Construct stream request for track. 
- stream_url = self._build_stream_url(meta) - duration = int(math.ceil(float(meta['streamKey']['uSecs']) / 1000000)) - post_dict = {'streamKey': meta['streamKey']['streamKey']} - post_data = compat_urllib_parse.urlencode(post_dict).encode('utf-8') - headers = { - 'Content-Length': len(post_data), - 'Content-Type': 'application/x-www-form-urlencoded' - } - if swf_referer is not None: - headers['Referer'] = swf_referer - - return { - 'id': token, - 'title': meta['song']['Name'], - 'http_method': 'POST', - 'url': stream_url, - 'ext': 'mp3', - 'format': 'mp3 audio', - 'duration': duration, - 'http_post_data': post_data, - 'http_headers': headers, - } From 2ddcd88129e633ead8c5faee6c930b66dddb1b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 2 May 2015 17:29:56 +0200 Subject: [PATCH 0528/2721] Remove code that was only used by the Grooveshark extractor --- youtube_dl/compat.py | 6 ------ youtube_dl/downloader/http.py | 9 ++------- youtube_dl/extractor/common.py | 3 --- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 973bcd320..f9529210d 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -46,11 +46,6 @@ try: except ImportError: # Python 2 import htmlentitydefs as compat_html_entities -try: - import html.parser as compat_html_parser -except ImportError: # Python 2 - import HTMLParser as compat_html_parser - try: import http.client as compat_http_client except ImportError: # Python 2 @@ -404,7 +399,6 @@ __all__ = [ 'compat_getenv', 'compat_getpass', 'compat_html_entities', - 'compat_html_parser', 'compat_http_client', 'compat_http_server', 'compat_kwargs', diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index d136bebd1..b7f144af9 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -28,13 +28,8 @@ class HttpFD(FileDownloader): add_headers = info_dict.get('http_headers') if add_headers: 
headers.update(add_headers) - data = info_dict.get('http_post_data') - http_method = info_dict.get('http_method') - basic_request = compat_urllib_request.Request(url, data, headers) - request = compat_urllib_request.Request(url, data, headers) - if http_method is not None: - basic_request.get_method = lambda: http_method - request.get_method = lambda: http_method + basic_request = compat_urllib_request.Request(url, None, headers) + request = compat_urllib_request.Request(url, None, headers) is_test = self.params.get('test', False) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 48f58aae3..3ae5d5212 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -111,11 +111,8 @@ class InfoExtractor(object): (quality takes higher priority) -1 for default (order by other properties), -2 or smaller for less than default. - * http_method HTTP method to use for the download. * http_headers A dictionary of additional HTTP headers to add to the request. - * http_post_data Additional data to send with a POST - request. * stretched_ratio If given and not 1, indicates that the video's pixels are not square. width : height ratio as float. 
From 38c6902b9051fa7e1dbda79808a90dbf2aac37ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 2 May 2015 22:52:21 +0600 Subject: [PATCH 0529/2721] [YoutubeDL] Ensure correct extension is always present for a merged file (Closes #5535) --- youtube_dl/YoutubeDL.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 55b429f31..eaa436bf9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1382,11 +1382,18 @@ class YoutubeDL(object): # TODO: Check acodec/vcodec return False + filename_real_ext = os.path.splitext(filename)[1][1:] + filename_wo_ext = ( + os.path.splitext(filename)[0] + if filename_real_ext == info_dict['ext'] + else filename) requested_formats = info_dict['requested_formats'] if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): - filename = os.path.splitext(filename)[0] + '.mkv' + info_dict['ext'] = 'mkv' self.report_warning('You have requested formats incompatible for merge. 
' 'The formats will be merged into mkv') + # Ensure filename always has a correct extension for successful merge + filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) if os.path.exists(encodeFilename(filename)): self.to_screen( '[download] %s has already been downloaded and ' From 21f6330274c6f87c796bd9248ed82d2bc73de969 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 3 May 2015 00:51:24 +0800 Subject: [PATCH 0530/2721] [baiduvideo] Add new extractor (closes #4563) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/baidu.py | 68 ++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/baidu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 43f3e6808..41af925cc 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -32,6 +32,7 @@ from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE from .audiomack import AudiomackIE, AudiomackAlbumIE from .azubu import AzubuIE +from .baidu import BaiduVideoIE from .bambuser import BambuserIE, BambuserChannelIE from .bandcamp import BandcampIE, BandcampAlbumIE from .bbccouk import BBCCoUkIE diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py new file mode 100644 index 000000000..7f7a88ffe --- /dev/null +++ b/youtube_dl/extractor/baidu.py @@ -0,0 +1,68 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse + + +class BaiduVideoIE(InfoExtractor): + _VALID_URL = r'http://v\.baidu\.com/(?P[a-z]+)/(?P\d+).htm' + _TESTS = [{ + 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6', + 'info_dict': { + 'id': '1069', + 'title': '中华小当家 TV版 (全52集)', + 'description': 'md5:395a419e41215e531c857bb037bbaf80', + }, + 'playlist_count': 52, + }, { + 'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand', + 
'info_dict': { + 'id': '11595', + 'title': 're:^奔跑吧兄弟', + 'description': 'md5:1bf88bad6d850930f542d51547c089b8', + }, + 'playlist_mincount': 3, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + category = category2 = mobj.group('type') + if category == 'show': + category2 = 'tvshow' + + webpage = self._download_webpage(url, playlist_id) + + playlist_title = self._html_search_regex( + r'title\s*:\s*(["\'])(?P[^\']+)\1', webpage, + 'playlist title', group='title') + playlist_description = self._html_search_regex( + r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage, + playlist_id, 'playlist description') + + site = self._html_search_regex( + r'filterSite\s*:\s*["\']([^"]*)["\']', webpage, + 'primary provider site') + api_result = self._download_json( + 'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % ( + category, category2, playlist_id, site), + playlist_id, 'Get playlist links') + + entries = [] + for episode in api_result[0]['episodes']: + episode_id = '%s_%s' % (playlist_id, episode['episode']) + + redirect_page = self._download_webpage( + compat_urlparse.urljoin(url, episode['url']), episode_id, + note='Download Baidu redirect page') + real_url = self._html_search_regex( + r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL') + + entries.append(self.url_result( + real_url, video_title=episode['single_title'])) + + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) From e65e4c88748f2d245aa683116a2c6de907186751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:06:01 +0600 Subject: [PATCH 0531/2721] [utils] Improve prepend_extension Now `ext` is appended to filename if real extension != expected extension. 
--- youtube_dl/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f07679c76..b3abfbc11 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1349,9 +1349,12 @@ def parse_duration(s): return res -def prepend_extension(filename, ext): +def prepend_extension(filename, ext, expected_real_ext=None): name, real_ext = os.path.splitext(filename) - return '{0}.{1}{2}'.format(name, ext, real_ext) + return ( + '{0}.{1}{2}'.format(name, ext, real_ext) + if not expected_real_ext or real_ext[1:] == expected_real_ext + else '{0}.{1}'.format(filename, ext)) def check_executable(exe, args=[]): From a4bcaad77314c714d2304cb21ffdd87a9b84316b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:10:48 +0600 Subject: [PATCH 0532/2721] [test_utils] Add tests for prepend_extension --- test/test_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index 17017a8c0..a9464f2e4 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -41,6 +41,7 @@ from youtube_dl.utils import ( sanitize_filename, sanitize_path, sanitize_url_path_consecutive_slashes, + prepend_extension, shell_quote, smuggle_url, str_to_int, @@ -193,6 +194,14 @@ class TestUtil(unittest.TestCase): sanitize_url_path_consecutive_slashes('http://hostname/abc//'), 'http://hostname/abc/') + def test_prepend_extension(self): + self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') + self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') + self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') + self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp') + self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') + self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 
1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) From 666a9a2b954bb6c75a5fcdb9fbb18842038c188a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:11:34 +0600 Subject: [PATCH 0533/2721] [YoutubeDL] Improve audio/video-only file naming --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eaa436bf9..06d04c8f0 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1403,7 +1403,7 @@ class YoutubeDL(object): new_info = dict(info_dict) new_info.update(f) fname = self.prepare_filename(new_info) - fname = prepend_extension(fname, 'f%s' % f['format_id']) + fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext']) downloaded.append(fname) partial_success = dl(fname, new_info) success = success and partial_success From b3ed15b7604ce83e85b791ed329c5725a436b805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:23:06 +0600 Subject: [PATCH 0534/2721] [utils] Add replace_extension --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index a9464f2e4..6906a65c2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -42,6 +42,7 @@ from youtube_dl.utils import ( sanitize_path, sanitize_url_path_consecutive_slashes, prepend_extension, + replace_extension, shell_quote, smuggle_url, str_to_int, @@ -202,6 +203,14 @@ class TestUtil(unittest.TestCase): self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') + def test_replace_extension(self): + self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp') + self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') + 
self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp') + self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp') + self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') + self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') + def test_ordered_set(self): self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b3abfbc11..a5a5c317e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1357,6 +1357,13 @@ def prepend_extension(filename, ext, expected_real_ext=None): else '{0}.{1}'.format(filename, ext)) +def replace_extension(filename, ext, expected_real_ext=None): + name, real_ext = os.path.splitext(filename) + return '{0}.{1}'.format( + name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename, + ext) + + def check_executable(exe, args=[]): """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
args can be a list of arguments for a short output (like -version) """ From b29e0000e69458252f73ffe62a466da5d1449863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:23:44 +0600 Subject: [PATCH 0535/2721] [YoutubeDL] Improve JSON info file naming --- youtube_dl/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 06d04c8f0..0330b0b34 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -71,6 +71,7 @@ from .utils import ( write_string, YoutubeDLHandler, prepend_extension, + replace_extension, args_to_str, age_restricted, ) @@ -1332,7 +1333,7 @@ class YoutubeDL(object): return if self.params.get('writeinfojson', False): - infofn = os.path.splitext(filename)[0] + '.info.json' + infofn = replace_extension(filename, 'info.json', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)): self.to_screen('[info] Video description metadata is already present') else: From 98727e123f6d4881920bcb9fa57ea7956afb9a5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:35:18 +0600 Subject: [PATCH 0536/2721] [YoutubeDL] Improve annotations file naming --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0330b0b34..27c1e726f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1286,7 +1286,7 @@ class YoutubeDL(object): return if self.params.get('writeannotations', False): - annofn = filename + '.annotations.xml' + annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)): self.to_screen('[info] Video annotations are already present') else: From 2699da8041cf10a636e0ac99efb91854b2c3aa58 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:36:55 +0600 Subject: [PATCH 0537/2721] [YoutubeDL] Improve description file naming --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 27c1e726f..eb7470f72 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1271,7 +1271,7 @@ class YoutubeDL(object): return if self.params.get('writedescription', False): - descfn = filename + '.description' + descfn = replace_extension(filename, 'description', info_dict.get('ext')) if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)): self.to_screen('[info] Video description is already present') elif info_dict.get('description') is None: From 0669c89c55a77eeda2b9767e096c465b12c67b8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 May 2015 23:38:30 +0600 Subject: [PATCH 0538/2721] [options] Clarify `--write-annotations` help --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d0aa8296d..22dbc3aec 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -637,7 +637,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--write-annotations', action='store_true', dest='writeannotations', default=False, - help='Write video annotations to a .annotation file') + help='Write video annotations to a .annotations.xml file') filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', From 0a64aa73552d3fc3ae6e24a56e2f8e78c960603c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 May 2015 00:58:42 +0600 Subject: [PATCH 0539/2721] [vgtv] Fix _VALID_URL (Closes #5578) --- youtube_dl/extractor/vgtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py 
b/youtube_dl/extractor/vgtv.py index 2f111bf7e..69dc9a759 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -8,7 +8,7 @@ from ..utils import float_or_none class VGTVIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/(?:.*)/(?P<id>[0-9]+)' + _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/[^/]+/(?P<id>[0-9]+)' _TESTS = [ { # streamType: vod From 963aea5279691affca34d220452cecfd0e291a77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 May 2015 07:45:15 +0600 Subject: [PATCH 0540/2721] [baiduvideo] Improve _VALID_URL --- youtube_dl/extractor/baidu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index 7f7a88ffe..906895c1e 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -8,7 +8,7 @@ from ..compat import compat_urlparse class BaiduVideoIE(InfoExtractor): - _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+).htm' + _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm' _TESTS = [{ 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6', 'info_dict': { From 8dab1e9072037daa9c6cab3da4a5dbd4daaae4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 May 2015 09:56:03 +0600 Subject: [PATCH 0541/2721] [rutv] Recognize live streams (#5584) --- youtube_dl/extractor/rutv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index ef766237b..169f7c032 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -181,12 +181,15 @@ class RUTVIE(InfoExtractor): self._sort_formats(formats) + is_live = video_type == 'live' + return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': description, 'thumbnail': 
thumbnail, 'view_count': view_count, 'duration': duration, 'formats': formats, + 'is_live': is_live, } From d0fd305023b37f7776485679a74e422eade26c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 3 May 2015 10:00:34 +0600 Subject: [PATCH 0542/2721] [rutv] Add test for #5584 --- youtube_dl/extractor/rutv.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index 169f7c032..1ec2c86e5 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -84,11 +84,20 @@ class RUTVIE(InfoExtractor): 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', }, + 'skip': 'Translation has finished', + }, + { + 'url': 'http://live.russia.tv/index/index/channel_id/3', + 'info_dict': { + 'id': '21', + 'ext': 'mp4', + 'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, }, - 'skip': 'Translation has finished', }, ] From 233c1c0e76d64c9e13dc8968bfd8a014c49e66a8 Mon Sep 17 00:00:00 2001 From: Antti Ajanki <antti.ajanki@iki.fi> Date: Sun, 3 May 2015 11:04:14 +0300 Subject: [PATCH 0543/2721] [downloader/f4m] Fragment filenames must be sanitized because the fragment was written to a file with a sanitized name by http_dl.download() --- youtube_dl/downloader/f4m.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index b1a858c45..3cb07e15f 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -396,18 +396,19 @@ class F4mFD(FileDownloader): success = http_dl.download(frag_filename, {'url': url}) if not success: return False - with open(frag_filename, 'rb') as down: - down_data = down.read() - reader = FlvReader(down_data) - while 
True: - _, box_type, box_data = reader.read_box_info() - if box_type == b'mdat': - dest_stream.write(box_data) - break + (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') + down_data = down.read() + down.close() + reader = FlvReader(down_data) + while True: + _, box_type, box_data = reader.read_box_info() + if box_type == b'mdat': + dest_stream.write(box_data) + break if live: - os.remove(frag_filename) + os.remove(encodeFilename(frag_sanitized)) else: - frags_filenames.append(frag_filename) + frags_filenames.append(frag_sanitized) except (compat_urllib_error.HTTPError, ) as err: if live and (err.code == 404 or err.code == 410): # We didn't keep up with the live window. Continue @@ -430,7 +431,7 @@ class F4mFD(FileDownloader): elapsed = time.time() - start self.try_rename(tmpfilename, filename) for frag_file in frags_filenames: - os.remove(frag_file) + os.remove(encodeFilename(frag_file)) fsize = os.path.getsize(encodeFilename(filename)) self._hook_progress({ From 5477ca82395545e577afb269b910336cd98de5b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 3 May 2015 16:59:14 +0200 Subject: [PATCH 0544/2721] [dailymotion] Use https urls The video url still redirects to an http url, but it doesn't explicitly contain the video id. 
--- youtube_dl/extractor/dailymotion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 7615ecd4b..aa595af20 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -85,7 +85,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - url = 'http://www.dailymotion.com/video/%s' % video_id + url = 'https://www.dailymotion.com/video/%s' % video_id # Retrieve video webpage to extract further information request = self._build_request(url) @@ -110,7 +110,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): if mobj is not None: video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) - embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id + embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id embed_request = self._build_request(embed_url) embed_page = self._download_webpage( embed_request, video_id, 'Downloading embed page') From 1748d67aea66b6d63bc203ada1036110e7fd2402 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 01:11:23 +0800 Subject: [PATCH 0545/2721] [lifenews] Fix view count and comment count --- youtube_dl/extractor/lifenews.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 1dfe7f77f..3c00ed333 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -47,9 +47,9 @@ class LifeNewsIE(InfoExtractor): description = self._og_search_description(webpage) view_count = self._html_search_regex( - r'<div class=\'views\'>(\d+)</div>', webpage, 'view count', fatal=False) + r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False) comment_count = self._html_search_regex( - r'<div class=\'comments\'>\s*<span class=\'counter\'>(\d+)</span>', webpage, 
'comment count', fatal=False) + r'<div class=\'comments\'>\s*<span class=\'counter\'>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False) upload_date = self._html_search_regex( r'<time datetime=\'([^\']+)\'>', webpage, 'upload date', fatal=False) From 848edeab898ee45e45a8fcdb355f35be8b73dd5d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 01:24:19 +0800 Subject: [PATCH 0546/2721] [lifenews] Detect <iframe> (fixes #5346) --- youtube_dl/extractor/lifenews.py | 47 ++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 3c00ed333..330138692 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -16,7 +16,7 @@ class LifeNewsIE(InfoExtractor): IE_DESC = 'LIFE | NEWS' _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://lifenews.ru/news/126342', 'md5': 'e1b50a5c5fb98a6a544250f2e0db570a', 'info_dict': { @@ -27,7 +27,19 @@ class LifeNewsIE(InfoExtractor): 'thumbnail': 're:http://.*\.jpg', 'upload_date': '20140130', } - } + }, { + # video in <iframe> + 'url': 'http://lifenews.ru/news/152125', + 'md5': '77d19a6f0886cd76bdbf44b4d971a273', + 'info_dict': { + 'id': '152125', + 'ext': 'mp4', + 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ', + 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. 
', + 'upload_date': '20150402', + 'uploader': 'embed.life.ru', + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -36,7 +48,9 @@ class LifeNewsIE(InfoExtractor): webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page') videos = re.findall(r'<video.*?poster="(?P<poster>[^"]+)".*?src="(?P<video>[^"]+)".*?></video>', webpage) - if not videos: + iframe_link = self._html_search_regex( + '<iframe[^>]+src="([^"]+)', webpage, 'iframe link', default=None) + if not videos and not iframe_link: raise ExtractorError('No media links available for %s' % video_id) title = self._og_search_title(webpage) @@ -56,17 +70,32 @@ class LifeNewsIE(InfoExtractor): if upload_date is not None: upload_date = unified_strdate(upload_date) + common_info = { + 'description': description, + 'view_count': int_or_none(view_count), + 'comment_count': int_or_none(comment_count), + 'upload_date': upload_date, + } + def make_entry(video_id, media, video_number=None): - return { + cur_info = dict(common_info) + cur_info.update({ 'id': video_id, 'url': media[1], 'thumbnail': media[0], 'title': title if video_number is None else '%s-video%s' % (title, video_number), - 'description': description, - 'view_count': int_or_none(view_count), - 'comment_count': int_or_none(comment_count), - 'upload_date': upload_date, - } + }) + return cur_info + + if iframe_link: + cur_info = dict(common_info) + cur_info.update({ + '_type': 'url_transparent', + 'id': video_id, + 'title': title, + 'url': iframe_link, + }) + return cur_info if len(videos) == 1: return make_entry(video_id, videos[0]) From 3e7202c1bca0618fef04c60a0f5603d50f09b76f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 01:59:26 +0800 Subject: [PATCH 0547/2721] [MLB] Extend _VALID_URL (#5443) --- youtube_dl/extractor/mlb.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py 
b/youtube_dl/extractor/mlb.py index e369551c2..ee9ff73bf 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -10,7 +10,7 @@ from ..utils import ( class MLBIE(InfoExtractor): - _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)' + _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/(?:embed|m-internal-embed)\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)' _TESTS = [ { 'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea', @@ -83,6 +83,11 @@ class MLBIE(InfoExtractor): { 'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728', 'only_matching': True, + }, + { + # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer + 'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb', + 'only_matching': True, } ] From 8001607e903a5ccaff76908f23bece08818743c0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 02:20:07 +0800 Subject: [PATCH 0548/2721] [generic] Detect more MLB videos (fixes #5443) --- youtube_dl/extractor/generic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4946cc132..d09e85665 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -413,6 +413,19 @@ class GenericIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, }, + # MLB articles + { + 
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer', + 'md5': 'b190e70141fb9a1552a85426b4da1b5d', + 'info_dict': { + 'id': '75609783', + 'ext': 'mp4', + 'title': 'Must C: Pillar climbs for catch', + 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run', + 'timestamp': 1429124820, + 'upload_date': '20150415', + } + }, # Wistia embed { 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', @@ -1289,6 +1302,10 @@ class GenericIE(InfoExtractor): mobj = re.search( r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', webpage) + if not mobj: + mobj = re.search( + r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)', + webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'MLB') From 957b794c2622fdc3696c245f1fa1eebfdb0f8bdf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 3 May 2015 22:31:39 +0200 Subject: [PATCH 0549/2721] release 2015.05.03 --- README.md | 2 +- docs/supportedsites.md | 3 ++- youtube_dl/version.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3432546fc..9aeb114f3 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ which means you can modify it, redistribute it or use it however you like. 
--no-mtime Do not use the Last-modified header to set the file modification time --write-description Write video description to a .description file --write-info-json Write video metadata to a .info.json file - --write-annotations Write video annotations to a .annotation file + --write-annotations Write video annotations to a .annotations.xml file --load-info FILE JSON file containing the video information (created with the "--write-info-json" option) --cookies FILE File to read cookies from and dump cookie jar in --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d1d676cdd..6d2e496a8 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -44,6 +44,7 @@ - **audiomack** - **audiomack:album** - **Azubu** + - **BaiduVideo** - **bambuser** - **bambuser:channel** - **Bandcamp** @@ -155,6 +156,7 @@ - **FootyRoom** - **Foxgay** - **FoxNews** + - **FoxSports** - **france2.fr:generation-quoi** - **FranceCulture** - **FranceInter** @@ -184,7 +186,6 @@ - **Golem** - **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net - **Goshgay** - - **Grooveshark** - **Groupon** - **Hark** - **HearThisAt** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b88ea85e8..6950afc47 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.28' +__version__ = '2015.05.03' From b9b3ab45ea95e4c48f3c87d3584f8180ff460be6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 19:09:18 +0800 Subject: [PATCH 0550/2721] [NBC] Enhance extraction of ThePlatform URL (fixes #5470) --- youtube_dl/extractor/nbc.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index ecd0ac8b1..6cbe03d0f 
100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -37,13 +37,26 @@ class NBCIE(InfoExtractor): }, 'skip': 'Only works from US', }, + { + 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821', + 'info_dict': { + 'id': '8iUuyzWDdYUZ', + 'ext': 'flv', + 'title': 'Star Wars Teaser', + 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442', + }, + 'skip': 'Only works from US', + } ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) theplatform_url = self._search_regex( - '(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', + [ + r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', + r'"embedURL"\s*:\s*"([^"]+)"' + ], webpage, 'theplatform url').replace('_no_endcard', '') if theplatform_url.startswith('//'): theplatform_url = 'http:' + theplatform_url From 71fa56b887b7c4d1a3ba16c7127736fcaec34b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 18:59:22 +0600 Subject: [PATCH 0551/2721] [escapist] Fix formats extraction --- youtube_dl/extractor/escapist.py | 35 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 8facf1185..802943dc2 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -8,7 +8,7 @@ from ..compat import compat_urllib_request from ..utils import ( determine_ext, clean_html, - qualities, + int_or_none, ) @@ -72,28 +72,23 @@ class EscapistIE(InfoExtractor): video_id = imsVideo['videoID'] key = imsVideo['hash'] - quality = qualities(['lq', 'hq', 'hd']) + config_req = compat_urllib_request.Request( + 'http://www.escapistmagazine.com/videos/' + 'vidconfig.php?videoID=%s&hash=%s' % (video_id, key)) + config_req.add_header('Referer', url) + config = self._download_webpage(config_req, 
video_id, 'Downloading video config') - formats = [] - for q in ['lq', 'hq', 'hd']: - config_req = compat_urllib_request.Request( - 'http://www.escapistmagazine.com/videos/' - 'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q)) - config_req.add_header('Referer', url) - config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper()) + data = json.loads(_decrypt_config(key, config)) - data = json.loads(_decrypt_config(key, config)) + title = clean_html(data['videoData']['title']) + duration = data['videoData']['duration'] / 1000 - title = clean_html(data['videoData']['title']) - duration = data['videoData']['duration'] / 1000 - - for i, v in enumerate(data['files']['videos']): - - formats.append({ - 'url': v, - 'format_id': determine_ext(v) + '_' + q + str(i), - 'quality': quality(q), - }) + formats = [{ + 'url': video['src'], + 'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']), + 'height': int_or_none(video.get('res')), + } for video in data['files']['videos']] + self._sort_formats(formats) return { 'id': video_id, From cec04ef3a6cfd43036f7463dc1fa890af5b6ee11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 19:00:34 +0600 Subject: [PATCH 0552/2721] [escapist] Update tests' checksums --- youtube_dl/extractor/escapist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 802943dc2..daf859385 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -39,7 +39,7 @@ class EscapistIE(InfoExtractor): _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' _TESTS = [{ 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', - 'md5': 'c6793dbda81388f4264c1ba18684a74d', + 'md5': 'ab3a706c681efca53f0a35f1415cf0d1', 
'info_dict': { 'id': '6618', 'ext': 'mp4', @@ -50,7 +50,7 @@ class EscapistIE(InfoExtractor): } }, { 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer', - 'md5': 'cf8842a8a46444d241f9a9980d7874f2', + 'md5': '9e8c437b0dbb0387d3bd3255ca77f6bf', 'info_dict': { 'id': '10044', 'ext': 'mp4', From 90b4b0eabeabbdad3bfece8254692d34a3dcba95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 19:01:08 +0600 Subject: [PATCH 0553/2721] [escapist] Improve _VALID_URL --- youtube_dl/extractor/escapist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index daf859385..dadfaa6a5 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -36,7 +36,7 @@ def _decrypt_config(key, string): class EscapistIE(InfoExtractor): - _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' + _VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' _TESTS = [{ 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', 'md5': 'ab3a706c681efca53f0a35f1415cf0d1', From 782e0568ef28ebd7d7a11273d0a5af065575379a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 19:04:49 +0600 Subject: [PATCH 0554/2721] [escapist] Modernize --- youtube_dl/extractor/escapist.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index dadfaa6a5..2cd3af142 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -9,6 +9,7 @@ from ..utils import ( determine_ext, clean_html, int_or_none, + float_or_none, ) @@ -65,12 +66,12 @@ class EscapistIE(InfoExtractor): video_id = 
self._match_id(url) webpage = self._download_webpage(url, video_id) - imsVideo = self._parse_json( + ims_video = self._parse_json( self._search_regex( r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'), video_id) - video_id = imsVideo['videoID'] - key = imsVideo['hash'] + video_id = ims_video['videoID'] + key = ims_video['hash'] config_req = compat_urllib_request.Request( 'http://www.escapistmagazine.com/videos/' @@ -80,8 +81,11 @@ class EscapistIE(InfoExtractor): data = json.loads(_decrypt_config(key, config)) - title = clean_html(data['videoData']['title']) - duration = data['videoData']['duration'] / 1000 + video_data = data['videoData'] + + title = clean_html(video_data['title']) + duration = float_or_none(video_data.get('duration'), 1000) + uploader = video_data.get('publisher') formats = [{ 'url': video['src'], @@ -97,4 +101,5 @@ class EscapistIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage), 'description': self._og_search_description(webpage), 'duration': duration, + 'uploader': uploader, } From b2f82360d78696daca576f558f585b2285db0341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 19:06:07 +0600 Subject: [PATCH 0555/2721] [escapist] Add uploader to tests --- youtube_dl/extractor/escapist.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 2cd3af142..c85b4c458 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -48,6 +48,7 @@ class EscapistIE(InfoExtractor): 'title': "Breaking Down Baldur's Gate", 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 264, + 'uploader': 'The Escapist', } }, { 'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer', @@ -59,6 +60,7 @@ class EscapistIE(InfoExtractor): 'title': 'Evolve - One vs Multiplayer', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 304, + 'uploader': 'The Escapist', } }] From 
dc1eed93be7ac2afb3f52237ae0034d24715b4bd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 4 May 2015 15:12:48 +0200 Subject: [PATCH 0556/2721] release 2015.05.04 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6950afc47..a5a81bcd2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.03' +__version__ = '2015.05.04' From 0fe2ff78e68ec03d56bf3d9434eb612ffb683977 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 21:53:05 +0800 Subject: [PATCH 0557/2721] [NBC] Enhance embedURL extraction (closes #2549) --- test/test_utils.py | 5 +++++ youtube_dl/extractor/nbc.py | 11 +++++++++-- youtube_dl/utils.py | 8 ++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 6906a65c2..032d3656a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -53,6 +53,7 @@ from youtube_dl.utils import ( unified_strdate, unsmuggle_url, uppercase_escape, + lowercase_escape, url_basename, urlencode_postdata, version_tuple, @@ -418,6 +419,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') + def test_lowercase_escape(self): + self.assertEqual(lowercase_escape('aä'), 'aä') + self.assertEqual(lowercase_escape('\\u0026'), '&') + def test_limit_length(self): self.assertEqual(limit_length(None, 12), None) self.assertEqual(limit_length('foo', 12), 'foo') diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 6cbe03d0f..dc2091be0 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -10,6 +10,8 @@ from ..compat import ( from ..utils import ( ExtractorError, find_xpath_attr, + lowercase_escape, + unescapeHTML, ) @@ -46,18 +48,23 @@ class 
NBCIE(InfoExtractor): 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442', }, 'skip': 'Only works from US', + }, + { + # This video has expired but with an escaped embedURL + 'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', + 'skip': 'Expired' } ] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - theplatform_url = self._search_regex( + theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( [ r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', r'"embedURL"\s*:\s*"([^"]+)"' ], - webpage, 'theplatform url').replace('_no_endcard', '') + webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) if theplatform_url.startswith('//'): theplatform_url = 'http:' + theplatform_url return self.url_result(theplatform_url) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a5a5c317e..1013f7c18 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1486,6 +1486,14 @@ def uppercase_escape(s): s) +def lowercase_escape(s): + unicode_escape = codecs.getdecoder('unicode_escape') + return re.sub( + r'\\u[0-9a-fA-F]{4}', + lambda m: unicode_escape(m.group(0))[0], + s) + + def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" if sys.version_info < (3, 0) and isinstance(s, compat_str): From 883340c10758abf76a250172deb12b528b8da7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 4 May 2015 16:50:22 +0200 Subject: [PATCH 0558/2721] [livestream:original] Fix extraction (fixes #4702) --- youtube_dl/extractor/livestream.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index ec309dadd..5be59a92f 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ 
-202,13 +202,9 @@ class LivestreamOriginalIE(InfoExtractor): 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', 'info_dict': { 'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', }, - 'params': { - # rtmp - 'skip_download': True, - }, }, { 'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3', 'info_dict': { @@ -221,19 +217,17 @@ class LivestreamOriginalIE(InfoExtractor): api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) info = self._download_xml(api_url, video_id) + # this url is used on mobile devices + stream_url = 'http://x{0}x.api.channel.livestream.com/3.0/getstream.json?id={1}'.format(user, video_id) + stream_info = self._download_json(stream_url, video_id) item = info.find('channel').find('item') ns = {'media': 'http://search.yahoo.com/mrss'} thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] - # Remove the extension and number from the path (like 1.jpg) - path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path') return { 'id': video_id, 'title': item.find('title').text, - 'url': 'rtmp://extondemand.livestream.com/ondemand', - 'play_path': 'trans/dv15/mogulus-{0}'.format(path), - 'player_url': 'http://static.livestream.com/chromelessPlayer/v21/playerapi.swf?hash=5uetk&v=0803&classid=D27CDB6E-AE6D-11cf-96B8-444553540000&jsEnabled=false&wmode=opaque', - 'ext': 'flv', + 'url': stream_info['progressiveUrl'], 'thumbnail': thumbnail_url, } From a90552663e555c89638706141c31b0022a9ba27b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 4 May 2015 16:54:01 +0200 Subject: [PATCH 0559/2721] [livestream:original] Update url format (fixes #5598) --- 
youtube_dl/extractor/livestream.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 5be59a92f..6d7733e41 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -194,19 +194,19 @@ class LivestreamIE(InfoExtractor): # The original version of Livestream uses a different system class LivestreamOriginalIE(InfoExtractor): IE_NAME = 'livestream:original' - _VALID_URL = r'''(?x)https?://www\.livestream\.com/ + _VALID_URL = r'''(?x)https?://original\.livestream\.com/ (?P<user>[^/]+)/(?P<type>video|folder) (?:\?.*?Id=|/)(?P<id>.*?)(&|$) ''' _TESTS = [{ - 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', + 'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', 'info_dict': { 'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', 'ext': 'mp4', 'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', }, }, { - 'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3', + 'url': 'https://original.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3', 'info_dict': { 'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3', }, From 50aa43b3ae074781bb7c7ad55a85f54b1fd84146 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 22:32:57 +0800 Subject: [PATCH 0560/2721] [nytimes] Implement extracting videos from articles (closes #5436) --- youtube_dl/extractor/nytimes.py | 79 +++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 03f0a4de6..266d8d400 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -8,30 +8,8 @@ from ..utils import ( ) -class NYTimesIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)' - - _TESTS = [{ - 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', - 'md5': '18a525a510f942ada2720db5f31644c0', - 'info_dict': { - 'id': '100000002847155', - 'ext': 'mov', - 'title': 'Verbatim: What Is a Photocopier?', - 'description': 'md5:93603dada88ddbda9395632fdc5da260', - 'timestamp': 1398631707, - 'upload_date': '20140427', - 'uploader': 'Brett Weiner', - 'duration': 419, - } - }, { - 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - +class NYTimesBaseIE(InfoExtractor): + def _extract_video_from_id(self, video_id): video_data = self._download_json( 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON') @@ -81,3 +59,56 @@ class NYTimesIE(InfoExtractor): 'formats': formats, 'thumbnails': thumbnails, } + + +class NYTimesIE(NYTimesBaseIE): + _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', + 'md5': '18a525a510f942ada2720db5f31644c0', + 'info_dict': { + 'id': '100000002847155', + 'ext': 'mov', + 'title': 'Verbatim: What Is a Photocopier?', + 'description': 'md5:93603dada88ddbda9395632fdc5da260', + 'timestamp': 1398631707, + 'upload_date': '20140427', + 'uploader': 'Brett Weiner', + 'duration': 419, + } + }, { + 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + return 
self._extract_video_from_id(video_id) + + +class NYTimesArticleIE(NYTimesBaseIE): + _VALID_URL = r'https?://(?:www)?\.nytimes\.com/\d{4}/\d{2}/\d{2}/(?:[^/]+/)*(?P<id>[^.]+)\.html' + _TEST = { + 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0', + 'md5': 'e2076d58b4da18e6a001d53fd56db3c9', + 'info_dict': { + 'id': '100000003628438', + 'ext': 'mov', + 'title': 'New Minimum Wage: $70,000 a Year', + 'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.', + 'timestamp': 1429033037, + 'upload_date': '20150414', + 'uploader': 'Matthew Williams', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._html_search_regex(r'data-videoid="(\d+)"', webpage, 'video id') + + return self._extract_video_from_id(video_id) From df8418ffcff60c5a90e3138a39959d36cebcee7f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 4 May 2015 23:00:09 +0800 Subject: [PATCH 0561/2721] [nytimes] Extend _VALID_URL (#2754) --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/nytimes.py | 9 ++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 41af925cc..1c3a46dd8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -363,7 +363,10 @@ from .nrk import ( ) from .ntvde import NTVDeIE from .ntvru import NTVRuIE -from .nytimes import NYTimesIE +from .nytimes import ( + NYTimesIE, + NYTimesArticleIE, +) from .nuvid import NuvidIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 
266d8d400..6ffbe3863 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -89,8 +89,8 @@ class NYTimesIE(NYTimesBaseIE): class NYTimesArticleIE(NYTimesBaseIE): - _VALID_URL = r'https?://(?:www)?\.nytimes\.com/\d{4}/\d{2}/\d{2}/(?:[^/]+/)*(?P<id>[^.]+)\.html' - _TEST = { + _VALID_URL = r'https?://(?:www)?\.nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?' + _TESTS = [{ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0', 'md5': 'e2076d58b4da18e6a001d53fd56db3c9', 'info_dict': { @@ -102,7 +102,10 @@ class NYTimesArticleIE(NYTimesBaseIE): 'upload_date': '20150414', 'uploader': 'Matthew Williams', } - } + }, { + 'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From dfad3aac981a311de89a5ae29121bcb6255902f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 21:23:26 +0600 Subject: [PATCH 0562/2721] [rutv] Fix live stream test URL --- youtube_dl/extractor/rutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index 1ec2c86e5..6b56b1d49 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -87,7 +87,7 @@ class RUTVIE(InfoExtractor): 'skip': 'Translation has finished', }, { - 'url': 'http://live.russia.tv/index/index/channel_id/3', + 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/', 'info_dict': { 'id': '21', 'ext': 'mp4', From e038d5c4e323bdfa9de02ad59d1caa2c819d9cf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 21:29:32 +0600 Subject: [PATCH 0563/2721] [rutv] Fix preference --- 
youtube_dl/extractor/rutv.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index 6b56b1d49..a5e98415f 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -128,8 +128,10 @@ class RUTVIE(InfoExtractor): elif video_path.startswith('index/iframe/cast_id'): video_type = 'live' + is_live = video_type == 'live' + json_data = self._download_json( - 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), + 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if is_live else '', video_id), video_id, 'Downloading JSON') if json_data['errors']: @@ -156,6 +158,7 @@ class RUTVIE(InfoExtractor): for transport, links in media['sources'].items(): for quality, url in links.items(): + preference = -1 if priority_transport == transport else -2 if transport == 'rtmp': mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) if not mobj: @@ -169,9 +172,11 @@ class RUTVIE(InfoExtractor): 'rtmp_live': True, 'ext': 'flv', 'vbr': int(quality), + 'preference': preference, } elif transport == 'm3u8': - formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4')) + formats.extend(self._extract_m3u8_formats( + url, video_id, 'mp4', preference=preference, m3u8_id='hls')) continue else: fmt = { @@ -181,7 +186,6 @@ class RUTVIE(InfoExtractor): 'width': width, 'height': height, 'format_id': '%s-%s' % (transport, quality), - 'preference': -1 if priority_transport == transport else -2, }) formats.append(fmt) @@ -190,8 +194,6 @@ class RUTVIE(InfoExtractor): self._sort_formats(formats) - is_live = video_type == 'live' - return { 'id': video_id, 'title': self._live_title(title) if is_live else title, From 1aa43d77c084bc2dd735fcd3d03b8b2fc3407c51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 May 2015 21:29:56 +0600 Subject: [PATCH 0564/2721] [rutv] Remove superfluous 
check --- youtube_dl/extractor/rutv.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index a5e98415f..55604637d 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -189,9 +189,6 @@ class RUTVIE(InfoExtractor): }) formats.append(fmt) - if not formats: - raise ExtractorError('No media links available for %s' % video_id) - self._sort_formats(formats) return { From 7212560f4d6b0de5b76eb41090c639855915946e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Dunand?= <aurelien@tassatux.net> Date: Wed, 29 Apr 2015 01:07:33 +0200 Subject: [PATCH 0565/2721] [noco] Retrieve video language according to user options --- youtube_dl/extractor/noco.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 251e6da07..20a658149 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -86,22 +86,36 @@ class NocoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + options = self._call_api('users/init', None, 'Downloading user options JSON')['options'] + audio_lang = options.get('audio_language', 'fr') + medias = self._call_api( 'shows/%s/medias' % video_id, video_id, 'Downloading video JSON') + show = self._call_api( + 'shows/by_id/%s' % video_id, + video_id, 'Downloading show JSON')[0] + + if audio_lang == 'original': + audio_lang = show['original_lang'] + if len(medias) == 1: + audio_lang = list(medias.keys())[0] + elif not audio_lang in medias: + audio_lang = 'fr' + qualities = self._call_api( 'qualities', video_id, 'Downloading qualities JSON') formats = [] - for lang, lang_dict in medias['fr']['video_list'].items(): + for lang, lang_dict in medias[audio_lang]['video_list'].items(): for format_id, fmt in lang_dict['quality_list'].items(): format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id video = 
self._call_api( - 'shows/%s/video/%s/fr' % (video_id, format_id.lower()), + 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang), video_id, 'Downloading %s video JSON' % format_id_extended, lang if lang != 'none' else None) @@ -127,10 +141,6 @@ class NocoIE(InfoExtractor): self._sort_formats(formats) - show = self._call_api( - 'shows/by_id/%s' % video_id, - video_id, 'Downloading show JSON')[0] - upload_date = unified_strdate(show['online_date_start_utc']) uploader = show['partner_name'] uploader_id = show['partner_key'] From ff9d68e7be2dd01a21cc0ed90aaa594b5d36697f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Dunand?= <aurelien@tassatux.net> Date: Mon, 4 May 2015 19:55:29 +0200 Subject: [PATCH 0566/2721] [noco] Add test for multi languages video --- youtube_dl/extractor/noco.py | 45 +++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 20a658149..f86d210ee 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -25,21 +25,38 @@ class NocoIE(InfoExtractor): _SUB_LANG_TEMPLATE = '&sub_lang=%s' _NETRC_MACHINE = 'noco' - _TEST = { - 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', - 'md5': '0a993f0058ddbcd902630b2047ef710e', - 'info_dict': { - 'id': '11538', - 'ext': 'mp4', - 'title': 'Ami Ami Idol - Hello! France', - 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', - 'upload_date': '20140412', - 'uploader': 'Nolife', - 'uploader_id': 'NOL', - 'duration': 2851.2, + _TESTS = [ + { + 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', + 'md5': '0a993f0058ddbcd902630b2047ef710e', + 'info_dict': { + 'id': '11538', + 'ext': 'mp4', + 'title': 'Ami Ami Idol - Hello! 
France', + 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', + 'upload_date': '20140412', + 'uploader': 'Nolife', + 'uploader_id': 'NOL', + 'duration': 2851.2, + }, + 'skip': 'Requires noco account', }, - 'skip': 'Requires noco account', - } + { + 'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call', + 'md5': 'c190f1f48e313c55838f1f412225934d', + 'info_dict': { + 'id': '12610', + 'ext': 'mp4', + 'title': 'The Guild #1 - Wake-Up Call', + 'description': '', + 'upload_date': '20140627', + 'uploader': 'LBL42', + 'uploader_id': 'LBL', + 'duration': 233.023, + }, + 'skip': 'Requires noco account', + } + ] def _real_initialize(self): self._login() From 6568382d6f3a986a773aa4d92b6bcbd367ccb794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2015 02:27:24 +0600 Subject: [PATCH 0567/2721] [noco] Extract all variations of audio/subtitles media --- youtube_dl/extractor/noco.py | 69 +++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index f86d210ee..e44ece5a2 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -103,9 +103,6 @@ class NocoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - options = self._call_api('users/init', None, 'Downloading user options JSON')['options'] - audio_lang = options.get('audio_language', 'fr') - medias = self._call_api( 'shows/%s/medias' % video_id, video_id, 'Downloading video JSON') @@ -114,12 +111,17 @@ class NocoIE(InfoExtractor): 'shows/by_id/%s' % video_id, video_id, 'Downloading show JSON')[0] - if audio_lang == 'original': - audio_lang = show['original_lang'] + options = self._call_api( + 'users/init', video_id, + 'Downloading user options JSON')['options'] + audio_lang_pref = options.get('audio_language') or options.get('language', 'fr') + + if audio_lang_pref == 'original': + audio_lang_pref 
= show['original_lang'] if len(medias) == 1: - audio_lang = list(medias.keys())[0] - elif not audio_lang in medias: - audio_lang = 'fr' + audio_lang_pref = list(medias.keys())[0] + elif audio_lang_pref not in medias: + audio_lang_pref = 'fr' qualities = self._call_api( 'qualities', @@ -127,34 +129,37 @@ class NocoIE(InfoExtractor): formats = [] - for lang, lang_dict in medias[audio_lang]['video_list'].items(): - for format_id, fmt in lang_dict['quality_list'].items(): - format_id_extended = '%s-%s' % (lang, format_id) if lang != 'none' else format_id + for audio_lang, audio_lang_dict in medias.items(): + preference = 1 if audio_lang == audio_lang_pref else 0 + for sub_lang, lang_dict in audio_lang_dict['video_list'].items(): + for format_id, fmt in lang_dict['quality_list'].items(): + format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id) - video = self._call_api( - 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang), - video_id, 'Downloading %s video JSON' % format_id_extended, - lang if lang != 'none' else None) + video = self._call_api( + 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang), + video_id, 'Downloading %s video JSON' % format_id_extended, + sub_lang if sub_lang != 'none' else None) - file_url = video['file'] - if not file_url: - continue + file_url = video['file'] + if not file_url: + continue - if file_url in ['forbidden', 'not found']: - popmessage = video['popmessage'] - self._raise_error(popmessage['title'], popmessage['message']) + if file_url in ['forbidden', 'not found']: + popmessage = video['popmessage'] + self._raise_error(popmessage['title'], popmessage['message']) - formats.append({ - 'url': file_url, - 'format_id': format_id_extended, - 'width': fmt['res_width'], - 'height': fmt['res_lines'], - 'abr': fmt['audiobitrate'], - 'vbr': fmt['videobitrate'], - 'filesize': fmt['filesize'], - 'format_note': qualities[format_id]['quality_name'], - 'preference': qualities[format_id]['priority'], 
- }) + formats.append({ + 'url': file_url, + 'format_id': format_id_extended, + 'width': fmt['res_width'], + 'height': fmt['res_lines'], + 'abr': fmt['audiobitrate'], + 'vbr': fmt['videobitrate'], + 'filesize': fmt['filesize'], + 'format_note': qualities[format_id]['quality_name'], + 'quality': qualities[format_id]['priority'], + 'preference': preference, + }) self._sort_formats(formats) From 815ac0293ee9d7f3771499cd71d833c83575bec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2015 02:38:13 +0600 Subject: [PATCH 0568/2721] [noco] Modernize --- youtube_dl/extractor/noco.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index e44ece5a2..098c564bd 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -14,6 +14,9 @@ from ..compat import ( from ..utils import ( clean_html, ExtractorError, + int_or_none, + float_or_none, + parse_iso8601, unified_strdate, ) @@ -151,22 +154,22 @@ class NocoIE(InfoExtractor): formats.append({ 'url': file_url, 'format_id': format_id_extended, - 'width': fmt['res_width'], - 'height': fmt['res_lines'], - 'abr': fmt['audiobitrate'], - 'vbr': fmt['videobitrate'], - 'filesize': fmt['filesize'], - 'format_note': qualities[format_id]['quality_name'], - 'quality': qualities[format_id]['priority'], + 'width': int_or_none(fmt.get('res_width')), + 'height': int_or_none(fmt.get('res_lines')), + 'abr': int_or_none(fmt.get('audiobitrate')), + 'vbr': int_or_none(fmt.get('videobitrate')), + 'filesize': int_or_none(fmt.get('filesize')), + 'format_note': qualities[format_id].get('quality_name'), + 'quality': qualities[format_id].get('priority'), 'preference': preference, }) self._sort_formats(formats) - upload_date = unified_strdate(show['online_date_start_utc']) - uploader = show['partner_name'] - uploader_id = show['partner_key'] - duration = show['duration_ms'] / 1000.0 
+ timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ') + uploader = show.get('partner_name') + uploader_id = show.get('partner_key') + duration = float_or_none(show.get('duration_ms'), 1000) thumbnails = [] for thumbnail_key, thumbnail_url in show.items(): @@ -198,7 +201,7 @@ class NocoIE(InfoExtractor): 'title': title, 'description': description, 'thumbnails': thumbnails, - 'upload_date': upload_date, + 'timestamp': timestamp, 'uploader': uploader, 'uploader_id': uploader_id, 'duration': duration, From 01e4b1ee14a3e9dedcb6a156c6eaf1603a8a0592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2015 02:50:39 +0600 Subject: [PATCH 0569/2721] [noco] Update tests --- youtube_dl/extractor/noco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 098c564bd..5674ee2a4 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -51,7 +51,7 @@ class NocoIE(InfoExtractor): 'id': '12610', 'ext': 'mp4', 'title': 'The Guild #1 - Wake-Up Call', - 'description': '', + 'timestamp': 1403863200, 'upload_date': '20140627', 'uploader': 'LBL42', 'uploader_id': 'LBL', From ce5c1ae51703d1be4e486cbe8d3dfc2ab16372a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2015 02:52:21 +0600 Subject: [PATCH 0570/2721] [noco] Remove unused import --- youtube_dl/extractor/noco.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 5674ee2a4..664dc81d4 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -17,7 +17,6 @@ from ..utils import ( int_or_none, float_or_none, parse_iso8601, - unified_strdate, ) From e10dc0e1f0f6b33b60a601bf7855e76e7813ecde Mon Sep 17 00:00:00 2001 From: rrooij <rderooij685@gmail.com> Date: Tue, 5 May 2015 08:59:09 +0200 Subject: [PATCH 0571/2721] [southparknl] Add extractor for 
southpark.nl --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/southpark.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1c3a46dd8..b042e9215 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -485,6 +485,7 @@ from .southpark import ( SouthParkIE, SouthParkEsIE, SouthparkDeIE, + SouthParkNlIE ) from .space import SpaceIE from .spankbang import SpankBangIE diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index e3b73295c..ebcd2ed04 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -46,3 +46,18 @@ class SouthparkDeIE(SouthParkIE): 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', }, }] + + +class SouthParkNlIE(SouthParkIE): + IE_NAME = 'southpark.nl' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|full-episodes)/(?P<id>.+?)(\?|#|$))' + _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' + + _TESTS = [{ + 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', + 'info_dict': { + 'id': 'c56b9a1f-0e42-4942-89e0-8e721d814c5c', + 'ext': 'mp4', + 'title': 'South Park 1806: Freemium Isn\'t Free - Act 3', + }, + }] From 3408f6e64a7438dbc1bcc087a00e8540ef660eb3 Mon Sep 17 00:00:00 2001 From: rrooij <rderooij685@gmail.com> Date: Tue, 5 May 2015 09:01:07 +0200 Subject: [PATCH 0572/2721] [southparkde] Fix naming inconsistency The class was first called 'SouthparkDe'. It is now changed to 'SouthParkDe' to match the name of the other extractors. 
--- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/southpark.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b042e9215..5dfa781f8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -484,7 +484,7 @@ from .soundgasm import ( from .southpark import ( SouthParkIE, SouthParkEsIE, - SouthparkDeIE, + SouthParkDeIE, SouthParkNlIE ) from .space import SpaceIE diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index ebcd2ed04..2b12f2ad3 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -32,7 +32,7 @@ class SouthParkEsIE(SouthParkIE): }] -class SouthparkDeIE(SouthParkIE): +class SouthParkDeIE(SouthParkIE): IE_NAME = 'southpark.de' _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' From dcf807790673ef8adb22ed6ff93f6232903d75f6 Mon Sep 17 00:00:00 2001 From: rrooij <rderooij685@gmail.com> Date: Tue, 5 May 2015 09:17:21 +0200 Subject: [PATCH 0573/2721] [southparknl] Fix test to match playlist tests --- youtube_dl/extractor/southpark.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 2b12f2ad3..59e31198c 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -55,9 +55,5 @@ class SouthParkNlIE(SouthParkIE): _TESTS = [{ 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', - 'info_dict': { - 'id': 'c56b9a1f-0e42-4942-89e0-8e721d814c5c', - 'ext': 'mp4', - 'title': 'South Park 1806: Freemium Isn\'t Free - Act 3', - }, + 'playlist_count': 4, }] From 07d2921c6d13ce43adb9347677e50c36268fe54f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 5 May 2015 23:39:54 +0800 Subject: 
[PATCH 0574/2721] [lifenews] Correctly determine iframe links (fixes #5618) --- youtube_dl/extractor/lifenews.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 330138692..963f16e1a 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -39,6 +39,17 @@ class LifeNewsIE(InfoExtractor): 'upload_date': '20150402', 'uploader': 'embed.life.ru', } + }, { + 'url': 'http://lifenews.ru/news/153461', + 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795', + 'info_dict': { + 'id': '153461', + 'ext': 'mp4', + 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве', + 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', + 'upload_date': '20150505', + 'uploader': 'embed.life.ru', + } }] def _real_extract(self, url): @@ -88,6 +99,8 @@ class LifeNewsIE(InfoExtractor): return cur_info if iframe_link: + if iframe_link.startswith('//'): + iframe_link = 'http:' + iframe_link cur_info = dict(common_info) cur_info.update({ '_type': 'url_transparent', From b326b07adc49466a4bdd0ea3c63f329a9e523121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 May 2015 21:49:36 +0600 Subject: [PATCH 0575/2721] [lifenews] Use `_proto_relative_url` --- youtube_dl/extractor/lifenews.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 963f16e1a..081016b80 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -99,8 +99,7 @@ class LifeNewsIE(InfoExtractor): return cur_info if iframe_link: - if iframe_link.startswith('//'): - iframe_link = 'http:' + iframe_link + iframe_link = self._proto_relative_url(iframe_link, 'http:') cur_info = dict(common_info) cur_info.update({ '_type': 'url_transparent', 
From d6a1738892038940ef4af59a33aeddc99ef0c966 Mon Sep 17 00:00:00 2001 From: blissland <blissland.house@googlemail.com> Date: Wed, 6 May 2015 11:48:36 +0100 Subject: [PATCH 0576/2721] [archive.org] Fix incorrect url condition (closes #5628) The condition for assigning to json_url is the wrong way round: currently for url: aaa.com/xxx we get: aaa.com/xxx&output=json instead of the correct value: aaa.com/xxx?output=json --- youtube_dl/extractor/archiveorg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py index 9fc35a42b..8feb7cb74 100644 --- a/youtube_dl/extractor/archiveorg.py +++ b/youtube_dl/extractor/archiveorg.py @@ -33,7 +33,7 @@ class ArchiveOrgIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - json_url = url + ('?' if '?' in url else '&') + 'output=json' + json_url = url + ('&' if '?' in url else '?') + 'output=json' data = self._download_json(json_url, video_id) def get_optional(data_dict, field): From 74f728249fcf58b0c99eb69606b04b304d882423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 May 2015 21:24:24 +0600 Subject: [PATCH 0577/2721] [extractor/common] Fallback to empty string for (yet) missing `format_id` in `_sort_formats` (Closes #5624) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3ae5d5212..34e27a35a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -764,7 +764,7 @@ class InfoExtractor(object): f.get('fps') if f.get('fps') is not None else -1, f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, f.get('source_preference') if f.get('source_preference') is not None else -1, - f.get('format_id'), + f.get('format_id') if f.get('format_id') is not None else '', ) formats.sort(key=_formats_key) From 
e8b9ab8957084d1b7ab60f14574e5d14d9417909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 May 2015 21:31:25 +0600 Subject: [PATCH 0578/2721] [pbs] Add `format_id` for direct links --- youtube_dl/extractor/pbs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 761bd6d8d..143a76696 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -187,6 +187,7 @@ class PBSIE(InfoExtractor): else: formats.append({ 'url': format_url, + 'format_id': redirect.get('eeid'), }) self._sort_formats(formats) From 6a8f9cd22efa7db844e035a3e40571f7d05eebea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 May 2015 21:39:53 +0600 Subject: [PATCH 0579/2721] [giga] Fix view count extraction --- youtube_dl/extractor/giga.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/giga.py b/youtube_dl/extractor/giga.py index 775890112..28eb733e2 100644 --- a/youtube_dl/extractor/giga.py +++ b/youtube_dl/extractor/giga.py @@ -85,7 +85,8 @@ class GigaIE(InfoExtractor): r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False) view_count = str_to_int(self._search_regex( - r'<span class="views"><strong>([\d.]+)</strong>', webpage, 'view count', fatal=False)) + r'<span class="views"><strong>([\d.,]+)</strong>', + webpage, 'view count', fatal=False)) return { 'id': video_id, From 1ed34f3dd6f534a3a5a3d6808d8d3466a9e5dea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 May 2015 21:43:36 +0600 Subject: [PATCH 0580/2721] [gorillavid] Switch 404 test to only matching --- youtube_dl/extractor/gorillavid.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index ed2623456..6147596e4 100644 --- a/youtube_dl/extractor/gorillavid.py +++ 
b/youtube_dl/extractor/gorillavid.py @@ -35,13 +35,7 @@ class GorillaVidIE(InfoExtractor): }, }, { 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html', - 'md5': 'c9e293ca74d46cad638e199c3f3fe604', - 'info_dict': { - 'id': 'z08zf8le23c6', - 'ext': 'mp4', - 'title': 'Say something nice', - 'thumbnail': 're:http://.*\.jpg', - }, + 'only_matching': True, }, { 'url': 'http://daclips.in/3rso4kdn6f9m', 'md5': '1ad8fd39bb976eeb66004d3a4895f106', From ad0c0ad3b4db546dc2078391f397aa5b44c8150b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 May 2015 21:52:26 +0600 Subject: [PATCH 0581/2721] [historicfilms] Fix tape id extraction --- youtube_dl/extractor/historicfilms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/historicfilms.py b/youtube_dl/extractor/historicfilms.py index 40afbe537..6a36933ac 100644 --- a/youtube_dl/extractor/historicfilms.py +++ b/youtube_dl/extractor/historicfilms.py @@ -25,7 +25,8 @@ class HistoricFilmsIE(InfoExtractor): webpage = self._download_webpage(url, video_id) tape_id = self._search_regex( - r'class="tapeId">([^<]+)<', webpage, 'tape id') + [r'class="tapeId"[^>]*>([^<]+)<', r'tapeId\s*:\s*"([^"]+)"'], + webpage, 'tape id') title = self._og_search_title(webpage) description = self._og_search_description(webpage) From ac6c358c2ab882427f74af47fe5df762dd348c20 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 7 May 2015 02:08:47 +0800 Subject: [PATCH 0582/2721] [teamcoco] Fix extracting preload data again --- youtube_dl/extractor/teamcoco.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 2381676b4..95d58ddd0 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import base64 +import binascii import re from .common import 
InfoExtractor @@ -9,6 +10,7 @@ from ..utils import ( ExtractorError, qualities, ) +from ..compat import compat_ord class TeamcocoIE(InfoExtractor): @@ -66,7 +68,7 @@ class TeamcocoIE(InfoExtractor): video_id = self._html_search_regex( self._VIDEO_ID_REGEXES, webpage, 'video id') - preload = None + data = preload = None preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage) if preloads: preload = max([(len(p), p) for p in preloads])[1] @@ -80,11 +82,27 @@ class TeamcocoIE(InfoExtractor): ], webpage.replace('","', ''), 'preload data', default=None) if not preload: + preload_codes = self._html_search_regex( + r'(function.+)setTimeout\(function\(\)\{playlist', + webpage, 'preload codes') + base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes) + base64_fragments.remove('init') + for i in range(len(base64_fragments)): + cur_sequence = (''.join(base64_fragments[i:] + base64_fragments[:i])).encode('ascii') + try: + raw_data = base64.b64decode(cur_sequence) + except (TypeError, binascii.Error): + continue + if compat_ord(raw_data[0]) == compat_ord('{'): + data = self._parse_json(raw_data.decode('utf-8'), video_id, fatal=False) + + if not preload and not data: raise ExtractorError( 'Preload information could not be extracted', expected=True) - data = self._parse_json( - base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id) + if not data: + data = self._parse_json( + base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id) formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) From d9a743d9178b0ed1e44168e42e8cec2d7dd8d63e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 7 May 2015 18:05:37 +0800 Subject: [PATCH 0583/2721] [vice] Remove a redundant print --- youtube_dl/extractor/vice.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 71f520fb5..04e2b0ba7 100644 --- a/youtube_dl/extractor/vice.py +++ 
b/youtube_dl/extractor/vice.py @@ -31,7 +31,6 @@ class ViceIE(InfoExtractor): r'embedCode=([^&\'"]+)', webpage, 'ooyala embed code') ooyala_url = OoyalaIE._url_for_embed_code(embed_code) - print(ooyala_url) except ExtractorError: raise ExtractorError('The page doesn\'t contain a video', expected=True) return self.url_result(ooyala_url, ie='Ooyala') From 05d5392cdaa558dba285c328182d4f3e82fb8e8b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 7 May 2015 18:06:22 +0800 Subject: [PATCH 0584/2721] [common] Ignore subtitles in m3u8 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 34e27a35a..981e34bc7 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -896,7 +896,7 @@ class InfoExtractor(object): format_id = [] if m3u8_id: format_id.append(m3u8_id) - last_media_name = last_media.get('NAME') if last_media else None + last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats))) f = { 'format_id': '-'.join(format_id), From 84bf31aaf8b9b7397de5f3189295d93e8e93e5e2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 7 May 2015 18:12:01 +0800 Subject: [PATCH 0585/2721] [ooyala] Extract m3u8 information (#2292) --- youtube_dl/extractor/ooyala.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index d5b05c18f..b33e8230d 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( unescapeHTML, ExtractorError, + determine_ext, ) @@ -44,11 +45,21 @@ class OoyalaIE(InfoExtractor): ie=cls.ie_key()) def _extract_result(self, info, more_info): + 
embedCode = info['embedCode'] + video_url = info.get('ipad_url') or info['url'] + + if determine_ext(video_url) == 'm3u8': + formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') + else: + formats = [{ + 'url': video_url, + 'ext': 'mp4', + }] + return { - 'id': info['embedCode'], - 'ext': 'mp4', + 'id': embedCode, 'title': unescapeHTML(info['title']), - 'url': info.get('ipad_url') or info['url'], + 'formats': formats, 'description': unescapeHTML(more_info['description']), 'thumbnail': more_info['promo'], } From c09593c04e0b345df02cae663dc064d29e241cba Mon Sep 17 00:00:00 2001 From: blissland <blissland.house@googlemail.com> Date: Thu, 7 May 2015 15:07:11 +0100 Subject: [PATCH 0586/2721] [BildIE] Escape ampersands in xml and update test thumbnail --- youtube_dl/extractor/bild.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py index 77b562d99..ba0c185eb 100644 --- a/youtube_dl/extractor/bild.py +++ b/youtube_dl/extractor/bild.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + fix_xml_ampersands, +) class BildIE(InfoExtractor): @@ -15,7 +18,7 @@ class BildIE(InfoExtractor): 'id': '38184146', 'ext': 'mp4', 'title': 'BILD hat sie getestet', - 'thumbnail': 'http://bilder.bild.de/fotos/stand-das-koennen-die-neuen-ipads-38184138/Bild/1.bild.jpg', + 'thumbnail': 'http://bilder.bild.de/fotos/bild-hat-sie-getestet-das-koennen-apples-neue-ipads-38184138/Bild/1.bild.jpg', 'duration': 196, 'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. 
', } @@ -25,7 +28,7 @@ class BildIE(InfoExtractor): video_id = self._match_id(url) xml_url = url.split(".bild.html")[0] + ",view=xml.bild.xml" - doc = self._download_xml(xml_url, video_id) + doc = self._download_xml(xml_url, video_id, transform_source=fix_xml_ampersands) duration = int_or_none(doc.attrib.get('duration'), scale=1000) From aafe2739909882931f7f624e83fe532af0bfafc1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 7 May 2015 22:07:32 +0800 Subject: [PATCH 0587/2721] [ooyala] Use SAS API to extract info (fixes #4336) --- youtube_dl/extractor/ooyala.py | 43 ++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index b33e8230d..0b049274a 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,12 +1,14 @@ from __future__ import unicode_literals import re import json +import base64 from .common import InfoExtractor from ..utils import ( unescapeHTML, ExtractorError, determine_ext, + int_or_none, ) @@ -33,6 +35,17 @@ class OoyalaIE(InfoExtractor): 'description': '', }, }, + { + # Information available only through SAS api + # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187 + 'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx', + 'md5': 'a84001441b35ea492bc03736e59e7935', + 'info_dict': { + 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx', + 'ext': 'mp4', + 'title': 'Ooyala video', + } + } ] @staticmethod @@ -88,6 +101,36 @@ class OoyalaIE(InfoExtractor): mobile_player, 'info', fatal=False, default=None) if videos_info: break + + if not videos_info: + formats = [] + auth_data = self._download_json( + 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode), + embedCode) + + cur_auth_data = auth_data['authorization_data'][embedCode] + + for 
stream in cur_auth_data['streams']: + formats.append({ + 'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), + 'ext': stream.get('delivery_type'), + 'format': stream.get('video_codec'), + 'format_id': stream.get('profile'), + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + }) + if len(formats): + return { + 'id': embedCode, + 'formats': formats, + 'title': 'Ooyala video', + } + + if not cur_auth_data['authorized']: + raise ExtractorError(cur_auth_data['message'], expected=True) + if not videos_info: raise ExtractorError('Unable to extract info') videos_info = videos_info.replace('\\"', '"') From bc08873cff6d36ba175e5121b0ba1ad270c664c9 Mon Sep 17 00:00:00 2001 From: blissland <blissland.house@googlemail.com> Date: Thu, 7 May 2015 15:09:27 +0100 Subject: [PATCH 0588/2721] Fix indents --- youtube_dl/extractor/bild.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py index ba0c185eb..41dd1cbc1 100644 --- a/youtube_dl/extractor/bild.py +++ b/youtube_dl/extractor/bild.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - int_or_none, - fix_xml_ampersands, + int_or_none, + fix_xml_ampersands, ) From f22834a3723dd59f1e04c27d0bc3a373ebc17183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 7 May 2015 20:20:43 +0600 Subject: [PATCH 0589/2721] [bild] Relax thumbnail test check --- youtube_dl/extractor/bild.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py index 41dd1cbc1..4d8cce1ef 100644 --- a/youtube_dl/extractor/bild.py +++ b/youtube_dl/extractor/bild.py @@ -18,7 +18,7 @@ class BildIE(InfoExtractor): 'id': '38184146', 'ext': 'mp4', 'title': 
'BILD hat sie getestet', - 'thumbnail': 'http://bilder.bild.de/fotos/bild-hat-sie-getestet-das-koennen-apples-neue-ipads-38184138/Bild/1.bild.jpg', + 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 196, 'description': 'Mit dem iPad Air 2 und dem iPad Mini 3 hat Apple zwei neue Tablet-Modelle präsentiert. BILD-Reporter Sven Stein durfte die Geräte bereits testen. ', } From 3799834dcff27ffeea66e3ec96166f8da8fa73ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 7 May 2015 20:46:11 +0600 Subject: [PATCH 0590/2721] [YoutubeDL] Do not force bestvideo+bestaudio when outtmpl is stdout (#5627) --- youtube_dl/YoutubeDL.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index eb7470f72..d8583a8eb 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1086,7 +1086,9 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: req_format_list = [] - if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available: + if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' + and info_dict['extractor'] in ['youtube', 'ted'] + and FFmpegMergerPP(self).available): req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) From 406224be5231e602b543579706ad6056b75fbe68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 7 May 2015 21:02:59 +0600 Subject: [PATCH 0591/2721] [extractor/generic] Fix following incomplete redirects (#5640) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d09e85665..cd7c47d6d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1453,7 +1453,7 @@ class GenericIE(InfoExtractor): if refresh_header: found = re.search(REDIRECT_REGEX, 
refresh_header) if found: - new_url = found.group(1) + new_url = compat_urlparse.urljoin(url, found.group(1)) self.report_following_redirect(new_url) return { '_type': 'url', From 5268a05e4722d74e125a97b023d92943745bb249 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 7 May 2015 17:04:15 +0200 Subject: [PATCH 0592/2721] [ooyala] Style fix --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 0b049274a..c0e6d643d 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -121,7 +121,7 @@ class OoyalaIE(InfoExtractor): 'abr': int_or_none(stream.get('audio_bitrate')), 'vbr': int_or_none(stream.get('video_bitrate')), }) - if len(formats): + if formats: return { 'id': embedCode, 'formats': formats, From 09b412dafa4bc95b9850d77b3bce5f7eac47a578 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 8 May 2015 02:12:28 +0800 Subject: [PATCH 0593/2721] [nhl] Partial support for hlg id (fixes #4285) --- youtube_dl/extractor/nhl.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 407465998..b572370c2 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -21,6 +21,9 @@ class NHLBaseInfoExtractor(InfoExtractor): return json_string.replace('\\\'', '\'') def _real_extract_video(self, video_id): + vid_parts = video_id.split(',') + if len(vid_parts) == 3: + video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0')) json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id data = self._download_json( json_url, video_id, transform_source=self._fix_json) @@ -60,7 +63,7 @@ class NHLBaseInfoExtractor(InfoExtractor): class NHLIE(NHLBaseInfoExtractor): 
IE_NAME = 'nhl.com' - _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)' + _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P<id>[-0-9a-zA-Z,]+)' _TESTS = [{ 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', @@ -101,6 +104,17 @@ class NHLIE(NHLBaseInfoExtractor): }, { 'url': 'http://video.nhl.com/videocenter/?id=736722', 'only_matching': True, + }, { + 'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en', + 'md5': '076fcb88c255154aacbf0a7accc3f340', + 'info_dict': { + 'id': '2014020299-X-h', + 'ext': 'mp4', + 'title': 'Penguins at Islanders / Game Highlights', + 'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014', + 'duration': 268, + 'upload_date': '20141122', + } }] def _real_extract(self, url): From 46be82b811d91be0b0876cf141e6a94e65b8fd7f Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Thu, 7 May 2015 21:58:03 +0300 Subject: [PATCH 0594/2721] [vessel] Use `main_video_asset` when searching for video_asset (Fixes #5623) --- youtube_dl/extractor/vessel.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py index 6215f0642..3c8d2a943 100644 --- a/youtube_dl/extractor/vessel.py +++ b/youtube_dl/extractor/vessel.py @@ -38,9 +38,13 @@ class VesselIE(InfoExtractor): return req @staticmethod - def find_assets(data, asset_type): + def find_assets(data, asset_type, asset_id=None): for asset in data.get('assets', []): - if asset.get('type') == asset_type: + if not asset.get('type') == asset_type: + continue + elif asset_id is not None and not asset.get('id') == asset_id: + continue + else: yield asset def _check_access_rights(self, data): @@ -82,11 +86,13 @@ class VesselIE(InfoExtractor): req = VesselIE.make_json_request( 
self._API_URL_TEMPLATE % asset_id, {'client': 'web'}) data = self._download_json(req, video_id) + video_asset_id = data.get('main_video_asset') self._check_access_rights(data) try: - video_asset = next(VesselIE.find_assets(data, 'video')) + video_asset = next( + VesselIE.find_assets(data, 'video', asset_id=video_asset_id)) except StopIteration: raise ExtractorError('No video assets found') From 156fc83a55b14258bb4a2fa1ec3b02d4db679603 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 8 May 2015 03:02:34 +0800 Subject: [PATCH 0595/2721] [downloader/rtmp] Fix a typo --- youtube_dl/downloader/rtmp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 6865b5e2f..7d19bb808 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -131,7 +131,7 @@ class RtmpFD(FileDownloader): if play_path is not None: basic_args += ['--playpath', play_path] if tc_url is not None: - basic_args += ['--tcUrl', url] + basic_args += ['--tcUrl', tc_url] if test: basic_args += ['--stop', '1'] if flash_version is not None: From 7ef00afe9da87c7d0fdbea93af39b47d5447f1a0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 8 May 2015 03:09:19 +0800 Subject: [PATCH 0596/2721] [nhl] Support RTMP videos (fixes #4481) --- youtube_dl/extractor/nhl.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index b572370c2..279b18386 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -50,7 +50,7 @@ class NHLBaseInfoExtractor(InfoExtractor): video_url = initial_video_url join = compat_urlparse.urljoin - return { + ret = { 'id': video_id, 'title': info['name'], 'url': video_url, @@ -59,6 +59,15 @@ class NHLBaseInfoExtractor(InfoExtractor): 'thumbnail': join(join(video_url, '/u/'), info['bigImage']), 'upload_date': 
unified_strdate(info['releaseDate'].split('.')[0]), } + if video_url.startswith('rtmp:'): + mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url) + ret.update({ + 'tc_url': mobj.group('tc_url'), + 'play_path': mobj.group('play_path'), + 'app': mobj.group('app'), + 'no_resume': True, + }) + return ret class NHLIE(NHLBaseInfoExtractor): @@ -115,6 +124,18 @@ class NHLIE(NHLBaseInfoExtractor): 'duration': 268, 'upload_date': '20141122', } + }, { + 'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4', + 'info_dict': { + 'id': '691469', + 'ext': 'mp4', + 'title': 'RAW | Craig MacTavish Full Press Conference', + 'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.', + 'upload_date': '20141205', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump + } }] def _real_extract(self, url): From a745475808e125a590afb14df48c565309d3f75c Mon Sep 17 00:00:00 2001 From: Behrooz <behrooz.abbasy@gmail.com> Date: Fri, 8 May 2015 02:50:46 +0200 Subject: [PATCH 0597/2721] Ir90Tv Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ir90tv.py | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 youtube_dl/extractor/ir90tv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5dfa781f8..ee05a6958 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -229,6 +229,7 @@ from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE +from .ir90tv import Ir90TvIE from .ivi import ( IviIE, IviCompilationIE diff --git a/youtube_dl/extractor/ir90tv.py b/youtube_dl/extractor/ir90tv.py new file mode 100644 index 000000000..5aa9d6ff4 --- /dev/null +++ b/youtube_dl/extractor/ir90tv.py @@ -0,0 +1,41 @@ +# coding: utf-8 +from __future__ import unicode_literals 
+ +from .common import InfoExtractor + + +class Ir90TvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?90tv\.ir/video/(?P<id>[0-9]+)/.*' + _TEST = { + 'url': 'http://90tv.ir/video/95719/%D8%B4%D8%A7%DB%8C%D8%B9%D8%A7%D8%AA-%D9%86%D9%82%D9%84-%D9%88-%D8%A7%D9%86%D8%AA%D9%82%D8%A7%D9%84%D8%A7%D8%AA-%D9%85%D9%87%D9%85-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7-940218', + 'md5': '411dbd94891381960cb9e13daa47a869', + 'info_dict': { + 'id': '95719', + 'ext': 'mp4', + 'title': 'شایعات نقل و انتقالات مهم فوتبال اروپا 94/02/18', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + title = self._html_search_regex( + r'<title>\n90tv.ir :: (.*?)', webpage, 'title') + + video_url = self._search_regex( + r']+src="([^"]+)"', webpage, 'video url') + + thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url') + print thumbnail + + + return { + 'url': video_url, + 'id': video_id, + 'title': title, + 'video_url' : video_url, + 'thumbnail' : thumbnail, + } \ No newline at end of file From 54b31d149e7be08eb7be9981a9eec398d11f17ef Mon Sep 17 00:00:00 2001 From: Behrooz Date: Fri, 8 May 2015 02:55:01 +0200 Subject: [PATCH 0598/2721] Ir90Tv Add new extractor --- youtube_dl/extractor/ir90tv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ir90tv.py b/youtube_dl/extractor/ir90tv.py index 5aa9d6ff4..3a3cb4887 100644 --- a/youtube_dl/extractor/ir90tv.py +++ b/youtube_dl/extractor/ir90tv.py @@ -38,4 +38,4 @@ class Ir90TvIE(InfoExtractor): 'title': title, 'video_url' : video_url, 'thumbnail' : thumbnail, - } \ No newline at end of file + } From a650110ba762b2658c64392317c1afd2a284dd3d Mon Sep 17 00:00:00 2001 From: Behrooz Date: Fri, 8 May 2015 04:32:08 +0200 Subject: [PATCH 0599/2721] remove print --- youtube_dl/extractor/ir90tv.py | 3 --- 1 
file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/ir90tv.py b/youtube_dl/extractor/ir90tv.py index 3a3cb4887..b79529b1b 100644 --- a/youtube_dl/extractor/ir90tv.py +++ b/youtube_dl/extractor/ir90tv.py @@ -21,7 +21,6 @@ class Ir90TvIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - # TODO more code goes here, for example ... title = self._html_search_regex( r'\n90tv.ir :: (.*?)', webpage, 'title') @@ -29,8 +28,6 @@ class Ir90TvIE(InfoExtractor): r']+src="([^"]+)"', webpage, 'video url') thumbnail = self._search_regex(r'poster="([^"]+)"', webpage, 'thumbnail url') - print thumbnail - return { 'url': video_url, From 541168039d8f3e7680a15cc366fcc94335308d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 8 May 2015 11:01:24 +0200 Subject: [PATCH 0600/2721] [utils] get_exe_version: encode executable name (fixes #5647) It failed in python 2.x when $PATH contains a directory with non-ascii characters. --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1013f7c18..de09b53b2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1380,7 +1380,7 @@ def get_exe_version(exe, args=['--version'], or False if the executable is not present """ try: out, _ = subprocess.Popen( - [exe] + args, + [encodeArgument(exe)] + args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() except OSError: return False From 249962ffa2b155795ebfe0a267eb025cd5e30c56 Mon Sep 17 00:00:00 2001 From: blissland Date: Thu, 7 May 2015 16:56:15 +0100 Subject: [PATCH 0601/2721] [bet] Use unique part of xml url as the video id and fix tests (closes #5642) The guid changes often. 
--- youtube_dl/extractor/bet.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index d2abd4d77..26b934543 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -16,11 +16,11 @@ class BetIE(InfoExtractor): { 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', 'info_dict': { - 'id': '740ab250-bb94-4a8a-8787-fe0de7c74471', + 'id': 'news/national/2014/a-conversation-with-president-obama', 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', 'ext': 'flv', - 'title': 'BET News Presents: A Conversation With President Obama', - 'description': 'md5:5a88d8ae912c1b33e090290af7ec33c6', + 'title': 'A Conversation With President Obama', + 'description': 'md5:699d0652a350cf3e491cd15cc745b5da', 'duration': 1534, 'timestamp': 1418075340, 'upload_date': '20141208', @@ -35,7 +35,7 @@ class BetIE(InfoExtractor): { 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', 'info_dict': { - 'id': 'bcd1b1df-673a-42cf-8d01-b282db608f2d', + 'id': 'news/national/2014/justice-for-ferguson-a-community-reacts', 'display_id': 'justice-for-ferguson-a-community-reacts', 'ext': 'flv', 'title': 'Justice for Ferguson: A Community Reacts', @@ -61,6 +61,9 @@ class BetIE(InfoExtractor): [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], webpage, 'media URL')) + video_id = self._search_regex( + r'/video/(.*)/_jcr_content/', media_url, 'video id') + mrss = self._download_xml(media_url, display_id) item = mrss.find('./channel/item') @@ -75,8 +78,6 @@ class BetIE(InfoExtractor): description = xpath_text( item, './description', 'description', fatal=False) - video_id = xpath_text(item, './guid', 'video id', fatal=False) - timestamp = parse_iso8601(xpath_text( item, xpath_with_ns('./dc:date', NS_MAP), 'upload date', fatal=False)) From 43837189c18af635cfb1cd8fe503265b4b218c32 Mon 
Sep 17 00:00:00 2001 From: blissland Date: Fri, 8 May 2015 10:40:25 +0100 Subject: [PATCH 0602/2721] Fix URL template extraction for netzkino. Fixes #5614 --- youtube_dl/extractor/netzkino.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dl/extractor/netzkino.py index bc17e20aa..0d165a82a 100644 --- a/youtube_dl/extractor/netzkino.py +++ b/youtube_dl/extractor/netzkino.py @@ -49,7 +49,7 @@ class NetzkinoIE(InfoExtractor): 'http://www.netzkino.de/beta/dist/production.min.js', video_id, note='Downloading player code') avo_js = self._search_regex( - r'window\.avoCore\s*=.*?urlTemplate:\s*(\{.*?"\})', + r'var urlTemplate=(\{.*?"\})', production_js, 'URL templates') templates = self._parse_json( avo_js, video_id, transform_source=js_to_json) From d1feb308116f57ceae3888db5e1b93394300f564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 May 2015 20:07:53 +0600 Subject: [PATCH 0603/2721] [mlb] Fallback to extracting video id from webpage for all URLs that does not contain it explicitly (Closes #5630) --- youtube_dl/extractor/mlb.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index ee9ff73bf..109eecefd 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -10,7 +10,21 @@ from ..utils import ( class MLBIE(InfoExtractor): - _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/(?:embed|m-internal-embed)\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?Pn?\d+)' + _VALID_URL = r'''(?x) + https?:// + m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/ + (?: + (?: + (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v| + (?: + shared/video/embed/(?:embed|m-internal-embed)\.html| + [^/]+/video/play\.jsp + )\?.*?\bcontent_id= + ) + (?Pn?\d+)| + (?P.+?) 
+ ) + ''' _TESTS = [ { 'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea', @@ -95,6 +109,12 @@ class MLBIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + if not video_id: + video_path = mobj.group('path') + webpage = self._download_webpage(url, video_path) + video_id = self._search_regex( + r'data-videoid="(\d+)"', webpage, 'video id') + detail = self._download_xml( 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id) From 4e6e9d21bd62c4e2ab2576347e066891092a5783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 May 2015 21:48:47 +0600 Subject: [PATCH 0604/2721] [mlb] Improve _VALID_URL --- youtube_dl/extractor/mlb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 109eecefd..4e054fb53 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -22,7 +22,7 @@ class MLBIE(InfoExtractor): )\?.*?\bcontent_id= ) (?Pn?\d+)| - (?P.+?) 
+ (?:[^/]+/)*(?P[^/]+) ) ''' _TESTS = [ From 34e7dc81a94d39d48c5b4aac8cddcca46edba94d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 May 2015 22:03:03 +0600 Subject: [PATCH 0605/2721] [vgtv] Add support for generic bt.no URLs (#5620) --- youtube_dl/extractor/vgtv.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 69dc9a759..b0f0b3bc2 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -8,7 +8,8 @@ from ..utils import float_or_none class VGTVIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?vgtv\.no/#!/[^/]+/(?P[0-9]+)' + IE_DESC = 'VGTV and BTTV' + _VALID_URL = r'http://(?:www\.)?(?Pvgtv|bt)\.no/(?:(?:tv/)?#!/(?:video|live)/(?P[0-9]+)|(?:[^/]+/)*(?P[^/]+))' _TESTS = [ { # streamType: vod @@ -64,12 +65,25 @@ class VGTVIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien', + 'only_matching': True, + }, ] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + host = mobj.group('host') + + HOST_WEBSITES = { + 'vgtv': 'vgtv', + 'bt': 'bttv', + } + data = self._download_json( - 'http://svp.vg.no/svp/api/v1/vgtv/assets/%s?appName=vgtv-website' % video_id, + 'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website' + % (host, video_id, HOST_WEBSITES[host]), video_id, 'Downloading media JSON') streams = data['streamUrls'] From 0ceab8474924c4e7a6e28497c8da40cc5002c8d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 May 2015 22:18:43 +0600 Subject: [PATCH 0606/2721] [vgtv] Add support for bt.no articles (#5620) --- youtube_dl/extractor/__init__.py | 5 +++- youtube_dl/extractor/vgtv.py | 39 +++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5dfa781f8..587a45940 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -587,7 +587,10 @@ from .veoh import VeohIE from .vessel import VesselIE from .vesti import VestiIE from .vevo import VevoIE -from .vgtv import VGTVIE +from .vgtv import ( + BTArticleIE, + VGTVIE, +) from .vh1 import VH1IE from .vice import ViceIE from .viddler import ViddlerIE diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index b0f0b3bc2..ad07e54c9 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -9,7 +9,18 @@ from ..utils import float_or_none class VGTVIE(InfoExtractor): IE_DESC = 'VGTV and BTTV' - _VALID_URL = r'http://(?:www\.)?(?Pvgtv|bt)\.no/(?:(?:tv/)?#!/(?:video|live)/(?P[0-9]+)|(?:[^/]+/)*(?P[^/]+))' + _VALID_URL = r'''(?x) + (?: + vgtv:| + http://(?:www\.)? + ) + (?Pvgtv|bt) + (?: + :| + \.no/(?:tv/)?#!/(?:video|live)/ + ) + (?P[0-9]+) + ''' _TESTS = [ { # streamType: vod @@ -129,3 +140,29 @@ class VGTVIE(InfoExtractor): 'view_count': data['displays'], 'formats': formats, } + + +class BTArticleIE(InfoExtractor): + IE_DESC = 'Bergens Tidende' + _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P[^/]+)-\d+\.html' + _TEST = { + 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html', + 'md5': 'd055e8ee918ef2844745fcfd1a4175fb', + 'info_dict': { + 'id': '23199', + 'ext': 'mp4', + 'title': 'Alrekstad internat', + 'description': 'md5:dc81a9056c874fedb62fc48a300dac58', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 191, + 'timestamp': 1289991323, + 'upload_date': '20101117', + 'view_count': int, + }, + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, self._match_id(url)) + video_id = self._search_regex( + r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id') + return self.url_result('vgtv:bt:%s' % video_id, 'VGTV') From 2c0c9dc46cda490137b6788d6d66f31ca092f58f Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 May 2015 22:50:01 +0600 Subject: [PATCH 0607/2721] [xstream] Move xstream to separate extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/xstream.py | 115 +++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 youtube_dl/extractor/xstream.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 587a45940..5cc35c8eb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -653,6 +653,7 @@ from .xboxclips import XboxClipsIE from .xhamster import XHamsterIE from .xminus import XMinusIE from .xnxx import XNXXIE +from .xstream import XstreamIE from .xvideos import XVideosIE from .xtube import XTubeUserIE, XTubeIE from .xuite import XuiteIE diff --git a/youtube_dl/extractor/xstream.py b/youtube_dl/extractor/xstream.py new file mode 100644 index 000000000..71584c291 --- /dev/null +++ b/youtube_dl/extractor/xstream.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + xpath_with_ns, + xpath_text, + find_xpath_attr, +) + + +class XstreamIE(InfoExtractor): + _VALID_URL = r'''(?x) + (?: + xstream:| + https?://frontend\.xstream\.(?:dk|net)/ + ) + (?P[^/]+) + (?: + :| + /feed/video/\?.*?\bid= + ) + (?P\d+) + ''' + _TESTS = [{ + 'url': 'http://frontend.xstream.dk/btno/feed/video/?platform=web&id=86588', + 'md5': 'd7d17e3337dc80de6d3a540aefbe441b', + 'info_dict': { + 'id': '86588', + 'ext': 'mov', + 'title': 'Otto Wollertsen', + 'description': 'Vestlendingen Otto Fredrik Wollertsen', + 'timestamp': 1430473209, + 'upload_date': '20150501', + }, + }, { + 'url': 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=21039', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + partner_id = mobj.group('partner_id') + 
video_id = mobj.group('id') + + data = self._download_xml( + 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s' + % (partner_id, video_id), + video_id) + + NS_MAP = { + 'atom': 'http://www.w3.org/2005/Atom', + 'xt': 'http://xstream.dk/', + 'media': 'http://search.yahoo.com/mrss/', + } + + entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) + + title = xpath_text( + entry, xpath_with_ns('./atom:title', NS_MAP), 'title') + description = xpath_text( + entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') + timestamp = parse_iso8601(xpath_text( + entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) + + formats = [] + media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) + for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): + media_url = media_content.get('url') + if not media_url: + continue + tbr = int_or_none(media_content.get('bitrate')) + mobj = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', media_url) + if mobj: + formats.append({ + 'url': mobj.group('url'), + 'play_path': 'mp4:%s' % mobj.group('playpath'), + 'app': mobj.group('app'), + 'ext': 'flv', + 'tbr': tbr, + 'format_id': 'rtmp-%d' % tbr, + }) + else: + formats.append({ + 'url': media_url, + 'tbr': tbr, + }) + self._sort_formats(formats) + + link = find_xpath_attr( + entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') + if link is not None: + formats.append({ + 'url': link.get('href'), + 'format_id': link.get('rel'), + }) + + thumbnails = [{ + 'url': splash.get('url'), + 'width': int_or_none(splash.get('width')), + 'height': int_or_none(splash.get('height')), + } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + 'thumbnails': thumbnails, + } From cbe443362f91ab111e2a01fe8246e17a98668f88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 
May 2015 22:52:20 +0600 Subject: [PATCH 0608/2721] [aftenposten] Implement in terms of xtream extractor --- youtube_dl/extractor/aftenposten.py | 77 +---------------------------- 1 file changed, 1 insertion(+), 76 deletions(-) diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py index e15c015fb..0c00acfb5 100644 --- a/youtube_dl/extractor/aftenposten.py +++ b/youtube_dl/extractor/aftenposten.py @@ -1,21 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_iso8601, - xpath_with_ns, - xpath_text, - find_xpath_attr, -) class AftenpostenIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P\d+)' - _TEST = { 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more', 'md5': 'fd828cd29774a729bf4d4425fe192972', @@ -30,69 +20,4 @@ class AftenpostenIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) - - data = self._download_xml( - 'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id) - - NS_MAP = { - 'atom': 'http://www.w3.org/2005/Atom', - 'xt': 'http://xstream.dk/', - 'media': 'http://search.yahoo.com/mrss/', - } - - entry = data.find(xpath_with_ns('./atom:entry', NS_MAP)) - - title = xpath_text( - entry, xpath_with_ns('./atom:title', NS_MAP), 'title') - description = xpath_text( - entry, xpath_with_ns('./atom:summary', NS_MAP), 'description') - timestamp = parse_iso8601(xpath_text( - entry, xpath_with_ns('./atom:published', NS_MAP), 'upload date')) - - formats = [] - media_group = entry.find(xpath_with_ns('./media:group', NS_MAP)) - for media_content in media_group.findall(xpath_with_ns('./media:content', NS_MAP)): - media_url = media_content.get('url') - if not media_url: - continue - tbr = int_or_none(media_content.get('bitrate')) - mobj = re.search(r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', 
media_url) - if mobj: - formats.append({ - 'url': mobj.group('url'), - 'play_path': 'mp4:%s' % mobj.group('playpath'), - 'app': mobj.group('app'), - 'ext': 'flv', - 'tbr': tbr, - 'format_id': 'rtmp-%d' % tbr, - }) - else: - formats.append({ - 'url': media_url, - 'tbr': tbr, - }) - self._sort_formats(formats) - - link = find_xpath_attr( - entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original') - if link is not None: - formats.append({ - 'url': link.get('href'), - 'format_id': link.get('rel'), - }) - - thumbnails = [{ - 'url': splash.get('url'), - 'width': int_or_none(splash.get('width')), - 'height': int_or_none(splash.get('height')), - } for splash in media_group.findall(xpath_with_ns('./xt:splash', NS_MAP))] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'formats': formats, - 'thumbnails': thumbnails, - } + return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream') From fe373287ebdda002ed84dca1d8b9d6f8a5686138 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 May 2015 22:59:50 +0600 Subject: [PATCH 0609/2721] [vgtv] Add support for bt vestlendingen (Closes #5620) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vgtv.py | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5cc35c8eb..96cf28efe 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -589,6 +589,7 @@ from .vesti import VestiIE from .vevo import VevoIE from .vgtv import ( BTArticleIE, + BTVestlendingenIE, VGTVIE, ) from .vh1 import VH1IE diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index ad07e54c9..db7a4bdb1 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -143,7 +143,8 @@ class VGTVIE(InfoExtractor): class BTArticleIE(InfoExtractor): - IE_DESC = 'Bergens Tidende' + IE_NAME = 
'bt:article' + IE_DESC = 'Bergens Tidende Articles' _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P[^/]+)-\d+\.html' _TEST = { 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html', @@ -166,3 +167,26 @@ class BTArticleIE(InfoExtractor): video_id = self._search_regex( r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id') return self.url_result('vgtv:bt:%s' % video_id, 'VGTV') + + +class BTVestlendingenIE(InfoExtractor): + IE_NAME = 'bt:vestlendingen' + IE_DESC = 'Bergens Tidende - Vestlendingen' + _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P\d+)' + _TEST = { + 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588', + 'md5': 'd7d17e3337dc80de6d3a540aefbe441b', + 'info_dict': { + 'id': '86588', + 'ext': 'mov', + 'title': 'Otto Wollertsen', + 'description': 'Vestlendingen Otto Fredrik Wollertsen', + 'timestamp': 1430473209, + 'upload_date': '20150501', + }, + } + + def _real_extract(self, url): + return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream') + + From 4384cf9e7d59492141ebd45f77830238097c695c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 8 May 2015 23:04:27 +0600 Subject: [PATCH 0610/2721] [extractor/__init__] Fix alphabetic order --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 96cf28efe..7f0070784 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -655,9 +655,9 @@ from .xhamster import XHamsterIE from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE -from .xvideos import XVideosIE from .xtube import XTubeUserIE, XTubeIE from .xuite import XuiteIE +from .xvideos import XVideosIE from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, From bb03fdae0d4da9c591c2967044c5e30bf797c22a Mon Sep 17 00:00:00 2001 From: "Sergey M." 
Date: Fri, 8 May 2015 23:19:57 +0600 Subject: [PATCH 0611/2721] [README.md] Clarify format selection when streaming to stdout --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9aeb114f3..b6e9429df 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ The simplest case is requesting a specific format, for example `-f 22`. You can If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f +` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. -Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. 
If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. +Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded an muxed. If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. From 50b901306406d5c37f31880860e2a4dbb5e0a165 Mon Sep 17 00:00:00 2001 From: "Sergey M." 
Date: Fri, 8 May 2015 23:21:23 +0600 Subject: [PATCH 0612/2721] [README.md] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b6e9429df..3d9436456 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ The simplest case is requesting a specific format, for example `-f 22`. You can If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f +` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`. -Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. 
If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded an muxed. +Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed. If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. 
You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. From 79998cd5afb2e16fe14cebdbec81b21c45c24c32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 May 2015 00:12:42 +0600 Subject: [PATCH 0613/2721] [svtplay] Generalize svt extractors and add svt.se extractor --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/svtplay.py | 98 ++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f0070784..79236c6c1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -505,7 +505,10 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .sunporno import SunPornoIE -from .svtplay import SVTPlayIE +from .svtplay import ( + SVTIE, + SVTPlayIE, +) from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE diff --git a/youtube_dl/extractor/svtplay.py b/youtube_dl/extractor/svtplay.py index 433dfd1cb..732f02048 100644 --- a/youtube_dl/extractor/svtplay.py +++ b/youtube_dl/extractor/svtplay.py @@ -9,41 +9,9 @@ from ..utils import ( ) -class SVTPlayIE(InfoExtractor): - IE_DESC = 'SVT Play and Öppet arkiv' - _VALID_URL = r'https?://(?:www\.)?(?Psvtplay|oppetarkiv)\.se/video/(?P[0-9]+)' - _TESTS = [{ - 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', - 'md5': 'ade3def0643fa1c40587a422f98edfd9', - 'info_dict': { - 'id': '2609989', - 'ext': 'flv', - 'title': 'SM veckan vinter, Örebro - Rally, final', - 'duration': 4500, - 'thumbnail': 're:^https?://.*[\.-]jpg$', - 'age_limit': 0, - }, - }, { - 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318', - 'md5': 'c3101a17ce9634f4c1f9800f0746c187', - 'info_dict': { - 'id': '1058509', - 'ext': 'flv', - 
'title': 'Farlig kryssning', - 'duration': 2566, - 'thumbnail': 're:^https?://.*[\.-]jpg$', - 'age_limit': 0, - }, - 'skip': 'Only works from Sweden', - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - host = mobj.group('host') - - info = self._download_json( - 'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id) +class SVTBaseIE(InfoExtractor): + def _extract_video(self, url, video_id): + info = self._download_json(url, video_id) title = info['context']['title'] thumbnail = info['context'].get('thumbnailImage') @@ -80,3 +48,63 @@ class SVTPlayIE(InfoExtractor): 'duration': duration, 'age_limit': age_limit, } + + +class SVTIE(SVTBaseIE): + _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P\d+)&.*?\barticleId=(?P\d+)' + _TEST = { + 'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', + 'md5': '9648197555fc1b49e3dc22db4af51d46', + 'info_dict': { + 'id': '2900353', + 'ext': 'flv', + 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', + 'duration': 27, + 'age_limit': 0, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + widget_id = mobj.group('widget_id') + article_id = mobj.group('id') + return self._extract_video( + 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), + article_id) + + +class SVTPlayIE(SVTBaseIE): + IE_DESC = 'SVT Play and Öppet arkiv' + _VALID_URL = r'https?://(?:www\.)?(?Psvtplay|oppetarkiv)\.se/video/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', + 'md5': 'ade3def0643fa1c40587a422f98edfd9', + 'info_dict': { + 'id': '2609989', + 'ext': 'flv', + 'title': 'SM veckan vinter, Örebro - Rally, final', + 'duration': 4500, + 'thumbnail': 're:^https?://.*[\.-]jpg$', + 'age_limit': 0, + }, + }, { + 'url': 
'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318', + 'md5': 'c3101a17ce9634f4c1f9800f0746c187', + 'info_dict': { + 'id': '1058509', + 'ext': 'flv', + 'title': 'Farlig kryssning', + 'duration': 2566, + 'thumbnail': 're:^https?://.*[\.-]jpg$', + 'age_limit': 0, + }, + 'skip': 'Only works from Sweden', + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + host = mobj.group('host') + return self._extract_video( + 'http://www.%s.se/video/%s?output=json' % (host, video_id), + video_id) From 322915014f0378e2675a2a17cd67fe89a6e6a7d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 May 2015 00:13:40 +0600 Subject: [PATCH 0614/2721] [svtplay] Rename to svt --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/{svtplay.py => svt.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename youtube_dl/extractor/{svtplay.py => svt.py} (100%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 79236c6c1..0a18dba5c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -505,7 +505,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .sunporno import SunPornoIE -from .svtplay import ( +from .svt import ( SVTIE, SVTPlayIE, ) diff --git a/youtube_dl/extractor/svtplay.py b/youtube_dl/extractor/svt.py similarity index 100% rename from youtube_dl/extractor/svtplay.py rename to youtube_dl/extractor/svt.py From bab19a8e91153705d6600fe1d1a0b0aa0bf93bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 May 2015 00:23:35 +0600 Subject: [PATCH 0615/2721] [extractor/generic] Add support for svt embeds (Closes #5622) --- youtube_dl/extractor/generic.py | 6 ++++++ youtube_dl/extractor/svt.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py 
index cd7c47d6d..046bcb0f0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -37,6 +37,7 @@ from .condenast import CondeNastIE from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE from .bliptv import BlipTVIE +from .svt import SVTIE class GenericIE(InfoExtractor): @@ -1091,6 +1092,11 @@ class GenericIE(InfoExtractor): if bliptv_url: return self.url_result(bliptv_url, 'BlipTV') + # Look for SVT player + svt_url = SVTIE._extract_url(webpage) + if svt_url: + return self.url_result(svt_url, 'SVT') + # Look for embedded condenast player matches = re.findall( r' Date: Sat, 9 May 2015 00:27:37 +0600 Subject: [PATCH 0616/2721] [extractor/generic] Add test for svt embed --- youtube_dl/extractor/generic.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 046bcb0f0..7c38bce7c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -659,6 +659,17 @@ class GenericIE(InfoExtractor): 'title': 'Facebook Creates "On This Day" | Crunch Report', }, }, + # SVT embed + { + 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun', + 'info_dict': { + 'id': '2900353', + 'ext': 'flv', + 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', + 'duration': 27, + 'age_limit': 0, + }, + }, # RSS feed with enclosure { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', From de765f6c3188802bb2dea704a645f539fa61c8aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 May 2015 02:15:51 +0600 Subject: [PATCH 0617/2721] [foxsports] Support some more URLs (#5611) --- youtube_dl/extractor/foxsports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py index 363866b64..df7665176 100644 --- a/youtube_dl/extractor/foxsports.py +++ b/youtube_dl/extractor/foxsports.py @@ -5,7 
+5,7 @@ from ..utils import smuggle_url class FoxSportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxsports\.com/video\?vid=(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P[^/]+)' _TEST = { 'url': 'http://www.foxsports.com/video?vid=432609859715', From 3dbec410a0e195036025aa3a3792932783f371d2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 May 2015 13:19:54 +0800 Subject: [PATCH 0618/2721] [sohu] Enhance error handling --- youtube_dl/extractor/sohu.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index f8a4840f7..13b9e9133 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -8,7 +8,10 @@ from ..compat import ( compat_str, compat_urllib_request ) -from ..utils import sanitize_url_path_consecutive_slashes +from ..utils import ( + sanitize_url_path_consecutive_slashes, + ExtractorError, +) class SohuIE(InfoExtractor): @@ -117,6 +120,15 @@ class SohuIE(InfoExtractor): r'var vid ?= ?["\'](\d+)["\']', webpage, 'video path') vid_data = _fetch_data(vid, mytv) + if vid_data['play'] != 1: + if vid_data.get('status') == 12: + raise ExtractorError( + 'Sohu said: There\'s something wrong in the video.', + expected=True) + else: + raise ExtractorError( + 'Sohu said: The video is only licensed to users in Mainland China.', + expected=True) formats_json = {} for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): From 32060c6d6b618fa858b2ce43db34d02fd43bc542 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 May 2015 13:54:28 +0800 Subject: [PATCH 0619/2721] [sohu] Update extractor The original extraction logic always fails for all test videos --- youtube_dl/extractor/sohu.py | 44 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 13b9e9133..eab4adfca 100644 --- 
a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -8,10 +8,7 @@ from ..compat import ( compat_str, compat_urllib_request ) -from ..utils import ( - sanitize_url_path_consecutive_slashes, - ExtractorError, -) +from ..utils import ExtractorError class SohuIE(InfoExtractor): @@ -31,7 +28,7 @@ class SohuIE(InfoExtractor): } }, { 'url': 'http://tv.sohu.com/20150305/n409385080.shtml', - 'md5': '699060e75cf58858dd47fb9c03c42cfb', + 'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a', 'info_dict': { 'id': '409385080', 'ext': 'mp4', @@ -39,7 +36,7 @@ class SohuIE(InfoExtractor): } }, { 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml', - 'md5': '9bf34be48f2f4dadcb226c74127e203c', + 'md5': '49308ff6dafde5ece51137d04aec311e', 'info_dict': { 'id': '78693464', 'ext': 'mp4', @@ -53,7 +50,7 @@ class SohuIE(InfoExtractor): 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', }, 'playlist': [{ - 'md5': 'bdbfb8f39924725e6589c146bc1883ad', + 'md5': '492923eac023ba2f13ff69617c32754a', 'info_dict': { 'id': '78910339_part1', 'ext': 'mp4', @@ -61,7 +58,7 @@ class SohuIE(InfoExtractor): 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', } }, { - 'md5': '3e1f46aaeb95354fd10e7fca9fc1804e', + 'md5': 'de604848c0e8e9c4a4dde7e1347c0637', 'info_dict': { 'id': '78910339_part2', 'ext': 'mp4', @@ -69,7 +66,7 @@ class SohuIE(InfoExtractor): 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', } }, { - 'md5': '8407e634175fdac706766481b9443450', + 'md5': '93584716ee0657c0b205b8aa3d27aa13', 'info_dict': { 'id': '78910339_part3', 'ext': 'mp4', @@ -144,24 +141,21 @@ class SohuIE(InfoExtractor): for i in range(part_count): formats = [] for format_id, format_data in formats_json.items(): - allot = format_data['allot'] - prot = format_data['prot'] - data = format_data['data'] - clips_url = data['clipsURL'] - su = data['su'] - part_str = self._download_webpage( - 'http://%s/?prot=%s&file=%s&new=%s' % - (allot, prot, clips_url[i], su[i]), - video_id, - 'Downloading %s video URL part %d of %d' - % (format_id, i + 1, part_count)) - - 
part_info = part_str.split('|') - - video_url = sanitize_url_path_consecutive_slashes( - '%s%s?key=%s' % (part_info[0], su[i], part_info[3])) + # URLs starts with http://newflv.sohu.ccgslb.net/ is not usable + # so retry until got a working URL + video_url = 'newflv.sohu.ccgslb.net' + retries = 0 + while 'newflv.sohu.ccgslb.net' in video_url and retries < 5: + download_note = 'Download information from CDN gateway for format ' + format_id + if retries > 0: + download_note += ' (retry #%d)' % retries + retries += 1 + cdn_info = self._download_json( + 'http://data.vod.itc.cn/cdnList?new=' + data['su'][i], + video_id, download_note) + video_url = cdn_info['url'] formats.append({ 'url': video_url, From 6d14d08e062ff3d6e0fd17f04cb341099097902c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 May 2015 17:36:07 +0800 Subject: [PATCH 0620/2721] [yam] Fix title and uploader id --- youtube_dl/extractor/yam.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 19f8762ae..9d851bae3 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -9,6 +9,7 @@ from ..utils import ( float_or_none, month_by_abbreviation, ExtractorError, + get_element_by_attribute, ) @@ -23,6 +24,7 @@ class YamIE(InfoExtractor): 'id': '2283921', 'ext': 'mp3', 'title': '發現 - 趙薇 京華煙雲主題曲', + 'description': '發現 - 趙薇 京華煙雲主題曲', 'uploader_id': 'princekt', 'upload_date': '20080807', 'duration': 313.0, @@ -55,6 +57,17 @@ class YamIE(InfoExtractor): 'ext': 'mp4', }, 'skip': 'invalid YouTube URL', + }, { + 'url': 'http://mymedia.yam.com/m/2373534', + 'md5': '7ff74b91b7a817269d83796f8c5890b1', + 'info_dict': { + 'id': '2373534', + 'ext': 'mp3', + 'title': '林俊傑&蔡卓妍-小酒窩', + 'description': 'md5:904003395a0fcce6cfb25028ff468420', + 'upload_date': '20080928', + 'uploader_id': 'onliner2', + } }] def _real_extract(self, url): @@ -75,15 +88,19 @@ class YamIE(InfoExtractor): if youtube_url: 
return self.url_result(youtube_url, 'Youtube') + title = self._html_search_regex( + r']+class="heading"[^>]*>\s*(.+)\s*', page, 'title') + api_page = self._download_webpage( 'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id, note='Downloading API page') api_result_obj = compat_urlparse.parse_qs(api_page) + info_table = get_element_by_attribute('class', 'info', page) uploader_id = self._html_search_regex( - r':[\n ]+(?P[A-Z][a-z]{2}) ' + + r':[\n ]+(?P[A-Z][a-z]{2})\s+' + r'(?P\d{1,2}), (?P\d{4})', page) if mobj: upload_date = '%s%02d%02d' % ( @@ -97,7 +114,8 @@ class YamIE(InfoExtractor): return { 'id': video_id, 'url': api_result_obj['mp3file'][0], - 'title': self._html_search_meta('description', page), + 'title': title, + 'description': self._html_search_meta('description', page), 'duration': duration, 'uploader_id': uploader_id, 'upload_date': upload_date, From d39e0f05db226ef5691f5730d40da796aec6bac6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 9 May 2015 17:37:39 +0800 Subject: [PATCH 0621/2721] [utils] Remove sanitize_url_path_consecutive_slashes() This function is used only in SohuIE, which is updated to use a new extraction logic. 
--- test/test_utils.py | 21 --------------------- youtube_dl/utils.py | 7 ------- 2 files changed, 28 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 032d3656a..86b110a7d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -40,7 +40,6 @@ from youtube_dl.utils import ( read_batch_urls, sanitize_filename, sanitize_path, - sanitize_url_path_consecutive_slashes, prepend_extension, replace_extension, shell_quote, @@ -176,26 +175,6 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') - def test_sanitize_url_path_consecutive_slashes(self): - self.assertEqual( - sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'), - 'http://hostname/foo/bar/filename.html') - self.assertEqual( - sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'), - 'http://hostname/foo/bar/filename.html') - self.assertEqual( - sanitize_url_path_consecutive_slashes('http://hostname//'), - 'http://hostname/') - self.assertEqual( - sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'), - 'http://hostname/foo/bar/filename.html') - self.assertEqual( - sanitize_url_path_consecutive_slashes('http://hostname/'), - 'http://hostname/') - self.assertEqual( - sanitize_url_path_consecutive_slashes('http://hostname/abc//'), - 'http://hostname/abc/') - def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index de09b53b2..d73efcf25 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -327,13 +327,6 @@ def sanitize_path(s): return os.path.join(*sanitized_path) -def sanitize_url_path_consecutive_slashes(url): - """Collapses consecutive slashes in URLs' path""" - parsed_url = list(compat_urlparse.urlparse(url)) - parsed_url[2] = 
re.sub(r'/{2,}', '/', parsed_url[2]) - return compat_urlparse.urlunparse(parsed_url) - - def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] From 5c0b2c16a80c509dbcee48f48da3de0bf9912cda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 9 May 2015 12:34:45 +0200 Subject: [PATCH 0622/2721] [vgtv] Escape '#' in _VALID_URL and remove empty newlines at the end In verbose mode, '#' is interpreted as the start of a comment. --- youtube_dl/extractor/vgtv.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index db7a4bdb1..eb2652fb1 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -17,7 +17,7 @@ class VGTVIE(InfoExtractor): (?Pvgtv|bt) (?: :| - \.no/(?:tv/)?#!/(?:video|live)/ + \.no/(?:tv/)?\#!/(?:video|live)/ ) (?P[0-9]+) ''' @@ -188,5 +188,3 @@ class BTVestlendingenIE(InfoExtractor): def _real_extract(self, url): return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream') - - From 3b5f65a64c06859cdee0b93f319c80d5c116cedd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 9 May 2015 12:41:56 +0200 Subject: [PATCH 0623/2721] [mlb] Fix extraction of articles And move test from generic, since it's directly handled by MLBIE --- youtube_dl/extractor/generic.py | 13 ------------- youtube_dl/extractor/mlb.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7c38bce7c..3d756e848 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -414,19 +414,6 @@ class GenericIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, }, - # MLB articles - { - 'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer', - 'md5': 
'b190e70141fb9a1552a85426b4da1b5d', - 'info_dict': { - 'id': '75609783', - 'ext': 'mp4', - 'title': 'Must C: Pillar climbs for catch', - 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run', - 'timestamp': 1429124820, - 'upload_date': '20150415', - } - }, # Wistia embed { 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 4e054fb53..40c9ecb35 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -82,6 +82,18 @@ class MLBIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, }, + { + 'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer', + 'md5': 'b190e70141fb9a1552a85426b4da1b5d', + 'info_dict': { + 'id': '75609783', + 'ext': 'mp4', + 'title': 'Must C: Pillar climbs for catch', + 'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run', + 'timestamp': 1429124820, + 'upload_date': '20150415', + } + }, { 'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb', 'only_matching': True, @@ -113,7 +125,7 @@ class MLBIE(InfoExtractor): video_path = mobj.group('path') webpage = self._download_webpage(url, video_path) video_id = self._search_regex( - r'data-videoid="(\d+)"', webpage, 'video id') + r'data-video-?id="(\d+)"', webpage, 'video id') detail = self._download_xml( 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' From d592b42f5c37c4b0d9f1587dd76f225b0287959f Mon Sep 17 00:00:00 2001 From: blissland Date: Sat, 9 May 2015 15:26:00 +0100 Subject: [PATCH 0624/2721] Updated two tests for BRIE --- youtube_dl/extractor/br.py | 30 
+++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 45ba51732..04a3ecd17 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -16,27 +16,27 @@ class BRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html', - 'md5': '93556dd2bcb2948d9259f8670c516d59', + 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html', + 'md5': '83a0477cf0b8451027eb566d88b51106', 'info_dict': { - 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a', + 'id': '48f656ef-287e-486f-be86-459122db22cc', 'ext': 'mp4', - 'title': 'Wenn das Traditions-Theater wackelt', - 'description': 'Heimatsound-Festival 2014: Wenn das Traditions-Theater wackelt', - 'duration': 34, - 'uploader': 'BR', - 'upload_date': '20140802', + 'title': 'Die böse Überraschung', + 'description': 'Betriebliche Altersvorsorge: Die böse Überraschung', + 'duration': 180, + 'uploader': 'Reinhard Weber', + 'upload_date': '20150422', } }, { - 'url': 'http://www.br.de/nachrichten/schaeuble-haushaltsentwurf-bundestag-100.html', - 'md5': '3db0df1a9a9cd9fa0c70e6ea8aa8e820', + 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', + 'md5': 'a44396d73ab6a68a69a568fae10705bb', 'info_dict': { - 'id': 'c6aae3de-2cf9-43f2-957f-f17fef9afaab', - 'ext': 'aac', - 'title': '"Keine neuen Schulden im nächsten Jahr"', - 'description': 'Haushaltsentwurf: "Keine neuen Schulden im nächsten Jahr"', - 'duration': 64, + 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05', + 'ext': 'mp4', + 'title': 'Manfred Schreiber ist tot', + 'description': 'Abendschau kompakt: Manfred Schreiber ist tot', + 'duration': 26, } }, { From 0892090a56e04726175c247d13ecce7f6c9cb839 Mon Sep 17 00:00:00 2001 From: blissland Date: Sat, 9 May 2015 16:02:07 +0100 Subject: [PATCH 0625/2721] 
Added audio test for BRIE --- youtube_dl/extractor/br.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 04a3ecd17..66e394e10 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -39,6 +39,17 @@ class BRIE(InfoExtractor): 'duration': 26, } }, + { + 'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html', + 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d', + 'info_dict': { + 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b', + 'ext': 'aac', + 'title': 'Kurzweilig und sehr bewegend', + 'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend', + 'duration': 296, + } + }, { 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', 'md5': 'dbab0aef2e047060ea7a21fc1ce1078a', From 32fffff2ccc044c639c8723281981aa347423762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 May 2015 21:19:09 +0600 Subject: [PATCH 0626/2721] [eroprofile] Fix video URL extraction (Closes #5657) --- youtube_dl/extractor/eroprofile.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py index 0cbca90b0..316033cf1 100644 --- a/youtube_dl/extractor/eroprofile.py +++ b/youtube_dl/extractor/eroprofile.py @@ -4,7 +4,10 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + unescapeHTML +) class EroProfileIE(InfoExtractor): @@ -75,8 +78,8 @@ class EroProfileIE(InfoExtractor): [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], webpage, 'video id', default=None) - video_url = self._search_regex( - r'([^<]+)', webpage, 'title') thumbnail = self._search_regex( From f2e0056579ac507b776ce2c86b5281fc28bbc275 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 May 2015 21:23:09 +0600 Subject: [PATCH 0627/2721] [vgtv] Avoid duplicate format_id --- youtube_dl/extractor/vgtv.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index eb2652fb1..e6ee1e471 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -103,11 +103,14 @@ class VGTVIE(InfoExtractor): hls_url = streams.get('hls') if hls_url: - formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4')) + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', m3u8_id='hls')) hds_url = streams.get('hds') if hds_url: - formats.extend(self._extract_f4m_formats(hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id)) + formats.extend(self._extract_f4m_formats( + hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', + video_id, f4m_id='hds')) mp4_url = streams.get('mp4') if mp4_url: From 480065172d4c97f00973b3f0bf24cd1b8e567627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 May 2015 00:26:42 +0600 Subject: [PATCH 0628/2721] [lifenews] Add support for video URLs (Closes #5660) --- youtube_dl/extractor/lifenews.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 081016b80..92031e843 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -14,7 +14,7 @@ from ..utils import ( class LifeNewsIE(InfoExtractor): IE_NAME = 'lifenews' IE_DESC = 'LIFE | NEWS' - _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?news/(?P\d+)' + _VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P
    news|video)/(?P\d+)' _TESTS = [{ 'url': 'http://lifenews.ru/news/126342', @@ -55,12 +55,15 @@ class LifeNewsIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + section = mobj.group('section') - webpage = self._download_webpage('http://lifenews.ru/news/%s' % video_id, video_id, 'Downloading page') + webpage = self._download_webpage( + 'http://lifenews.ru/%s/%s' % (section, video_id), + video_id, 'Downloading page') videos = re.findall(r'[^"]+)".*?src="(?P', webpage) iframe_link = self._html_search_regex( - ']+src="([^"]+)', webpage, 'iframe link', default=None) + ']+src=["\']([^"\']+)["\']', webpage, 'iframe link', default=None) if not videos and not iframe_link: raise ExtractorError('No media links available for %s' % video_id) From 057ebeaca3da40b901b2592e2302a0d4bbab48e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 May 2015 00:27:49 +0600 Subject: [PATCH 0629/2721] [lifenews] Add test for #5660 --- youtube_dl/extractor/lifenews.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 92031e843..7d5b8621b 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -50,6 +50,9 @@ class LifeNewsIE(InfoExtractor): 'upload_date': '20150505', 'uploader': 'embed.life.ru', } + }, { + 'url': 'http://lifenews.ru/video/13035', + 'only_matching': True, }] def _real_extract(self, url): From 754270313a2b337eda98fa95232bd1064e294173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 May 2015 01:03:26 +0600 Subject: [PATCH 0630/2721] [life:embed] Move to separated extractor and extract m3u8 formats --- youtube_dl/extractor/__init__.py | 5 +++- youtube_dl/extractor/lifenews.py | 47 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 
0a18dba5c..f117578a2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -258,7 +258,10 @@ from .letv import ( LetvPlaylistIE ) from .libsyn import LibsynIE -from .lifenews import LifeNewsIE +from .lifenews import ( + LifeNewsIE, + LifeEmbedIE, +) from .liveleak import LiveLeakIE from .livestream import ( LivestreamIE, diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 7d5b8621b..7f39fa4cf 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -4,7 +4,9 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( + determine_ext, int_or_none, unified_strdate, ExtractorError, @@ -119,3 +121,48 @@ class LifeNewsIE(InfoExtractor): return make_entry(video_id, videos[0]) else: return [make_entry(video_id, media, video_number + 1) for video_number, media in enumerate(videos)] + + +class LifeEmbedIE(InfoExtractor): + IE_NAME = 'life:embed' + _VALID_URL = r'http://embed\.life\.ru/embed/(?P[\da-f]{32})' + + _TEST = { + 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291', + 'md5': 'b889715c9e49cb1981281d0e5458fbbe', + 'info_dict': { + 'id': 'e50c2dec2867350528e2574c899b8291', + 'ext': 'mp4', + 'title': 'e50c2dec2867350528e2574c899b8291', + 'thumbnail': 're:http://.*\.jpg', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + formats = [] + for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage): + video_url = compat_urlparse.urljoin(url, video_url) + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='m3u8')) + else: + formats.append({ + 'url': video_url, + 'format_id': ext, + 'preference': 1, + }) + + thumbnail = self._search_regex( + r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None) + + 
return { + 'id': video_id, + 'title': video_id, + 'thumbnail': thumbnail, + 'formats': formats, + } From 69fe3a5f0961c0ae602da531d2b0fb3f11b9d7c9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 May 2015 01:05:24 +0200 Subject: [PATCH 0631/2721] release 2015.05.10 --- docs/supportedsites.md | 9 ++++++++- youtube_dl/version.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 6d2e496a8..98b625380 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -64,6 +64,8 @@ - **BR**: Bayerischer Rundfunk Mediathek - **Break** - **Brightcove** + - **bt:article**: Bergens Tidende Articles + - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **BuzzFeed** - **BYUtv** - **Camdemy** @@ -240,6 +242,7 @@ - **LetvPlaylist** - **LetvTv** - **Libsyn** + - **life:embed** - **lifenews**: LIFE | NEWS - **LiveLeak** - **livestream** @@ -328,6 +331,7 @@ - **ntv.ru** - **Nuvid** - **NYTimes** + - **NYTimesArticle** - **ocw.mit.edu** - **Odnoklassniki** - **OktoberfestTV** @@ -434,6 +438,7 @@ - **southpark.cc.com** - **southpark.cc.com:español** - **southpark.de** + - **southpark.nl** - **Space** - **SpankBang** - **Spankwire** @@ -453,6 +458,7 @@ - **StreamCZ** - **StreetVoice** - **SunPorno** + - **SVT** - **SVTPlay**: SVT Play and Öppet arkiv - **SWRMediathek** - **Syfy** @@ -529,7 +535,7 @@ - **Vessel** - **Vesti**: Вести.Ru - **Vevo** - - **VGTV** + - **VGTV**: VGTV and BTTV - **vh1.com** - **Vice** - **Viddler** @@ -587,6 +593,7 @@ - **XHamster** - **XMinus** - **XNXX** + - **Xstream** - **XTube** - **XTubeUser**: XTube user profile - **Xuite** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a5a81bcd2..83c5a1659 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.04' +__version__ = '2015.05.10' From 3800b908b1976f242b41d5a2d114418559ce3b48 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 May 2015 06:14:34 +0600 Subject: [PATCH 0632/2721] [mlb] Fix #5663 --- youtube_dl/extractor/mlb.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 40c9ecb35..e242b897f 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -12,13 +12,13 @@ from ..utils import ( class MLBIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/ + (?:[\da-z_-]+\.)*mlb\.com/ (?: (?: (?:.*?/)?video/(?:topic/[\da-z_-]+/)?v| (?: shared/video/embed/(?:embed|m-internal-embed)\.html| - [^/]+/video/play\.jsp + (?:[^/]+/)+(?:play|index)\.jsp| )\?.*?\bcontent_id= ) (?Pn?\d+)| @@ -114,6 +114,10 @@ class MLBIE(InfoExtractor): # From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer 'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb', 'only_matching': True, + }, + { + 'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#', + 'only_matching': True, } ] @@ -125,7 +129,7 @@ class MLBIE(InfoExtractor): video_path = mobj.group('path') webpage = self._download_webpage(url, video_path) video_id = self._search_regex( - r'data-video-?id="(\d+)"', webpage, 'video id') + [r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id') detail = self._download_xml( 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' From c6ddbdb66c5d6ead5e198013c54ef53d641063f1 Mon Sep 17 00:00:00 2001 From: Duncan Date: Sun, 10 May 2015 12:30:07 +1200 Subject: [PATCH 0633/2721] [voicerepublic] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/voicerepublic.py | 55 +++++++++++++++++++++++++++ 
2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/voicerepublic.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f117578a2..5cb3c304d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -634,6 +634,7 @@ from .vk import ( VKUserVideosIE, ) from .vodlocker import VodlockerIE +from .voicerepublic import VoiceRepublicIE from .vporn import VpornIE from .vrt import VRTIE from .vube import VubeIE diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py new file mode 100644 index 000000000..1a90693cb --- /dev/null +++ b/youtube_dl/extractor/voicerepublic.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_urllib_request, +) + + +class VoiceRepublicIE(InfoExtractor): + _VALID_URL = r'https?://voicerepublic\.com/talks/(?P[0-9a-z-]+)' + _TEST = { + 'url': 'https://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', + 'md5': '0554a24d1657915aa8e8f84e15dc9353', + 'info_dict': { + 'id': '2296', + 'ext': 'm4a', + 'title': 'Watching the Watchers: Building a Sousveillance State', + 'thumbnail': 'https://voicerepublic.com/system/flyer/2296.png', + 'description': 'md5:715ba964958afa2398df615809cfecb1', + 'creator': 'M. C. McGrath', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + req = compat_urllib_request.Request(url) + # Older versions of Firefox get redirected to an "upgrade browser" page + req.add_header('User-Agent', 'youtube-dl') + webpage = self._download_webpage(req, display_id) + thumbnail = self._og_search_thumbnail(webpage) + video_id = self._search_regex(r'/(\d+)\.png', thumbnail, 'id') + + if '
    ', webpage, 'author', fatal=False), + } From 95eb1adda8692a61db639fb21344ad22d1847044 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 May 2015 08:54:50 +0600 Subject: [PATCH 0634/2721] [life:embed] Sort formats --- youtube_dl/extractor/lifenews.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 7f39fa4cf..42cb6e35f 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -156,6 +156,7 @@ class LifeEmbedIE(InfoExtractor): 'format_id': ext, 'preference': 1, }) + self._sort_formats(formats) thumbnail = self._search_regex( r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None) From f900dc3fb9e17e399b0f33925ee239696cc46010 Mon Sep 17 00:00:00 2001 From: Duncan Date: Sun, 10 May 2015 15:01:58 +1200 Subject: [PATCH 0635/2721] [voicerepublic] Extract author using _html_search_meta --- youtube_dl/extractor/voicerepublic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 1a90693cb..7d255d6fa 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -51,5 +51,5 @@ class VoiceRepublicIE(InfoExtractor): 'url': self._og_search_url(webpage), 'thumbnail': thumbnail, 'description': self._og_search_description(webpage), - 'creator': self._search_regex(r'', webpage, 'author', fatal=False), + 'creator': self._html_search_meta('author', webpage), } From 03f760b1c0478c1f65cf6e978d7592be46873313 Mon Sep 17 00:00:00 2001 From: Duncan Date: Sun, 10 May 2015 15:40:09 +1200 Subject: [PATCH 0636/2721] [voicerepublic] Remove creator field --- youtube_dl/extractor/voicerepublic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 7d255d6fa..960974e16 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ 
b/youtube_dl/extractor/voicerepublic.py @@ -19,7 +19,6 @@ class VoiceRepublicIE(InfoExtractor): 'title': 'Watching the Watchers: Building a Sousveillance State', 'thumbnail': 'https://voicerepublic.com/system/flyer/2296.png', 'description': 'md5:715ba964958afa2398df615809cfecb1', - 'creator': 'M. C. McGrath', } } @@ -51,5 +50,4 @@ class VoiceRepublicIE(InfoExtractor): 'url': self._og_search_url(webpage), 'thumbnail': thumbnail, 'description': self._og_search_description(webpage), - 'creator': self._html_search_meta('author', webpage), } From f03a8a3c4ec4dc95164c12181ffc1ddcb7583ef6 Mon Sep 17 00:00:00 2001 From: Duncan Date: Sun, 10 May 2015 15:12:29 +1200 Subject: [PATCH 0637/2721] [voicerepublic] Raise ExtractorError if audio is still being processed --- youtube_dl/extractor/voicerepublic.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 960974e16..d3e35a815 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -2,10 +2,8 @@ from __future__ import unicode_literals from .common import InfoExtractor - -from ..compat import ( - compat_urllib_request, -) +from ..compat import compat_urllib_request +from ..utils import ExtractorError class VoiceRepublicIE(InfoExtractor): @@ -31,17 +29,16 @@ class VoiceRepublicIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) video_id = self._search_regex(r'/(\d+)\.png', thumbnail, 'id') - if '
    Queued for processing, please stand by...' in webpage: + raise ExtractorError('Audio is still queued for processing') + + formats = [{ + 'url': 'https://voicerepublic.com/vrmedia/{}-clean.{}'.format(video_id, ext), + 'ext': ext, + 'format_id': ext, + 'vcodec': 'none', + } for ext in ['m4a', 'mp3', 'ogg']] + self._sort_formats(formats) return { 'id': video_id, From 28ebef0b1b1b7b97137fbd8e093c09cb51954606 Mon Sep 17 00:00:00 2001 From: Duncan Date: Sun, 10 May 2015 16:03:09 +1200 Subject: [PATCH 0638/2721] [voicerepublic] Detect list of available formats from the web page --- youtube_dl/extractor/voicerepublic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index d3e35a815..d150b5b5e 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_urllib_request from ..utils import ExtractorError @@ -32,12 +34,15 @@ class VoiceRepublicIE(InfoExtractor): if 'Queued for processing, please stand by...' 
in webpage: raise ExtractorError('Audio is still queued for processing') + ext_matches = re.finditer(r'data-\w+=\'/vrmedia/\d+-clean\.(\w+)\'', webpage) + exts = [match.group(1) for match in ext_matches] + formats = [{ 'url': 'https://voicerepublic.com/vrmedia/{}-clean.{}'.format(video_id, ext), 'ext': ext, 'format_id': ext, 'vcodec': 'none', - } for ext in ['m4a', 'mp3', 'ogg']] + } for ext in exts] self._sort_formats(formats) return { From 1dcb52188d3709711b3ea5ae1ff6bdb985e79c62 Mon Sep 17 00:00:00 2001 From: Duncan Date: Sun, 10 May 2015 16:38:26 +1200 Subject: [PATCH 0639/2721] [voicerepublic] Remove hardcoded paths to media files --- youtube_dl/extractor/voicerepublic.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index d150b5b5e..a3e40b940 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -34,15 +34,12 @@ class VoiceRepublicIE(InfoExtractor): if 'Queued for processing, please stand by...' in webpage: raise ExtractorError('Audio is still queued for processing') - ext_matches = re.finditer(r'data-\w+=\'/vrmedia/\d+-clean\.(\w+)\'', webpage) - exts = [match.group(1) for match in ext_matches] - formats = [{ - 'url': 'https://voicerepublic.com/vrmedia/{}-clean.{}'.format(video_id, ext), + 'url': 'https://voicerepublic.com' + path, 'ext': ext, 'format_id': ext, 'vcodec': 'none', - } for ext in exts] + } for ext, path in re.findall(r"data-([^=]+)='(/[^']+\.\1)'", webpage)] self._sort_formats(formats) return { From a909e6ad43f9d9661691739a810d7b8853e17175 Mon Sep 17 00:00:00 2001 From: ping Date: Sun, 10 May 2015 15:27:55 +0800 Subject: [PATCH 0640/2721] [dailymotion] Patch upload_date detection. 
(closes #5665) --- youtube_dl/extractor/dailymotion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index aa595af20..db10b8d00 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -52,6 +52,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'uploader': 'IGN', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', + 'upload_date': '20150306', } }, # Vevo video @@ -106,9 +107,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): age_limit = self._rta_search(webpage) video_upload_date = None - mobj = re.search(r'
    ([0-9]{2})-([0-9]{2})-([0-9]{4})
    ', webpage) + mobj = re.search(r'', webpage) if mobj is not None: - video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) + video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) embed_url = 'https://www.dailymotion.com/embed/video/%s' % video_id embed_request = self._build_request(embed_url) From 1934f3a0eaf16ae1d1644178b7128806b8629867 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 10 May 2015 18:22:07 +0800 Subject: [PATCH 0641/2721] [ndr] Extended to support n-joy.de as well (closes #4527) According to http://en.wikipedia.org/wiki/N-Joy, n-joy.de is a service hosted by NDR, so I put them together. --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/ndr.py | 90 ++++++++++++++++++++------------ 2 files changed, 61 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f117578a2..66adb4de7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -324,7 +324,10 @@ from .nbc import ( NBCSportsIE, NBCSportsVPlayerIE, ) -from .ndr import NDRIE +from .ndr import ( + NDRIE, + NJoyIE, +) from .ndtv import NDTVIE from .netzkino import NetzkinoIE from .nerdcubed import NerdCubedFeedIE diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index f49c66690..afb9eda27 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -8,41 +8,11 @@ from ..utils import ( ExtractorError, int_or_none, qualities, + parse_duration, ) -class NDRIE(InfoExtractor): - IE_NAME = 'ndr' - IE_DESC = 'NDR.de - Mediathek' - _VALID_URL = r'https?://www\.ndr\.de/.+?(?P\d+)\.html' - - _TESTS = [ - { - 'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html', - 'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c', - 'note': 'Video file', - 'info_dict': { - 'id': '25866', - 'ext': 'mp4', - 'title': 'Kartoffeltage in der Lewitz', - 'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8', - 
'duration': 166, - } - }, - { - 'url': 'http://www.ndr.de/info/audio51535.html', - 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', - 'note': 'Audio file', - 'info_dict': { - 'id': '51535', - 'ext': 'mp3', - 'title': 'La Valette entgeht der Hinrichtung', - 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', - 'duration': 884, - } - } - ] - +class NDRBaseIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -54,7 +24,11 @@ class NDRIE(InfoExtractor): if description: description = description.strip() - duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', fatal=False)) + duration = int_or_none(self._html_search_regex(r'duration: (\d+),\n', page, 'duration', default=None)) + if not duration: + duration = parse_duration(self._html_search_regex( + r'(\d+:\d+)', + page, 'duration', default=None)) formats = [] @@ -92,3 +66,53 @@ class NDRIE(InfoExtractor): 'duration': duration, 'formats': formats, } + + +class NDRIE(NDRBaseIE): + IE_NAME = 'ndr' + IE_DESC = 'NDR.de - Mediathek' + _VALID_URL = r'https?://www\.ndr\.de/.+?(?P\d+)\.html' + + _TESTS = [ + { + 'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html', + 'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c', + 'note': 'Video file', + 'info_dict': { + 'id': '25866', + 'ext': 'mp4', + 'title': 'Kartoffeltage in der Lewitz', + 'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8', + 'duration': 166, + } + }, + { + 'url': 'http://www.ndr.de/info/audio51535.html', + 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', + 'note': 'Audio file', + 'info_dict': { + 'id': '51535', + 'ext': 'mp3', + 'title': 'La Valette entgeht der Hinrichtung', + 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', + 'duration': 884, + } + } + ] + + +class NJoyIE(NDRBaseIE): + IE_NAME = 'N-JOY' + _VALID_URL = r'https?://www\.n-joy\.de/.+?(?P\d+)\.html' + + _TEST = { + 'url': 
'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', + 'md5': 'cb63be60cd6f9dd75218803146d8dc67', + 'info_dict': { + 'id': '2480', + 'ext': 'mp4', + 'title': 'Benaissa beim NDR Comedy Contest', + 'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.', + 'duration': 654, + } + } From 63cbd19f500eb4d90c1fc7c09f04de5df43a6a04 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 10 May 2015 18:30:26 +0800 Subject: [PATCH 0642/2721] [ndr] Replace the 404 test case --- youtube_dl/extractor/ndr.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index afb9eda27..79a13958b 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -84,7 +84,19 @@ class NDRIE(NDRBaseIE): 'title': 'Kartoffeltage in der Lewitz', 'description': 'md5:48c4c04dde604c8a9971b3d4e3b9eaa8', 'duration': 166, - } + }, + 'skip': '404 Not found', + }, + { + 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', + 'md5': 'dadc003c55ae12a5d2f6bd436cd73f59', + 'info_dict': { + 'id': '988', + 'ext': 'mp4', + 'title': 'Party, Pötte und Parade', + 'description': 'Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. 
Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt.', + 'duration': 3498, + }, }, { 'url': 'http://www.ndr.de/info/audio51535.html', From a6762c4a22325b5b69770de82df8725d2eb5c3df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 May 2015 18:29:15 +0600 Subject: [PATCH 0643/2721] [voicerepublic] Make more robust and extract more metadata --- youtube_dl/extractor/voicerepublic.py | 95 ++++++++++++++++++++------- 1 file changed, 71 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index a3e40b940..1106c655b 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -1,52 +1,99 @@ -# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_request -from ..utils import ExtractorError +from ..compat import ( + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + determine_ext, + int_or_none, +) class VoiceRepublicIE(InfoExtractor): - _VALID_URL = r'https?://voicerepublic\.com/talks/(?P[0-9a-z-]+)' - _TEST = { - 'url': 'https://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', + _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P[0-9a-z-]+)' + _TESTS = [{ + 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', 'md5': '0554a24d1657915aa8e8f84e15dc9353', 'info_dict': { 'id': '2296', + 'display_id': 'watching-the-watchers-building-a-sousveillance-state', 'ext': 'm4a', 'title': 'Watching the Watchers: Building a Sousveillance State', - 'thumbnail': 'https://voicerepublic.com/system/flyer/2296.png', 'description': 'md5:715ba964958afa2398df615809cfecb1', + 'thumbnail': 're:^https?://.*\.(?:png|jpg)$', + 'duration': 1800, + 'view_count': int, } - } + }, { + 'url': 
'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) - req = compat_urllib_request.Request(url) + + req = compat_urllib_request.Request( + compat_urlparse.urljoin(url, '/talks/%s' % display_id)) # Older versions of Firefox get redirected to an "upgrade browser" page req.add_header('User-Agent', 'youtube-dl') webpage = self._download_webpage(req, display_id) - thumbnail = self._og_search_thumbnail(webpage) - video_id = self._search_regex(r'/(\d+)\.png', thumbnail, 'id') - if 'Queued for processing, please stand by...' in webpage: - raise ExtractorError('Audio is still queued for processing') + if '>Queued for processing, please stand by...<' in webpage: + raise ExtractorError( + 'Audio is still queued for processing', expected=True) - formats = [{ - 'url': 'https://voicerepublic.com' + path, - 'ext': ext, - 'format_id': ext, - 'vcodec': 'none', - } for ext, path in re.findall(r"data-([^=]+)='(/[^']+\.\1)'", webpage)] + data = self._parse_json( + self._search_regex( + r'(?s)return ({.+?});\s*\n', webpage, + 'data', default=None), + display_id, fatal=False) + + if data: + title = data['title'] + description = data.get('teaser') + talk_id = data.get('talk_id') or display_id + talk = data['talk'] + duration = int_or_none(talk.get('duration')) + formats = [{ + 'url': compat_urlparse.urljoin(url, talk_url), + 'format_id': format_id, + 'ext': determine_ext(talk_url) or format_id, + 'vcodec': 'none', + } for format_id, talk_url in talk['links'].items()] + else: + title = self._og_search_title(webpage) + description = self._html_search_regex( + r"(?s)
    ]*>(.+?)
    ", + webpage, 'description', fatal=False) + talk_id = self._search_regex( + [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"], + webpage, 'talk id', default=None) or display_id + duration = None + formats = [{ + 'url': compat_urlparse.urljoin(url, talk_url), + 'format_id': format_id, + 'ext': determine_ext(talk_url) or format_id, + 'vcodec': 'none', + } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", webpage)] self._sort_formats(formats) + thumbnail = self._og_search_thumbnail(webpage) + view_count = int_or_none(self._search_regex( + r"class='play-count[^']*'>\s*(\d+) plays", + webpage, 'play count', fatal=False)) + return { - 'id': video_id, - 'title': self._og_search_title(webpage), - 'formats': formats, - 'url': self._og_search_url(webpage), + 'id': talk_id, + 'display_id': display_id, + 'title': title, + 'description': description, 'thumbnail': thumbnail, - 'description': self._og_search_description(webpage), + 'duration': duration, + 'view_count': view_count, + 'formats': formats, } From 370b39e8ece9f475d489eda721130eec9a9f15e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 May 2015 18:37:52 +0600 Subject: [PATCH 0644/2721] [voicerepublic] Fix fallback branch formats extraction --- youtube_dl/extractor/voicerepublic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 1106c655b..254383d6c 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -47,12 +47,10 @@ class VoiceRepublicIE(InfoExtractor): raise ExtractorError( 'Audio is still queued for processing', expected=True) - data = self._parse_json( - self._search_regex( - r'(?s)return ({.+?});\s*\n', webpage, - 'data', default=None), - display_id, fatal=False) - + config = self._search_regex( + r'(?s)return ({.+?});\s*\n', webpage, + 'data', default=None) + data = self._parse_json(config, display_id, 
fatal=False) if config else None if data: title = data['title'] description = data.get('teaser') @@ -74,12 +72,14 @@ class VoiceRepublicIE(InfoExtractor): [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"], webpage, 'talk id', default=None) or display_id duration = None + player = self._search_regex( + r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player') formats = [{ 'url': compat_urlparse.urljoin(url, talk_url), 'format_id': format_id, 'ext': determine_ext(talk_url) or format_id, 'vcodec': 'none', - } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", webpage)] + } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)] self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) From 95c5534f8ed016a81f715f291ab09c4ea2c3679c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 May 2015 17:41:11 +0200 Subject: [PATCH 0645/2721] ExecAfterDownloadPP, YoutubeDL: remove unused parameters --- youtube_dl/YoutubeDL.py | 1 - youtube_dl/__init__.py | 2 -- youtube_dl/postprocessor/execafterdownload.py | 3 +-- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d8583a8eb..4cf83c510 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -260,7 +260,6 @@ class YoutubeDL(object): The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, otherwise prefer avconv. 
- exec_cmd: Arbitrary command to run after downloading """ params = None diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c88489f29..9cc9f851f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -246,7 +246,6 @@ def _real_main(argv=None): if opts.exec_cmd: postprocessors.append({ 'key': 'ExecAfterDownload', - 'verboseOutput': opts.verbose, 'exec_cmd': opts.exec_cmd, }) if opts.xattr_set_filesize: @@ -345,7 +344,6 @@ def _real_main(argv=None): 'default_search': opts.default_search, 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, 'encoding': opts.encoding, - 'exec_cmd': opts.exec_cmd, 'extract_flat': opts.extract_flat, 'merge_output_format': opts.merge_output_format, 'postprocessors': postprocessors, diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 341437575..765fd8fe4 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -8,8 +8,7 @@ from ..utils import PostProcessingError class ExecAfterDownloadPP(PostProcessor): - def __init__(self, downloader=None, verboseOutput=None, exec_cmd=None): - self.verboseOutput = verboseOutput + def __init__(self, downloader=None, exec_cmd=None): self.exec_cmd = exec_cmd def run(self, information): From 69b46b3d956220e4b3a3d5eda55768753a67f19d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 May 2015 17:47:49 +0200 Subject: [PATCH 0646/2721] ExecAfterDownloadPP: fix __init__ method --- youtube_dl/postprocessor/execafterdownload.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 765fd8fe4..13794b7ba 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -8,7 +8,8 @@ from ..utils import PostProcessingError class 
ExecAfterDownloadPP(PostProcessor): - def __init__(self, downloader=None, exec_cmd=None): + def __init__(self, downloader, exec_cmd): + super(ExecAfterDownloadPP, self).__init__(downloader) self.exec_cmd = exec_cmd def run(self, information): From 70484b9f8ae629ccb87e8c0569f8f4bf2dfdb0ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 May 2015 00:26:39 +0600 Subject: [PATCH 0647/2721] [postprocessor/ffmpeg] Extract `check_outdated` method --- youtube_dl/postprocessor/ffmpeg.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 214de39f9..211faf69a 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -36,7 +36,9 @@ class FFmpegPostProcessor(PostProcessor): def check_version(self): if not self.available: raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') + self.check_outdated() + def check_outdated(self): required_version = '10-0' if self.basename == 'avconv' else '1.0' if is_outdated_version( self._versions[self.basename], required_version): @@ -44,6 +46,8 @@ class FFmpegPostProcessor(PostProcessor): self.basename, self.basename, required_version) if self._downloader: self._downloader.report_warning(warning) + return True + return False @staticmethod def get_versions(downloader=None): From 7fcb605b82796e79a5f559624808ca9404df1154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 May 2015 00:27:29 +0600 Subject: [PATCH 0648/2721] [YoutubeDL] Fallback to `-f best` when merger is outdated --- youtube_dl/YoutubeDL.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4cf83c510..7c3bdb964 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1086,9 +1086,10 @@ class YoutubeDL(object): if req_format is None: req_format_list = [] if (self.params.get('outtmpl', DEFAULT_OUTTMPL) 
!= '-' - and info_dict['extractor'] in ['youtube', 'ted'] - and FFmpegMergerPP(self).available): - req_format_list.append('bestvideo+bestaudio') + and info_dict['extractor'] in ['youtube', 'ted']): + merger = FFmpegMergerPP(self) + if merger.available and not merger.check_outdated(): + req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) formats_to_download = [] From 13763ce599c8fbba43e57d2d79a9b007cfbd4ced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 May 2015 02:00:31 +0600 Subject: [PATCH 0649/2721] [postprocessor/ffmpeg] Add `can_merge` method --- youtube_dl/postprocessor/ffmpeg.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 211faf69a..cc65b34e7 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -36,9 +36,7 @@ class FFmpegPostProcessor(PostProcessor): def check_version(self): if not self.available: raise FFmpegPostProcessorError('ffmpeg or avconv not found. 
Please install one.') - self.check_outdated() - def check_outdated(self): required_version = '10-0' if self.basename == 'avconv' else '1.0' if is_outdated_version( self._versions[self.basename], required_version): @@ -46,8 +44,6 @@ class FFmpegPostProcessor(PostProcessor): self.basename, self.basename, required_version) if self._downloader: self._downloader.report_warning(warning) - return True - return False @staticmethod def get_versions(downloader=None): @@ -595,6 +591,23 @@ class FFmpegMergerPP(FFmpegPostProcessor): os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return info['__files_to_merge'], info + def can_merge(self): + # TODO: figure out merge-capable ffmpeg version + if self.basename != 'avconv': + return True + + required_version = '10-0' + if is_outdated_version( + self._versions[self.basename], required_version): + warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, ' + 'youtube-dl will download single file media. 
' + 'Update %s to version %s or newer to fix this.') % ( + self.basename, self.basename, required_version) + if self._downloader: + self._downloader.report_warning(warning) + return False + return True + class FFmpegFixupStretchedPP(FFmpegPostProcessor): def run(self, info): From 97fcf1bbd07ae0c5b6e530dcf2623d199452a76c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 May 2015 02:01:16 +0600 Subject: [PATCH 0650/2721] [YoutubeDL] Check if merger can actually merge --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 7c3bdb964..00f86b342 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1088,7 +1088,7 @@ class YoutubeDL(object): if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and info_dict['extractor'] in ['youtube', 'ted']): merger = FFmpegMergerPP(self) - if merger.available and not merger.check_outdated(): + if merger.available and merger.can_merge(): req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) From e41f450f2860ab5aa3f3a04bc646594c6dbc6714 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 May 2015 20:04:05 +0800 Subject: [PATCH 0651/2721] [tmz] Add support for articles (fixes #5477) --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/tmz.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e808f2734..b376fd279 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -543,7 +543,10 @@ from .thesixtyone import TheSixtyOneIE from .thisav import ThisAVIE from .tinypic import TinyPicIE from .tlc import TlcIE, TlcDeIE -from .tmz import TMZIE +from .tmz import ( + TMZIE, + TMZArticleIE, +) from .tnaflix import TNAFlixIE from .thvideo import ( THVideoIE, diff --git 
a/youtube_dl/extractor/tmz.py b/youtube_dl/extractor/tmz.py index c5c6fdc51..7dbe68b5c 100644 --- a/youtube_dl/extractor/tmz.py +++ b/youtube_dl/extractor/tmz.py @@ -30,3 +30,31 @@ class TMZIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': self._html_search_meta('ThumbURL', webpage), } + + +class TMZArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tmz\.com/\d{4}/\d{2}/\d{2}/(?P[^/]+)/?' + _TEST = { + 'url': 'http://www.tmz.com/2015/04/19/bobby-brown-bobbi-kristina-awake-video-concert', + 'md5': 'e482a414a38db73087450e3a6ce69d00', + 'info_dict': { + 'id': '0_6snoelag', + 'ext': 'mp4', + 'title': 'Bobby Brown Tells Crowd ... Bobbi Kristina is Awake', + 'description': 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + embedded_video_info_str = self._html_search_regex( + r'tmzVideoEmbedV2\("([^)]+)"\);', webpage, 'embedded video info') + + embedded_video_info = self._parse_json( + embedded_video_info_str, video_id, + transform_source=lambda s: s.replace('\\', '')) + + return self.url_result( + 'http://www.tmz.com/videos/%s/' % embedded_video_info['id']) From 1f92865494c6efa1a0d5d90ffa849e85b80c8248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 May 2015 21:05:39 +0600 Subject: [PATCH 0652/2721] [dumpert] Add cpc cookie (Closes #5672) --- youtube_dl/extractor/dumpert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index 9c594b757..999fb5620 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -26,7 +26,7 @@ class DumpertIE(InfoExtractor): video_id = self._match_id(url) req = compat_urllib_request.Request(url) - req.add_header('Cookie', 'nsfw=1') + 
req.add_header('Cookie', 'nsfw=1; cpc=10') webpage = self._download_webpage(req, video_id) files_base64 = self._search_regex( From 511565282861e26913caddc1bcc0c865a9eec786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 May 2015 21:31:36 +0600 Subject: [PATCH 0653/2721] [zingmp3] Capture error message --- youtube_dl/extractor/zingmp3.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dl/extractor/zingmp3.py index 1afbe68ed..7dc1e2f2b 100644 --- a/youtube_dl/extractor/zingmp3.py +++ b/youtube_dl/extractor/zingmp3.py @@ -4,12 +4,18 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError class ZingMp3BaseInfoExtractor(InfoExtractor): - @staticmethod - def _extract_item(item): + def _extract_item(self, item): + error_message = item.find('./errormessage').text + if error_message: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_message), + expected=True) + title = item.find('./title').text.strip() source = item.find('./source').text extension = item.attrib['type'] From 81ed3bb9c0edb9a11b43964459ef57cca5683461 Mon Sep 17 00:00:00 2001 From: rrooij Date: Mon, 11 May 2015 17:38:08 +0200 Subject: [PATCH 0654/2721] [southpark] Sort alphabetically --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b376fd279..3368edf7c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -489,8 +489,8 @@ from .soundgasm import ( ) from .southpark import ( SouthParkIE, - SouthParkEsIE, SouthParkDeIE, + SouthParkEsIE, SouthParkNlIE ) from .space import SpaceIE From 968ee176777a1bf4e33cfb849a7241b0ac45d254 Mon Sep 17 00:00:00 2001 From: rrooij Date: Mon, 11 May 2015 18:02:25 +0200 Subject: [PATCH 0655/2721] [southparkdk] Add extractor --- 
youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/southpark.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3368edf7c..de19dfd7a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -490,6 +490,7 @@ from .soundgasm import ( from .southpark import ( SouthParkIE, SouthParkDeIE, + SouthParkDkIE, SouthParkEsIE, SouthParkNlIE ) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 59e31198c..83e5a7659 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -57,3 +57,13 @@ class SouthParkNlIE(SouthParkIE): 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', 'playlist_count': 4, }] + +class SouthParkDkIE(SouthParkIE): + IE_NAME = 'southpark.dk' + _VALID_URL = r'https?://(?:www\.)?(?Psouthparkstudios\.dk/(?:clips|full-episodes)/(?P.+?)(\?|#|$))' + _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' + + _TESTS = [{ + 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', + 'playlist_count': 4, + }] From 6d3f5935e516760964052718e6b90324c6f07391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 May 2015 23:47:50 +0600 Subject: [PATCH 0656/2721] [southpark] Fix IE_NAME --- youtube_dl/extractor/southpark.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 83e5a7659..7fb165a87 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -58,8 +58,9 @@ class SouthParkNlIE(SouthParkIE): 'playlist_count': 4, }] + class SouthParkDkIE(SouthParkIE): - IE_NAME = 'southpark.dk' + IE_NAME = 'southparkstudios.dk' _VALID_URL = r'https?://(?:www\.)?(?Psouthparkstudios\.dk/(?:clips|full-episodes)/(?P.+?)(\?|#|$))' _FEED_URL = 
'http://www.southparkstudios.dk/feeds/video-player/mrss/' From d4b963d0a68f81f4fef5495af14e2e41add21a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 12 May 2015 01:54:56 +0600 Subject: [PATCH 0657/2721] [vine] Relax `alt_title` (Closes #5677) --- youtube_dl/extractor/vine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index 65c459fad..c733a48fa 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -75,7 +75,7 @@ class VineIE(InfoExtractor): return { 'id': video_id, 'title': self._og_search_title(webpage), - 'alt_title': self._og_search_description(webpage), + 'alt_title': self._og_search_description(webpage, default=None), 'description': data['description'], 'thumbnail': data['thumbnailUrl'], 'upload_date': unified_strdate(data['created']), From 5332fd91bf16867b6777bd6cfd0b5086f84112c5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 12 May 2015 12:42:13 +0800 Subject: [PATCH 0658/2721] [nytimes] Correct _VALID_URL of NYTimesArticleIE --- youtube_dl/extractor/nytimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py index 6ffbe3863..7f254b867 100644 --- a/youtube_dl/extractor/nytimes.py +++ b/youtube_dl/extractor/nytimes.py @@ -89,7 +89,7 @@ class NYTimesIE(NYTimesBaseIE): class NYTimesArticleIE(NYTimesBaseIE): - _VALID_URL = r'https?://(?:www)?\.nytimes\.com/(.(?[^.]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?[^.]+)(?:\.html)?' 
_TESTS = [{ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0', 'md5': 'e2076d58b4da18e6a001d53fd56db3c9', From 7dff03636a843a6990e52200edb3ecca1246b3df Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 12 May 2015 12:47:37 +0800 Subject: [PATCH 0659/2721] [utils] Support 'dur' field in TTML --- test/test_utils.py | 2 +- youtube_dl/utils.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 86b110a7d..b40107037 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -600,7 +600,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')

    The following line contains Chinese characters and special symbols

    第二行
    ♪♪

    -

    Third
    Line

    +

    Third
    Line

    ''' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d73efcf25..5439fcb35 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1866,10 +1866,14 @@ def dfxp2srt(dfxp_data): paras = dfxp.findall(_x('.//ttml:p')) for para, index in zip(paras, itertools.count(1)): + begin_time = parse_dfxp_time_expr(para.attrib['begin']) + end_time = parse_dfxp_time_expr(para.attrib.get('end')) + if not end_time: + end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur']) out.append('%d\n%s --> %s\n%s\n\n' % ( index, - format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))), - format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))), + format_srt_time(begin_time), + format_srt_time(end_time), parse_node(para))) return ''.join(out) From 1c7e2e64f6328024711d5fa999d4498396f4cb5c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 12 May 2015 12:55:14 +0800 Subject: [PATCH 0660/2721] [nrk] Remove TTML to srt conversion codes A common routine is implemented in utils.py and can be used via --convert-subtitles. --- youtube_dl/extractor/nrk.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index e91d3a248..cc70c2950 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, float_or_none, @@ -200,20 +199,10 @@ class NRKTVIE(InfoExtractor): url = "%s%s" % (baseurl, subtitlesurl) self._debug_print('%s: Subtitle url: %s' % (video_id, url)) captions = self._download_xml( - url, video_id, 'Downloading subtitles', - transform_source=lambda s: s.replace(r'
    ', '\r\n')) + url, video_id, 'Downloading subtitles') lang = captions.get('lang', 'no') - ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) - srt = '' - for pos, p in enumerate(ps): - begin = parse_duration(p.get('begin')) - duration = parse_duration(p.get('dur')) - starttime = self._subtitles_timecode(begin) - endtime = self._subtitles_timecode(begin + duration) - srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text) return {lang: [ {'ext': 'ttml', 'url': url}, - {'ext': 'srt', 'data': srt}, ]} def _extract_f4m(self, manifest_url, video_id): From c1c924abfeda45f29b991bb74f315f0e79dcf126 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 12 May 2015 13:04:54 +0800 Subject: [PATCH 0661/2721] [utils,common] Merge format_srt_time and _subtitles_timecode format_srt_time uses a comma as the delimiter between seconds and milliseconds while _subtitles_timecode uses a dot. All .srt examples I found on the Internet uses a comma, so I use a comma in the merged version. 
See http://matroska.org/technical/specs/subtitles/srt.html and http://devel.aegisub.org/wiki/SubtitleFormats/SRT --- youtube_dl/extractor/common.py | 3 --- youtube_dl/extractor/kanalplay.py | 5 +++-- youtube_dl/utils.py | 12 ++++-------- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 981e34bc7..65bb77086 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1072,9 +1072,6 @@ class InfoExtractor(object): def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") - def _subtitles_timecode(self, seconds): - return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) - class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py index 2bb078036..4597d1b96 100644 --- a/youtube_dl/extractor/kanalplay.py +++ b/youtube_dl/extractor/kanalplay.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, float_or_none, + srt_subtitles_timecode, ) @@ -39,8 +40,8 @@ class KanalPlayIE(InfoExtractor): '%s\r\n%s --> %s\r\n%s' % ( num, - self._subtitles_timecode(item['startMillis'] / 1000.0), - self._subtitles_timecode(item['endMillis'] / 1000.0), + srt_subtitles_timecode(item['startMillis'] / 1000.0), + srt_subtitles_timecode(item['endMillis'] / 1000.0), item['text'], ) for num, item in enumerate(subs, 1)) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5439fcb35..ed9ed9ed6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1835,12 +1835,8 @@ def parse_dfxp_time_expr(time_expr): return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3)) -def format_srt_time(seconds): - (mins, secs) = divmod(seconds, 60) - (hours, mins) = divmod(mins, 60) - millisecs = (secs - int(secs)) * 1000 - secs = int(secs) - return 
'%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs) +def srt_subtitles_timecode(seconds): + return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) def dfxp2srt(dfxp_data): @@ -1872,8 +1868,8 @@ def dfxp2srt(dfxp_data): end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur']) out.append('%d\n%s --> %s\n%s\n\n' % ( index, - format_srt_time(begin_time), - format_srt_time(end_time), + srt_subtitles_timecode(begin_time), + srt_subtitles_timecode(end_time), parse_node(para))) return ''.join(out) From 41333b97b9471316cf0f395db59196e6571fc776 Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 12 May 2015 22:35:16 +0800 Subject: [PATCH 0662/2721] [qqmusic] Add support for charts / top lists --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/qqmusic.py | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index de19dfd7a..8ec0c1032 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -414,6 +414,7 @@ from .qqmusic import ( QQMusicIE, QQMusicSingerIE, QQMusicAlbumIE, + QQMusicToplistIE, ) from .quickvid import QuickVidIE from .r7 import R7IE diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 174c8e0ae..d4a85d8c3 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..utils import ( strip_jsonp, unescapeHTML, + js_to_json, ) from ..compat import compat_urllib_request @@ -168,3 +169,57 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): album_page, 'album details', default=None) return self.playlist_result(entries, mid, album_name, album_detail) + + +class QQMusicToplistIE(QQPlaylistBaseIE): + _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P(top|global)_[0-9]+)' + + _TESTS = [{ + 'url': 'http://y.qq.com/#type=toplist&p=global_12', + 'info_dict': { + 'id': 
'global_12', + 'title': 'itunes榜', + }, + 'playlist_count': 10, + }, { + 'url': 'http://y.qq.com/#type=toplist&p=top_6', + 'info_dict': { + 'id': 'top_6', + 'title': 'QQ音乐巅峰榜·欧美', + }, + 'playlist_count': 100, + }] + + @staticmethod + def strip_qq_jsonp(code): + return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)) + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_type = list_id.split("_")[0] + num_id = list_id.split("_")[1] + + list_page = self._download_webpage("http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page') + entries = [] + if list_type == 'top': + list = self._download_json( + "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id, + list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + + for song in list['l']: + s = song['s'] + song_mid = s.split("|")[20] + entries.append(self.url_result( + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', + song_mid)) + + elif list_type == 'global': + entries = self.get_entries_from_page(list_page) + + list_name = self._html_search_regex( + r'

    ([^\']+)

    ', list_page, 'top list name', + default=None) + list_desc = None + + return self.playlist_result(entries, list_id, list_name, list_desc) \ No newline at end of file From b480e7874b45862eae343ab8484aa43381cd28fa Mon Sep 17 00:00:00 2001 From: ping Date: Tue, 12 May 2015 22:41:37 +0800 Subject: [PATCH 0663/2721] [qqmusic] Fix code formatting --- youtube_dl/extractor/qqmusic.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index d4a85d8c3..bca4a8f90 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -200,12 +200,15 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_type = list_id.split("_")[0] num_id = list_id.split("_")[1] - list_page = self._download_webpage("http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page') + list_page = self._download_webpage( + "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, + list_id, 'Download toplist page') entries = [] if list_type == 'top': list = self._download_json( "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id, - list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', + transform_source=self.strip_qq_jsonp) for song in list['l']: s = song['s'] @@ -222,4 +225,5 @@ class QQMusicToplistIE(QQPlaylistBaseIE): default=None) list_desc = None - return self.playlist_result(entries, list_id, list_name, list_desc) \ No newline at end of file + return self.playlist_result(entries, list_id, list_name, list_desc) + \ No newline at end of file From fd4eefed39595850b864d3be9711224e4e8e9dd4 Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 13 May 2015 01:14:02 +0800 Subject: [PATCH 0664/2721] [qqmusic] Fix extraction for global list --- youtube_dl/extractor/qqmusic.py | 34 ++++++++++++++++++++------------- 1 file 
changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index bca4a8f90..3401dcaef 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -188,6 +188,13 @@ class QQMusicToplistIE(QQPlaylistBaseIE): 'title': 'QQ音乐巅峰榜·欧美', }, 'playlist_count': 100, + }, { + 'url': 'http://y.qq.com/#type=toplist&p=global_5', + 'info_dict': { + 'id': 'global_5', + 'title': '韩国mnet排行榜', + }, + 'playlist_count': 50, }] @staticmethod @@ -203,22 +210,23 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_page = self._download_webpage( "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page') + entries = [] + jsonp_url = "" if list_type == 'top': - list = self._download_json( - "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id, - list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', - transform_source=self.strip_qq_jsonp) - - for song in list['l']: - s = song['s'] - song_mid = s.split("|")[20] - entries.append(self.url_result( - 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', - song_mid)) - + jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id elif list_type == 'global': - entries = self.get_entries_from_page(list_page) + jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id + + list = self._download_json(jsonp_url, list_id, note='Retrieve toplist json', + errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + + for song in list['l']: + s = song['s'] + song_mid = s.split("|")[20] + entries.append(self.url_result( + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', + song_mid)) list_name = self._html_search_regex( r'

    ([^\']+)

    ', list_page, 'top list name', From 86ec1e487c4908f4d0d0ece512007a2e5fedc593 Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 13 May 2015 01:37:56 +0800 Subject: [PATCH 0665/2721] [qqmusic] Code fixes --- youtube_dl/extractor/qqmusic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 3401dcaef..bae2ce31a 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -212,10 +212,9 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_id, 'Download toplist page') entries = [] - jsonp_url = "" if list_type == 'top': jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id - elif list_type == 'global': + else: jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id list = self._download_json(jsonp_url, list_id, note='Retrieve toplist json', From 0b4253fa3710c656e12b6147ed7c1f7843bb9aae Mon Sep 17 00:00:00 2001 From: blissland Date: Tue, 12 May 2015 18:57:06 +0100 Subject: [PATCH 0666/2721] [BYUtvIE] Change thumbnail regex so test does not fail --- youtube_dl/extractor/byutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 6252be05b..3b2de517e 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -16,7 +16,7 @@ class BYUtvIE(InfoExtractor): 'ext': 'mp4', 'description': 'md5:5438d33774b6bdc662f9485a340401cc', 'title': 'Season 5 Episode 5', - 'thumbnail': 're:^https?://.*promo.*' + 'thumbnail': 're:^https?://.*\.jpg$' }, 'params': { 'skip_download': True, From 3749e36e9f0e6be2a3a3ab1b15c0c02be5a50e2f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 13 May 2015 21:16:45 +0800 Subject: [PATCH 0667/2721] [YoutubeDL] Fix PEP8 W503 --- youtube_dl/YoutubeDL.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4cf83c510..84d50dab7 
100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1085,9 +1085,9 @@ class YoutubeDL(object): req_format = self.params.get('format') if req_format is None: req_format_list = [] - if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' - and info_dict['extractor'] in ['youtube', 'ted'] - and FFmpegMergerPP(self).available): + if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and + info_dict['extractor'] in ['youtube', 'ted'] and + FFmpegMergerPP(self).available): req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) From 372744c544ec3de1b35583e7d6fc2cbc4cc39f3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 May 2015 22:26:30 +0600 Subject: [PATCH 0668/2721] [odnoklassniki] Fix extraction (Closes #5671) --- youtube_dl/extractor/odnoklassniki.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 155d0ee6a..fbc521d1a 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -6,6 +6,7 @@ from ..utils import ( unified_strdate, int_or_none, qualities, + unescapeHTML, ) @@ -36,8 +37,8 @@ class OdnoklassnikiIE(InfoExtractor): webpage = self._download_webpage(url, video_id) player = self._parse_json( - self._search_regex( - r"OKVideo\.start\(({.+?})\s*,\s*'VideoAutoplay_player'", webpage, 'player'), + unescapeHTML(self._search_regex( + r'data-attributes="([^"]+)"', webpage, 'player')), video_id) metadata = self._parse_json(player['flashvars']['metadata'], video_id) From 8e595397529abc71093264e3695fb00d95be4d78 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 02:32:00 +0800 Subject: [PATCH 0669/2721] [postprocessor/embedthumbnail] Use thumbnails downloaded by YoutubeDL --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 8 +++++++- youtube_dl/postprocessor/embedthumbnail.py | 23 
+++++++++++----------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 84d50dab7..0fbfe9642 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1848,7 +1848,7 @@ class YoutubeDL(object): thumb_ext = determine_ext(t['url'], 'jpg') suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' - thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext + t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): self.to_screen('[%s] %s: Thumbnail %sis already present' % diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9cc9f851f..ace17857c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -240,7 +240,13 @@ def _real_main(argv=None): if opts.xattrs: postprocessors.append({'key': 'XAttrMetadata'}) if opts.embedthumbnail: - postprocessors.append({'key': 'EmbedThumbnail'}) + already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails + postprocessors.append({ + 'key': 'EmbedThumbnail', + 'already_have_thumbnail': already_have_thumbnail + }) + if not already_have_thumbnail: + opts.writethumbnail = True # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way. # So if the user is able to remove the file before your postprocessor runs it might cause a few problems. 
if opts.exec_cmd: diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 4868a42fd..95c52f65f 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -7,11 +7,7 @@ import subprocess from .ffmpeg import FFmpegPostProcessor -from ..compat import ( - compat_urlretrieve, -) from ..utils import ( - determine_ext, check_executable, encodeFilename, PostProcessingError, @@ -25,26 +21,30 @@ class EmbedThumbnailPPError(PostProcessingError): class EmbedThumbnailPP(FFmpegPostProcessor): + def __init__(self, downloader=None, already_have_thumbnail=False): + super(EmbedThumbnailPP, self).__init__(downloader) + self._already_have_thumbnail = already_have_thumbnail + def run(self, info): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - temp_thumbnail = filename + '.' + determine_ext(info['thumbnail']) - if not info.get('thumbnail'): + if not info.get('thumbnails'): raise EmbedThumbnailPPError('Thumbnail was not found. Nothing to do.') - compat_urlretrieve(info['thumbnail'], temp_thumbnail) + thumbnail_filename = info['thumbnails'][-1]['filename'] if info['ext'] == 'mp3': options = [ - '-i', temp_thumbnail, '-c', 'copy', '-map', '0', '-map', '1', + '-i', thumbnail_filename, '-c', 'copy', '-map', '0', '-map', '1', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"'] self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename) self.run_ffmpeg(filename, temp_filename, options) - os.remove(encodeFilename(temp_thumbnail)) + if not self._already_have_thumbnail: + os.remove(encodeFilename(thumbnail_filename)) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) @@ -52,7 +52,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if not check_executable('AtomicParsley', ['-v']): raise EmbedThumbnailPPError('AtomicParsley was not found. 
Please install.') - cmd = ['AtomicParsley', filename, '--artwork', temp_thumbnail, '-o', temp_filename] + cmd = ['AtomicParsley', filename, '--artwork', thumbnail_filename, '-o', temp_filename] self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename) @@ -66,7 +66,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor): msg = stderr.decode('utf-8', 'replace').strip() raise EmbedThumbnailPPError(msg) - os.remove(encodeFilename(temp_thumbnail)) + if not self._already_have_thumbnail: + os.remove(encodeFilename(thumbnail_filename)) # for formats that don't support thumbnails (like 3gp) AtomicParsley # won't create to the temporary file if b'No changes' in stdout: From bb8ca1d112e95cd3fe48fff5af980a62a9db2572 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 02:35:28 +0800 Subject: [PATCH 0670/2721] [postprocessor/embedthumbnail] Use run_ffmpeg_multiple_files --- youtube_dl/postprocessor/embedthumbnail.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 95c52f65f..4e08c2709 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -36,12 +36,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if info['ext'] == 'mp3': options = [ - '-i', thumbnail_filename, '-c', 'copy', '-map', '0', '-map', '1', + '-c', 'copy', '-map', '0', '-map', '1', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"'] self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) + self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) if not self._already_have_thumbnail: os.remove(encodeFilename(thumbnail_filename)) From 2cc6d135479c5dbd6e715a1e767c5be163cd22ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 04:41:30 +0800 Subject: [PATCH 0671/2721] 
[postprocessor/embedthumbnail] Encode arguments in calling AtomicParsley --- youtube_dl/postprocessor/embedthumbnail.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 4e08c2709..8f825f785 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -9,6 +9,7 @@ from .ffmpeg import FFmpegPostProcessor from ..utils import ( check_executable, + encodeArgument, encodeFilename, PostProcessingError, prepend_extension, @@ -52,7 +53,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if not check_executable('AtomicParsley', ['-v']): raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.') - cmd = ['AtomicParsley', filename, '--artwork', thumbnail_filename, '-o', temp_filename] + cmd = [encodeFilename('AtomicParsley', True), + encodeFilename(filename, True), + encodeArgument('--artwork'), + encodeFilename(thumbnail_filename, True), + encodeArgument('-o'), + encodeFilename(temp_filename, True)] self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename) From 86c7fdb17c0dcbff88a8daa131fddc57b6304b83 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 14:26:47 +0800 Subject: [PATCH 0672/2721] [xattr] Enhance error handling to catch ENOSPC Fixes #5589 --- youtube_dl/postprocessor/xattrpp.py | 61 +++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index 93d0abcf6..16f2966e9 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -3,18 +3,32 @@ from __future__ import unicode_literals import os import subprocess import sys +import errno from .common import PostProcessor -from ..compat import ( - subprocess_check_output -) from ..utils import ( check_executable, hyphenate_date, version_tuple, + 
PostProcessingError, + encodeArgument, + encodeFilename, ) +class XAttrMetadataError(PostProcessingError): + def __init__(self, code=None, msg='Unknown error'): + super(XAttrMetadataError, self).__init__(msg) + self.code = code + + # Parsing code and msg + if (self.code in (errno.ENOSPC, errno.EDQUOT) or + 'No space left' in self.msg or 'Disk quota excedded' in self.msg): + self.reason = 'NO_SPACE' + else: + self.reason = 'NOT_SUPPORTED' + + class XAttrMetadataPP(PostProcessor): # @@ -51,7 +65,10 @@ class XAttrMetadataPP(PostProcessor): raise ImportError def write_xattr(path, key, value): - return xattr.setxattr(path, key, value) + try: + xattr.set(path, key, value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) except ImportError: if os.name == 'nt': @@ -62,8 +79,11 @@ class XAttrMetadataPP(PostProcessor): assert os.path.exists(path) ads_fn = path + ":" + key - with open(ads_fn, "wb") as f: - f.write(value) + try: + with open(ads_fn, "wb") as f: + f.write(value) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, e.strerror) else: user_has_setfattr = check_executable("setfattr", ['--version']) user_has_xattr = check_executable("xattr", ['-h']) @@ -71,12 +91,24 @@ class XAttrMetadataPP(PostProcessor): if user_has_setfattr or user_has_xattr: def write_xattr(path, key, value): + value = value.decode('utf-8') if user_has_setfattr: - cmd = ['setfattr', '-n', key, '-v', value, path] + executable = 'setfattr' + opts = ['-n', key, '-v', value] elif user_has_xattr: - cmd = ['xattr', '-w', key, value, path] + executable = 'xattr' + opts = ['-w', key, value] - subprocess_check_output(cmd) + cmd = ([encodeFilename(executable, True)] + + [encodeArgument(o) for o in opts] + + [encodeFilename(path, True)]) + + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + stdout, stderr = p.communicate() + stderr = stderr.decode('utf-8', 'replace') + if p.returncode != 0: + raise 
XAttrMetadataError(p.returncode, stderr) else: # On Unix, and can't find pyxattr, setfattr, or xattr. @@ -121,6 +153,13 @@ class XAttrMetadataPP(PostProcessor): return [], info - except (subprocess.CalledProcessError, OSError): - self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)") + except XAttrMetadataError as e: + if e.reason == 'NO_SPACE': + self._downloader.report_warning( + 'There\'s no disk space left or disk quota exceeded. ' + + 'Extended attributes are not written.') + else: + self._downloader.report_error( + 'This filesystem doesn\'t support extended attributes. ' + + '(You may have to enable them in your /etc/fstab)') return [], info From fbff30d2dbc6462c628384ea5960c2461e7cdcca Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 14:51:00 +0800 Subject: [PATCH 0673/2721] [xattr] Catch 'Argument list too long' --- youtube_dl/postprocessor/xattrpp.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index 16f2966e9..27e273000 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -25,6 +25,8 @@ class XAttrMetadataError(PostProcessingError): if (self.code in (errno.ENOSPC, errno.EDQUOT) or 'No space left' in self.msg or 'Disk quota excedded' in self.msg): self.reason = 'NO_SPACE' + elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: + self.reason = 'VALUE_TOO_LONG' else: self.reason = 'NOT_SUPPORTED' @@ -103,8 +105,11 @@ class XAttrMetadataPP(PostProcessor): [encodeArgument(o) for o in opts] + [encodeFilename(path, True)]) - p = subprocess.Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + try: + p = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + except EnvironmentError as e: + raise XAttrMetadataError(e.errno, 
e.strerror) stdout, stderr = p.communicate() stderr = stderr.decode('utf-8', 'replace') if p.returncode != 0: @@ -158,6 +163,9 @@ class XAttrMetadataPP(PostProcessor): self._downloader.report_warning( 'There\'s no disk space left or disk quota exceeded. ' + 'Extended attributes are not written.') + elif e.reason == 'VALUE_TOO_LONG': + self._downloader.report_warning( + 'Unable to write extended attributes due to too long values.') else: self._downloader.report_error( 'This filesystem doesn\'t support extended attributes. ' + From 509c630db8cdaff473f95805cda1ae350107e36b Mon Sep 17 00:00:00 2001 From: blissland Date: Thu, 14 May 2015 08:09:56 +0100 Subject: [PATCH 0674/2721] [CanalplusIE] Update tests that were no longer working --- youtube_dl/extractor/canalplus.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 1b14471e5..71801488a 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -25,14 +25,13 @@ class CanalplusIE(InfoExtractor): } _TESTS = [{ - 'url': 'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470', - 'md5': '3db39fb48b9685438ecf33a1078023e4', + 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', 'info_dict': { - 'id': '922470', + 'id': '1263092', 'ext': 'flv', - 'title': 'Zapping - 26/08/13', - 'description': 'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013', - 'upload_date': '20130826', + 'title': 'Le Zapping - 13/05/15', + 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', + 'upload_date': '20150513', }, }, { 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', @@ -56,7 +55,6 @@ class CanalplusIE(InfoExtractor): 'skip': 'videos get deleted after a while', }, { 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', - 'md5': 
'65aa83ad62fe107ce29e564bb8712580', 'info_dict': { 'id': '1213714', 'ext': 'flv', From c827d4cfdb9ce47d13ccbec32d2b32dfb429ea8a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 16:53:10 +0800 Subject: [PATCH 0675/2721] [xattr] Enhanced error messages on Windows --- youtube_dl/postprocessor/xattrpp.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py index 27e273000..7d88e1308 100644 --- a/youtube_dl/postprocessor/xattrpp.py +++ b/youtube_dl/postprocessor/xattrpp.py @@ -167,7 +167,10 @@ class XAttrMetadataPP(PostProcessor): self._downloader.report_warning( 'Unable to write extended attributes due to too long values.') else: - self._downloader.report_error( - 'This filesystem doesn\'t support extended attributes. ' + - '(You may have to enable them in your /etc/fstab)') + msg = 'This filesystem doesn\'t support extended attributes. ' + if os.name == 'nt': + msg += 'You need to use NTFS.' 
+ else: + msg += '(You may have to enable them in your /etc/fstab)' + self._downloader.report_error(msg) return [], info From 7d57d2e18be416faa593364966ccf667243fd3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 May 2015 14:59:27 +0600 Subject: [PATCH 0676/2721] [canalplus] Restore checksums in tests --- youtube_dl/extractor/canalplus.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 71801488a..699b4f7d0 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -26,6 +26,7 @@ class CanalplusIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', + 'md5': 'b3481d7ca972f61e37420798d0a9d934', 'info_dict': { 'id': '1263092', 'ext': 'flv', @@ -55,6 +56,7 @@ class CanalplusIE(InfoExtractor): 'skip': 'videos get deleted after a while', }, { 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', + 'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4', 'info_dict': { 'id': '1213714', 'ext': 'flv', From 82245a6de77f4755b063310258c5611c15f5ffbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 May 2015 15:21:27 +0600 Subject: [PATCH 0677/2721] [YoutubeDL] Restore filename for thumbnails --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4c8196d08..691f3e09f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1849,7 +1849,7 @@ class YoutubeDL(object): thumb_ext = determine_ext(t['url'], 'jpg') suffix = '_%s' % t['id'] if len(thumbnails) > 1 else '' thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else '' - thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext + t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' 
+ thumb_ext if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)): self.to_screen('[%s] %s: Thumbnail %sis already present' % From fa6a16996e4a1aeee4e421b172efc6c351b1b123 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 18:00:57 +0800 Subject: [PATCH 0678/2721] [worldstarhiphop] Support Android URLs (fixes #5629) --- youtube_dl/extractor/worldstarhiphop.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/worldstarhiphop.py b/youtube_dl/extractor/worldstarhiphop.py index d5c26a032..a3ea26feb 100644 --- a/youtube_dl/extractor/worldstarhiphop.py +++ b/youtube_dl/extractor/worldstarhiphop.py @@ -6,8 +6,8 @@ from .common import InfoExtractor class WorldStarHipHopIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/videos/video\.php\?v=(?P.*)' - _TEST = { + _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?v=(?P.*)' + _TESTS = [{ "url": "http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO", "md5": "9d04de741161603bf7071bbf4e883186", "info_dict": { @@ -15,7 +15,15 @@ class WorldStarHipHopIE(InfoExtractor): "ext": "mp4", "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!" } - } + }, { + 'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO', + 'md5': 'dc1c76c83ecc4190bb1eb143899b87d3', + 'info_dict': { + 'id': 'wshh6a7q1ny0G34ZwuIO', + 'ext': 'mp4', + "title": "KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!" + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -26,19 +34,22 @@ class WorldStarHipHopIE(InfoExtractor): return self.url_result('vevo:%s' % m_vevo_id.group(1), ie='Vevo') video_url = self._search_regex( - r'so\.addVariable\("file","(.*?)"\)', webpage, 'video URL') + [r'so\.addVariable\("file","(.*?)"\)', + r'
    \s*]+href="([^"]+)">'], + webpage, 'video URL') if 'youtube' in video_url: return self.url_result(video_url, ie='Youtube') video_title = self._html_search_regex( - r'(?s)
    \s*

    (.*?)

    ', + [r'(?s)
    \s*

    (.*?)

    ', + r']+class="tc-sp-pinned-title">(.*)'], webpage, 'title') # Getting thumbnail and if not thumbnail sets correct title for WSHH candy video. thumbnail = self._html_search_regex( r'rel="image_src" href="(.*)" />', webpage, 'thumbnail', - fatal=False) + default=None) if not thumbnail: _title = r'candytitles.*>(.*)' mobj = re.search(_title, webpage) From 7a012d5a16632a103466f9e9794dd98ad573ce88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 May 2015 16:39:35 +0600 Subject: [PATCH 0679/2721] [screenwavemedia] Add support for player2 URLs (Closes #5696) --- youtube_dl/extractor/screenwavemedia.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 74fb1983a..d1ab66b32 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -11,7 +11,7 @@ from ..utils import ( class ScreenwaveMediaIE(InfoExtractor): - _VALID_URL = r'http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P.+)' + _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P.+)' _TESTS = [{ 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', @@ -20,7 +20,10 @@ class ScreenwaveMediaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - playerdata = self._download_webpage(url, video_id, 'Downloading player webpage') + + playerdata = self._download_webpage( + 'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id, + video_id, 'Downloading player webpage') vidtitle = self._search_regex( r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') @@ -99,7 +102,7 @@ class TeamFourIE(InfoExtractor): webpage = self._download_webpage(url, display_id) playerdata_url = self._search_regex( - 
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage, 'player data URL') video_title = self._html_search_regex( From 548897396158d7822020f45c10301e9ca3c46453 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 23:25:43 +0800 Subject: [PATCH 0680/2721] [qqmusic] flake8 --- youtube_dl/extractor/qqmusic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index bae2ce31a..5ddbb183e 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -173,7 +173,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE): _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P(top|global)_[0-9]+)' - + _TESTS = [{ 'url': 'http://y.qq.com/#type=toplist&p=global_12', 'info_dict': { @@ -200,7 +200,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): @staticmethod def strip_qq_jsonp(code): return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)) - + def _real_extract(self, url): list_id = self._match_id(url) @@ -208,7 +208,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): num_id = list_id.split("_")[1] list_page = self._download_webpage( - "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, + "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page') entries = [] @@ -216,10 +216,11 @@ class QQMusicToplistIE(QQPlaylistBaseIE): jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id else: jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id - - list = self._download_json(jsonp_url, list_id, note='Retrieve toplist json', + + list = self._download_json( + jsonp_url, list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) - + for song in 
list['l']: s = song['s'] song_mid = s.split("|")[20] @@ -233,4 +234,3 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_desc = None return self.playlist_result(entries, list_id, list_name, list_desc) - \ No newline at end of file From 29ea57283e473e94c72cf9cbc065c3c05a14830f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 23:28:42 +0800 Subject: [PATCH 0681/2721] [qqmusic] Refactoring QQMusicToplistIE --- youtube_dl/extractor/qqmusic.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 5ddbb183e..59e93a1dd 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -204,8 +204,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - list_type = list_id.split("_")[0] - num_id = list_id.split("_")[1] + list_type, num_id = list_id.split("_") list_page = self._download_webpage( "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, @@ -217,11 +216,11 @@ class QQMusicToplistIE(QQPlaylistBaseIE): else: jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id - list = self._download_json( + toplist_json = self._download_json( jsonp_url, list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) - for song in list['l']: + for song in toplist_json['l']: s = song['s'] song_mid = s.split("|")[20] entries.append(self.url_result( @@ -231,6 +230,5 @@ class QQMusicToplistIE(QQPlaylistBaseIE): list_name = self._html_search_regex( r'

    ([^\']+)

    ', list_page, 'top list name', default=None) - list_desc = None - return self.playlist_result(entries, list_id, list_name, list_desc) + return self.playlist_result(entries, list_id, list_name) From 7ec676bb3dd6cba4b56fccb2d5aae08e66086b4e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 May 2015 23:32:36 +0800 Subject: [PATCH 0682/2721] [qqmusic] Add IE_NAME for all extractors --- youtube_dl/extractor/qqmusic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 59e93a1dd..13113820b 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -15,6 +15,7 @@ from ..compat import compat_urllib_request class QQMusicIE(InfoExtractor): + IE_NAME = 'qqmusic' _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', @@ -97,6 +98,7 @@ class QQPlaylistBaseIE(InfoExtractor): class QQMusicSingerIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:singer' _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P[0-9A-Za-z]+)' _TEST = { 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', @@ -140,6 +142,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:album' _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P[0-9A-Za-z]+)' _TEST = { @@ -172,6 +175,7 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:toplist' _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P(top|global)_[0-9]+)' _TESTS = [{ From 1ae72fb23df709687091133602fd715ab6cb7b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 May 2015 22:28:42 +0600 Subject: [PATCH 0683/2721] [soundcloud:user] Defer download link resolve (Closes #5248) Looks like final download links can expire before downloading process reach them. So, resolving download links right before actual downloading. 
--- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 183ff50f4..c23c5ee0f 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -336,7 +336,7 @@ class SoundcloudUserIE(SoundcloudIE): if len(new_entries) == 0: self.to_screen('%s: End page received' % uploader) break - entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) + entries.extend(self.url_result(e['permalink_url'], 'Soundcloud') for e in new_entries) return { '_type': 'playlist', From 3a105f7b20e8a3f742ac86cc1a6b02935b831778 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 15 May 2015 02:17:22 +0800 Subject: [PATCH 0684/2721] [teamcoco] Rewrite preload data extraction Idea: "puncture" some consecutive fragments and check whether the b64decode result of a punctured string is a valid JSON or not. It's a O(N^3) algorithm, but should be fast for a small N (less than 30 fragments in all test cases) --- youtube_dl/extractor/teamcoco.py | 53 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 95d58ddd0..410eb7d3a 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import base64 import binascii import re +import json from .common import InfoExtractor from ..utils import ( @@ -68,41 +69,39 @@ class TeamcocoIE(InfoExtractor): video_id = self._html_search_regex( self._VIDEO_ID_REGEXES, webpage, 'video id') - data = preload = None - preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage) - if preloads: - preload = max([(len(p), p) for p in preloads])[1] + data = None - if not preload: - preload = ''.join(re.findall(r'this\.push\("([^"]+)"\);', webpage)) + preload_codes = self._html_search_regex( + 
r'(function.+)setTimeout\(function\(\)\{playlist', + webpage, 'preload codes') + base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes) + base64_fragments.remove('init') - if not preload: - preload = self._html_search_regex([ - r'player,\[?"([^"]+)"\]?', r'player.init\(\[?"([^"]+)"\]?\)' - ], webpage.replace('","', ''), 'preload data', default=None) - - if not preload: - preload_codes = self._html_search_regex( - r'(function.+)setTimeout\(function\(\)\{playlist', - webpage, 'preload codes') - base64_fragments = re.findall(r'"([a-zA-z0-9+/=]+)"', preload_codes) - base64_fragments.remove('init') - for i in range(len(base64_fragments)): - cur_sequence = (''.join(base64_fragments[i:] + base64_fragments[:i])).encode('ascii') + def _check_sequence(cur_fragments): + if not cur_fragments: + return + for i in range(len(cur_fragments)): + cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii') try: raw_data = base64.b64decode(cur_sequence) - except (TypeError, binascii.Error): + if compat_ord(raw_data[0]) == compat_ord('{'): + return json.loads(raw_data.decode('utf-8')) + except (TypeError, binascii.Error, UnicodeDecodeError, ValueError): continue - if compat_ord(raw_data[0]) == compat_ord('{'): - data = self._parse_json(raw_data.decode('utf-8'), video_id, fatal=False) - if not preload and not data: - raise ExtractorError( - 'Preload information could not be extracted', expected=True) + def _check_data(): + for i in range(len(base64_fragments) + 1): + for j in range(i, len(base64_fragments) + 1): + data = _check_sequence(base64_fragments[:i] + base64_fragments[j:]) + if data: + return data + + self.to_screen('Try to compute possible data sequence. 
This may take some time.') + data = _check_data() if not data: - data = self._parse_json( - base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id) + raise ExtractorError( + 'Preload information could not be extracted', expected=True) formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) From 12675275a1d2158fbe409361888569e4cb52ef07 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 15 May 2015 02:27:41 +0800 Subject: [PATCH 0685/2721] [teamcoco] Detect expired videos (#5626) --- youtube_dl/extractor/teamcoco.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 410eb7d3a..56be52638 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -62,7 +62,9 @@ class TeamcocoIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) display_id = mobj.group('display_id') - webpage = self._download_webpage(url, display_id) + webpage, urlh = self._download_webpage_handle(url, display_id) + if 'src=expired' in urlh.geturl(): + raise ExtractorError('This video is expired.', expected=True) video_id = mobj.group('video_id') if not video_id: From 2bc43303031215436b201e656094b60ab3ec7e9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 May 2015 23:41:27 +0200 Subject: [PATCH 0686/2721] [youtube:history] Fix extraction (fixes #5702) It uses the same method as YoutubeSubscriptionsIE, if other feed starts using it we should consider using base class. 
--- youtube_dl/extractor/youtube.py | 37 +++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0869c9fd4..e58184adc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1667,13 +1667,42 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): return self._extract_playlist('WL') -class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): +class YoutubeHistoryIE(YoutubePlaylistIE): IE_NAME = 'youtube:history' IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' - _FEED_NAME = 'history' - _PERSONAL_FEED = True - _PLAYLIST_TITLE = 'Youtube Watch History' + _TESTS = [] + + def _real_extract(self, url): + title = 'Youtube History' + page = self._download_webpage('https://www.youtube.com/feed/history', title) + + # The extraction process is the same as for playlists, but the regex + # for the video ids doesn't contain an index + ids = [] + more_widget_html = content_html = page + + for page_num in itertools.count(1): + matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html) + new_ids = orderedSet(matches) + ids.extend(new_ids) + + mobj = re.search(r'data-uix-load-more-href="/?(?P[^"]+)"', more_widget_html) + if not mobj: + break + + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), title, + 'Downloading page #%s' % page_num, + transform_source=uppercase_escape) + content_html = more['content_html'] + more_widget_html = more['load_more_widget_html'] + + return { + '_type': 'playlist', + 'title': title, + 'entries': self._ids_to_results(ids), + } class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): From c4fc559f45ea5c40409eab44867ff2b4f08976c2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 15 May 2015 10:13:43 +0200 Subject: [PATCH 0687/2721] release 2015.05.15 --- 
docs/supportedsites.md | 11 ++++++++--- youtube_dl/version.py | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 98b625380..43fbe8b1d 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -291,6 +291,7 @@ - **MySpass** - **myvideo** - **MyVidster** + - **N-JOY** - **n-tv.de** - **NationalGeographic** - **Naver** @@ -368,9 +369,10 @@ - **prosiebensat1**: ProSiebenSat.1 Digital - **Puls4** - **Pyvideo** - - **QQMusic** - - **QQMusicAlbum** - - **QQMusicSinger** + - **qqmusic** + - **qqmusic:album** + - **qqmusic:singer** + - **qqmusic:toplist** - **QuickVid** - **R7** - **radio.de** @@ -439,6 +441,7 @@ - **southpark.cc.com:español** - **southpark.de** - **southpark.nl** + - **southparkstudios.dk** - **Space** - **SpankBang** - **Spankwire** @@ -492,6 +495,7 @@ - **tlc.com** - **tlc.de** - **TMZ** + - **TMZArticle** - **TNAFlix** - **tou.tv** - **Toypics**: Toypics user profile @@ -569,6 +573,7 @@ - **vk.com** - **vk.com:user-videos**: vk.com:All of a user's videos - **Vodlocker** + - **VoiceRepublic** - **Vporn** - **VRT** - **vube**: Vube.com diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 83c5a1659..38f00bc9b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.10' +__version__ = '2015.05.15' From 3884dcf313223040049e4153e0c398fbc36b5117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 15 May 2015 14:03:00 +0200 Subject: [PATCH 0688/2721] YoutubeDL: ignore indexes from 'playlist_items' that are not in the list (fixes #5706) We ignore them instead of failing to match the behaviour of the 'playliststart' parameter. 
--- youtube_dl/YoutubeDL.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 691f3e09f..5df889945 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -759,7 +759,9 @@ class YoutubeDL(object): if isinstance(ie_entries, list): n_all_entries = len(ie_entries) if playlistitems: - entries = [ie_entries[i - 1] for i in playlistitems] + entries = [ + ie_entries[i - 1] for i in playlistitems + if -n_all_entries <= i - 1 < n_all_entries] else: entries = ie_entries[playliststart:playlistend] n_entries = len(entries) From e9eaf3fbcf497e76a55d2ba15d5880af83a065d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 15 May 2015 14:06:19 +0200 Subject: [PATCH 0689/2721] [test/YoutubeDL] Add tests for 'playliststart', 'playlistend' and 'playlist_items' --- test/test_YoutubeDL.py | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 82b827536..a13c09ef4 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -12,6 +12,7 @@ import copy from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL +from youtube_dl.compat import compat_str from youtube_dl.extractor import YoutubeIE from youtube_dl.postprocessor.common import PostProcessor from youtube_dl.utils import match_filter_func @@ -507,6 +508,51 @@ class TestYoutubeDL(unittest.TestCase): res = get_videos(f) self.assertEqual(res, ['1']) + def test_playlist_items_selection(self): + entries = [{ + 'id': compat_str(i), + 'title': compat_str(i), + 'url': TEST_URL, + } for i in range(1, 5)] + playlist = { + '_type': 'playlist', + 'id': 'test', + 'entries': entries, + 'extractor': 'test:playlist', + 'extractor_key': 'test:playlist', + 'webpage_url': 'http://example.com', + } + + def get_ids(params): + ydl = YDL(params) + # make a copy because the dictionary can be 
modified + ydl.process_ie_result(playlist.copy()) + return [int(v['id']) for v in ydl.downloaded_info_dicts] + + result = get_ids({}) + self.assertEqual(result, [1, 2, 3, 4]) + + result = get_ids({'playlistend': 10}) + self.assertEqual(result, [1, 2, 3, 4]) + + result = get_ids({'playlistend': 2}) + self.assertEqual(result, [1, 2]) + + result = get_ids({'playliststart': 10}) + self.assertEqual(result, []) + + result = get_ids({'playliststart': 2}) + self.assertEqual(result, [2, 3, 4]) + + result = get_ids({'playlist_items': '2-4'}) + self.assertEqual(result, [2, 3, 4]) + + result = get_ids({'playlist_items': '2,4'}) + self.assertEqual(result, [2, 4]) + + result = get_ids({'playlist_items': '10'}) + self.assertEqual(result, []) + if __name__ == '__main__': unittest.main() From 15da7ce7fb89203247f4c959a748281ecf353e2a Mon Sep 17 00:00:00 2001 From: blissland Date: Fri, 15 May 2015 12:28:10 +0100 Subject: [PATCH 0690/2721] Fix file format extraction regex and update test file checksum --- youtube_dl/extractor/ccc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 2a5d4be18..6924eac70 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -16,7 +16,7 @@ class CCCIE(InfoExtractor): _TEST = { 'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video', - 'md5': '205a365d0d57c0b1e43a12c9ffe8f9be', + 'md5': '3a1eda8f3a29515d27f5adb967d7e740', 'info_dict': { 'id': '20131228183', 'ext': 'mp4', @@ -51,7 +51,7 @@ class CCCIE(InfoExtractor): matches = re.finditer(r'''(?xs) <(?:span|div)\s+class='label\s+filetype'>(?P.*?)\s* - [^']+)'>\s* + [^']+)'>\s* (?: .*? [^']+\.torrent)' From a7b8467ac0baecd02a815b1f57731ae9bb10ab87 Mon Sep 17 00:00:00 2001 From: Vitaliy Syrchikov Date: Fri, 15 May 2015 16:52:11 +0400 Subject: [PATCH 0691/2721] Sportbox extractor fix. 
--- youtube_dl/extractor/sportbox.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index becdf658f..830220543 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -11,7 +11,7 @@ from ..utils import ( class SportBoxIE(InfoExtractor): - _VALID_URL = r'https?://news\.sportbox\.ru/Vidy_sporta/(?:[^/]+/)+spbvideo_NI\d+_(?P.+)' + _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P.+)' _TESTS = [ { 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', @@ -50,7 +50,7 @@ class SportBoxIE(InfoExtractor): display_id, 'Downloading player webpage') hls = self._search_regex( - r"var\s+original_hls_file\s*=\s*'([^']+)'", player, 'hls file') + r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]+([^\"]+)['\"]+", player, 'hls file') formats = self._extract_m3u8_formats(hls, display_id, 'mp4') From ae670a6ed8019f1b69bbe345621f51c8b32789ec Mon Sep 17 00:00:00 2001 From: Vitaliy Syrchikov Date: Fri, 15 May 2015 17:53:05 +0400 Subject: [PATCH 0692/2721] Sportbox source fix. HD videos support. 
--- youtube_dl/extractor/sportbox.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 830220543..695b3ff82 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -14,7 +14,7 @@ class SportBoxIE(InfoExtractor): _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P.+)' _TESTS = [ { - 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', + 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', 'md5': 'ff56a598c2cf411a9a38a69709e97079', 'info_dict': { 'id': '80822', @@ -42,11 +42,15 @@ class SportBoxIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'src="/vdl/player/media/(\d+)"', webpage, 'video id') + sobj = re.search(r'src="/vdl/player/(?P\w+)/(?P\d+)"', webpage) + if (sobj): + video_id = sobj.group('video_id') + media_type = sobj.group('media_type') + else: + raise RegexNotFoundError('Unable to extract video_id') player = self._download_webpage( - 'http://news.sportbox.ru/vdl/player/media/%s' % video_id, + 'http://news.sportbox.ru/vdl/player/%s/%s' % (media_type, video_id), display_id, 'Downloading player webpage') hls = self._search_regex( From 25f14e9f93295a787e0cb436a5f6179d6174733d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 21:06:59 +0600 Subject: [PATCH 0693/2721] [youtube] Separate feed extractor --- youtube_dl/extractor/youtube.py | 143 +++++++++----------------------- 1 file changed, 37 insertions(+), 106 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e58184adc..9096a2975 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -49,6 +49,11 @@ class 
YoutubeBaseInfoExtractor(InfoExtractor): # YouTube sets the expire time to about two months expire_time=time.time() + 2 * 30 * 24 * 3600) + def _ids_to_results(self, ids): + return [ + self.url_result(vid_id, 'Youtube', video_id=vid_id) + for vid_id in ids] + def _login(self): """ Attempt to log in to YouTube. @@ -1261,11 +1266,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): def _real_initialize(self): self._login() - def _ids_to_results(self, ids): - return [ - self.url_result(vid_id, 'Youtube', video_id=vid_id) - for vid_id in ids] - def _extract_mix(self, playlist_id): # The mixes are generated from a single video # the id of the playlist is just 'RD' + video_id @@ -1601,20 +1601,10 @@ class YoutubeShowIE(InfoExtractor): class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): """ - Base class for extractors that fetch info from - http://www.youtube.com/feed_ajax + Base class for feed extractors Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. """ _LOGIN_REQUIRED = True - # use action_load_personal_feed instead of action_load_system_feed - _PERSONAL_FEED = False - - @property - def _FEED_TEMPLATE(self): - action = 'action_load_system_feed' - if self._PERSONAL_FEED: - action = 'action_load_personal_feed' - return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME) @property def IE_NAME(self): @@ -1624,58 +1614,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): self._login() def _real_extract(self, url): - feed_entries = [] - paging = 0 - for i in itertools.count(1): - info = self._download_json( - self._FEED_TEMPLATE % paging, - '%s feed' % self._FEED_NAME, - 'Downloading page %s' % i, - transform_source=uppercase_escape) - feed_html = info.get('feed_html') or info.get('content_html') - load_more_widget_html = info.get('load_more_widget_html') or feed_html - m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) - ids = orderedSet(m.group(1) for m in m_ids) - 
feed_entries.extend( - self.url_result(video_id, 'Youtube', video_id=video_id) - for video_id in ids) - mobj = re.search( - r'data-uix-load-more-href="/?[^"]+paging=(?P\d+)', - load_more_widget_html) - if mobj is None: - break - paging = mobj.group('paging') - return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) - - -class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): - IE_NAME = 'youtube:recommended' - IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' - _FEED_NAME = 'recommended' - _PLAYLIST_TITLE = 'Youtube Recommended videos' - - -class YoutubeWatchLaterIE(YoutubePlaylistIE): - IE_NAME = 'youtube:watchlater' - IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' - - _TESTS = [] # override PlaylistIE tests - - def _real_extract(self, url): - return self._extract_playlist('WL') - - -class YoutubeHistoryIE(YoutubePlaylistIE): - IE_NAME = 'youtube:history' - IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' - _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' - _TESTS = [] - - def _real_extract(self, url): - title = 'Youtube History' - page = self._download_webpage('https://www.youtube.com/feed/history', title) + page = self._download_webpage( + 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE) # The extraction process is the same as for playlists, but the regex # for the video ids doesn't contain an index @@ -1692,17 +1632,25 @@ class YoutubeHistoryIE(YoutubePlaylistIE): break more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), title, + 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE, 'Downloading page #%s' % page_num, transform_source=uppercase_escape) content_html = 
more['content_html'] more_widget_html = more['load_more_widget_html'] - return { - '_type': 'playlist', - 'title': title, - 'entries': self._ids_to_results(ids), - } + return self.playlist_result( + self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE) + + +class YoutubeWatchLaterIE(YoutubePlaylistIE): + IE_NAME = 'youtube:watchlater' + IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' + _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater' + + _TESTS = [] # override PlaylistIE tests + + def _real_extract(self, url): + return self._extract_playlist('WL') class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): @@ -1717,42 +1665,25 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): return self.url_result(playlist_id, 'YoutubePlaylist') -class YoutubeSubscriptionsIE(YoutubePlaylistIE): - IE_NAME = 'youtube:subscriptions' +class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): + IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' + _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' + _FEED_NAME = 'recommended' + _PLAYLIST_TITLE = 'Youtube Recommended videos' + + +class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' 
- _TESTS = [] + _FEED_NAME = 'subscriptions' + _PLAYLIST_TITLE = 'Youtube Subscriptions' - def _real_extract(self, url): - title = 'Youtube Subscriptions' - page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title) - # The extraction process is the same as for playlists, but the regex - # for the video ids doesn't contain an index - ids = [] - more_widget_html = content_html = page - - for page_num in itertools.count(1): - matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html) - new_ids = orderedSet(matches) - ids.extend(new_ids) - - mobj = re.search(r'data-uix-load-more-href="/?(?P[^"]+)"', more_widget_html) - if not mobj: - break - - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), title, - 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) - content_html = more['content_html'] - more_widget_html = more['load_more_widget_html'] - - return { - '_type': 'playlist', - 'title': title, - 'entries': self._ids_to_results(ids), - } +class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): + IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' + _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory' + _FEED_NAME = 'history' + _PLAYLIST_TITLE = 'Youtube History' class YoutubeTruncatedURLIE(InfoExtractor): From 62c95fd5fcb8dbea2faeb4edac4c5177cbac5912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 21:42:34 +0600 Subject: [PATCH 0694/2721] [youtube:feed] Check each 'load more' portion for unique video ids --- youtube_dl/extractor/youtube.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9096a2975..1f9940cf5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1621,10 +1621,16 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): # for the video ids doesn't contain 
an index ids = [] more_widget_html = content_html = page - for page_num in itertools.count(1): matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html) - new_ids = orderedSet(matches) + + # 'recommended' feed has infinite 'load more' and each new portion spins + # the same videos in (sometimes) slightly different order, so we'll check + # for unicity and break when portion has no new videos + new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches)) + if not new_ids: + break + ids.extend(new_ids) mobj = re.search(r'data-uix-load-more-href="/?(?P[^"]+)"', more_widget_html) From e9ca615a9872e85a6986061fdf54257244ce1f77 Mon Sep 17 00:00:00 2001 From: Vitaliy Syrchikov Date: Fri, 15 May 2015 19:57:54 +0400 Subject: [PATCH 0695/2721] New test --- youtube_dl/extractor/sportbox.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 695b3ff82..cb1515eff 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -30,10 +30,29 @@ class SportBoxIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - }, { + }, + { + 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', + 'md5': 'ff56a598c2cf411a9a38a69709e97079', + 'info_dict': { + 'id': '211355', + 'ext': 'mp4', + 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'description': '16 детских коллективов приняли участие в суперфинале турнира «Поле славы боевой».', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1426237001, + 'upload_date': '20150313', + 'duration': 292, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', 'only_matching': True, - } + }, ] def _real_extract(self, url): From 
34fe5a94baf9e7ea437de68621a5fa73780c0f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 15 May 2015 18:42:59 +0200 Subject: [PATCH 0696/2721] [gamespot] Add support for videos that don't use 'f4m_stream' (fixes #5707) --- youtube_dl/extractor/gamespot.py | 60 +++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 47373e215..5927455f6 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -15,7 +15,7 @@ from ..utils import ( class GameSpotIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P\d+)/?' - _TEST = { + _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', 'info_dict': { @@ -23,8 +23,16 @@ class GameSpotIE(InfoExtractor): 'ext': 'mp4', 'title': 'Arma 3 - Community Guide: SITREP I', 'description': 'Check out this video where some of the basics of Arma 3 is explained.', - } - } + }, + }, { + 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', + 'info_dict': { + 'id': 'gs-2300-6424837', + 'ext': 'flv', + 'title': 'The Witcher 3: Wild Hunt [Xbox ONE] - Now Playing', + 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', + }, + }] def _real_extract(self, url): page_id = self._match_id(url) @@ -32,25 +40,37 @@ class GameSpotIE(InfoExtractor): data_video_json = self._search_regex( r'data-video=["\'](.*?)["\']', webpage, 'data video') data_video = json.loads(unescapeHTML(data_video_json)) + streams = data_video['videoStreams'] - # Transform the manifest url to a link to the mp4 files - # they are used in mobile devices. 
- f4m_url = data_video['videoStreams']['f4m_stream'] - f4m_path = compat_urlparse.urlparse(f4m_url).path - QUALITIES_RE = r'((,\d+)+,?)' - qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') - http_path = f4m_path[1:].split('/', 1)[1] - http_template = re.sub(QUALITIES_RE, r'%s', http_path) - http_template = http_template.replace('.csmil/manifest.f4m', '') - http_template = compat_urlparse.urljoin( - 'http://video.gamespotcdn.com/', http_template) formats = [] - for q in qualities: - formats.append({ - 'url': http_template % q, - 'ext': 'mp4', - 'format_id': q, - }) + f4m_url = streams.get('f4m_stream') + if f4m_url is not None: + # Transform the manifest url to a link to the mp4 files + # they are used in mobile devices. + f4m_path = compat_urlparse.urlparse(f4m_url).path + QUALITIES_RE = r'((,\d+)+,?)' + qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') + http_path = f4m_path[1:].split('/', 1)[1] + http_template = re.sub(QUALITIES_RE, r'%s', http_path) + http_template = http_template.replace('.csmil/manifest.f4m', '') + http_template = compat_urlparse.urljoin( + 'http://video.gamespotcdn.com/', http_template) + for q in qualities: + formats.append({ + 'url': http_template % q, + 'ext': 'mp4', + 'format_id': q, + }) + else: + for quality in ['sd', 'hd']: + # It's actually a link to a flv file + flv_url = streams.get('f4m_{0}'.format(quality)) + if flv_url is not None: + formats.append({ + 'url': flv_url, + 'ext': 'flv', + 'format_id': quality, + }) return { 'id': data_video['guid'], From eeb23eb7ea6953d7e90ccf669cd0e636d10b2b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 15 May 2015 18:44:08 +0200 Subject: [PATCH 0697/2721] [gamespot] The protocol is not optional --- youtube_dl/extractor/gamespot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 
5927455f6..2d33fa7f5 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -14,7 +14,7 @@ from ..utils import ( class GameSpotIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P\d+)/?' + _VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P\d+)/?' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825', From 3a7382950b6f498f50173c8813f6cb1db3739277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 22:50:44 +0600 Subject: [PATCH 0698/2721] [sportbox:embed] Add extractor --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/sportbox.py | 138 +++++++++++++++++-------------- 2 files changed, 82 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8ec0c1032..f293bc2a4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -502,7 +502,10 @@ from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE from .spike import SpikeIE from .sport5 import Sport5IE -from .sportbox import SportBoxIE +from .sportbox import ( + SportBoxIE, + SportBoxEmbedIE, +) from .sportdeutschland import SportDeutschlandIE from .srf import SrfIE from .srmediathek import SRMediathekIE diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index cb1515eff..10c45eb74 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( parse_duration, parse_iso8601, @@ -12,48 +13,30 @@ from ..utils import ( class SportBoxIE(InfoExtractor): _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P.+)' - _TESTS = [ - { - 'url': 
'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', - 'md5': 'ff56a598c2cf411a9a38a69709e97079', - 'info_dict': { - 'id': '80822', - 'ext': 'mp4', - 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', - 'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed', - 'thumbnail': 're:^https?://.*\.jpg$', - 'timestamp': 1411896237, - 'upload_date': '20140928', - 'duration': 4846, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', - 'md5': 'ff56a598c2cf411a9a38a69709e97079', - 'info_dict': { - 'id': '211355', - 'ext': 'mp4', - 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', - 'description': '16 детских коллективов приняли участие в суперфинале турнира «Поле славы боевой».', - 'thumbnail': 're:^https?://.*\.jpg$', - 'timestamp': 1426237001, - 'upload_date': '20150313', - 'duration': 292, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', - 'only_matching': True, + _TESTS = [{ + 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', + 'md5': 'ff56a598c2cf411a9a38a69709e97079', + 'info_dict': { + 'id': '80822', + 'ext': 'mp4', + 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', + 'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1411896237, + 'upload_date': '20140928', + 'duration': 4846, }, - ] + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', + 
'only_matching': True, + }, { + 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -61,39 +44,74 @@ class SportBoxIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - sobj = re.search(r'src="/vdl/player/(?P\w+)/(?P\d+)"', webpage) - if (sobj): - video_id = sobj.group('video_id') - media_type = sobj.group('media_type') - else: - raise RegexNotFoundError('Unable to extract video_id') - - player = self._download_webpage( - 'http://news.sportbox.ru/vdl/player/%s/%s' % (media_type, video_id), - display_id, 'Downloading player webpage') - - hls = self._search_regex( - r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]+([^\"]+)['\"]+", player, 'hls file') - - formats = self._extract_m3u8_formats(hls, display_id, 'mp4') + player = self._search_regex( + r'src="/?(vdl/player/[^"]+)"', webpage, 'player') title = self._html_search_regex( r'

    ([^<]+)

    ', webpage, 'title') description = self._html_search_regex( - r'(?s)
    (.+?)
    ', webpage, 'description', fatal=False) + r'(?s)
    (.+?)
    ', + webpage, 'description', fatal=False) thumbnail = self._og_search_thumbnail(webpage) timestamp = parse_iso8601(self._search_regex( - r'([^<]+)', webpage, 'timestamp', fatal=False)) + r'([^<]+)', + webpage, 'timestamp', fatal=False)) duration = parse_duration(self._html_search_regex( - r'', webpage, 'duration', fatal=False)) + r'', + webpage, 'duration', fatal=False)) return { - 'id': video_id, + '_type': 'url_transparent', + 'url': compat_urlparse.urljoin(url, '/%s' % player), 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, + } + + +class SportBoxEmbedIE(InfoExtractor): + _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P\d+)' + _TESTS = [{ + 'url': 'http://news.sportbox.ru/vdl/player/ci/211355', + 'info_dict': { + 'id': '211355', + 'ext': 'mp4', + 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + hls = self._search_regex( + r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]", + webpage, 'hls file') + + formats = self._extract_m3u8_formats(hls, video_id, 'mp4') + + title = self._search_regex( + r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title') + + thumbnail = self._search_regex( + r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"', + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, 'formats': formats, } From e8cfacae3710c2c225488e4b2d41b84268217a55 Mon Sep 17 00:00:00 2001 From: blissland Date: Fri, 15 May 2015 17:57:32 +0100 
Subject: [PATCH 0699/2721] [CBSNewsIE] Relax thumbnail regex so test passes --- youtube_dl/extractor/cbsnews.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 7e47960ab..52e61d85b 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -32,7 +32,7 @@ class CBSNewsIE(InfoExtractor): 'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack', 'ext': 'flv', 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', - 'thumbnail': 'http://cbsnews2.cbsistatic.com/hub/i/r/2014/04/04/0c9fbc66-576b-41ca-8069-02d122060dd2/thumbnail/140x90/6dad7a502f88875ceac38202984b6d58/en-0404-werner-replace-640x360.jpg', + 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 205, }, 'params': { From 1436a6835e0f3489a4c37cca3da5087567b68158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 23:08:44 +0600 Subject: [PATCH 0700/2721] [sportbox:embed] Add `_extract_urls` --- youtube_dl/extractor/sportbox.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 10c45eb74..a869a1b25 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -91,6 +91,12 @@ class SportBoxEmbedIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) From ef28a6cb26630f8f198a72eee34a2b5c8bd2f802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 23:09:10 +0600 Subject: [PATCH 0701/2721] [sportbox:embed] Relax thumbnail --- youtube_dl/extractor/sportbox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 
a869a1b25..8686f9d11 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -113,7 +113,7 @@ class SportBoxEmbedIE(InfoExtractor): thumbnail = self._search_regex( r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"', - webpage, 'thumbnail', fatal=False) + webpage, 'thumbnail', default=None) return { 'id': video_id, From d40a3b5b55973d7ed65538179b71990c1828845a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 23:09:34 +0600 Subject: [PATCH 0702/2721] [generic] Add support for sportbox embeds --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3d756e848..9230c3bb0 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -32,6 +32,7 @@ from .brightcove import BrightcoveIE from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE +from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE from .condenast import CondeNastIE from .udn import UDNEmbedIE @@ -1229,6 +1230,11 @@ class GenericIE(InfoExtractor): if rutv_url: return self.url_result(rutv_url, 'RUTV') + # Look for embedded SportBox player + sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) + if sportbox_urls: + return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + # Look for embedded TED player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) From b827a6015c145d67a4d4e9ea38aa54ebe347d3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 23:18:21 +0600 Subject: [PATCH 0703/2721] [generic] Add test for sportbox embeds --- youtube_dl/extractor/generic.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9230c3bb0..610e33091 100644 --- 
a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -225,6 +225,37 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # SportBox embed + { + 'url': 'http://www.vestifinance.ru/articles/25753', + 'info_dict': { + 'id': '25753', + 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"', + }, + 'playlist': [{ + 'info_dict': { + 'id': '370908', + 'title': 'Госзаказ. День 3', + 'ext': 'mp4', + } + }, { + 'info_dict': { + 'id': '370905', + 'title': 'Госзаказ. День 2', + 'ext': 'mp4', + } + }, { + 'info_dict': { + 'id': '370902', + 'title': 'Госзаказ. День 1', + 'ext': 'mp4', + } + }], + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, # Embedded TED video { 'url': 'http://en.support.wordpress.com/videos/ted-talks/', From 25c3a7348f7971c0af32dcea2d7fd57bd5c63f05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 23:23:51 +0600 Subject: [PATCH 0704/2721] [generic] Fix typo --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 610e33091..9a7b0d25d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1425,7 +1425,7 @@ class GenericIE(InfoExtractor): # Look for Senate ISVP iframe senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) if senate_isvp_url: - return self.url_result(surl, 'SenateISVP') + return self.url_result(senate_isvp_url, 'SenateISVP') def check_video(vurl): if YoutubeIE.suitable(vurl): From 70d0d43b5eeff04b41b089e499401e38c115e456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 15 May 2015 23:32:25 +0600 Subject: [PATCH 0705/2721] [rts] Check formats (Closes #5711) --- youtube_dl/extractor/rts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index d0981115d..9fbe239d8 100644 --- 
a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -190,6 +190,7 @@ class RTSIE(InfoExtractor): 'tbr': media['rate'] or extract_bitrate(media['url']), } for media in info['media'] if media.get('rate')]) + self._check_formats(formats, video_id) self._sort_formats(formats) return { From 0d7f03642976e7859e290b06db41d20a4bfd3a38 Mon Sep 17 00:00:00 2001 From: ping Date: Sat, 16 May 2015 15:43:13 +0800 Subject: [PATCH 0706/2721] [viki] Add support for shows --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/viki.py | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f293bc2a4..cb6635610 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -639,7 +639,10 @@ from .vine import ( VineIE, VineUserIE, ) -from .viki import VikiIE +from .viki import ( + VikiIE, + VikiShowIE, +) from .vk import ( VKIE, VKUserVideosIE, diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index cf6af1e5c..4d185c0e6 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -145,3 +145,36 @@ class VikiIE(InfoExtractor): 'ext': 'vtt', }] return res + + +class VikiShowIE(InfoExtractor): + IE_NAME = 'viki:show' + _VALID_URL = r'^https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' + _TESTS = [{ + 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', + 'info_dict': { + 'id': '50c', + 'title': 'Boys Over Flowers', + 'description': 'md5:ecd3cff47967fe193cff37c0bec52790', + }, + 'playlist_count': 25, + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + show_page = self._download_webpage(url, show_id, 'Download show page') + + title = self._og_search_title(show_page) + description = self._og_search_description(show_page) + + show_json = self._download_json( + 'http://api.viki.io/v4/containers/%s/episodes.json?app=100000a&per_page=999&sort=number&direction=asc' % 
show_id, + show_id, note='Retrieve show json', errnote='Unable to get show json' + ) + entries = [] + for video in show_json['response']: + video_id = video['id'] + entries.append(self.url_result( + 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) + + return self.playlist_result(entries, show_id, title, description) From 2f3bdab2b90c6695c0a478f352967b0c9da4f23f Mon Sep 17 00:00:00 2001 From: ping Date: Sat, 16 May 2015 15:56:37 +0800 Subject: [PATCH 0707/2721] [viki] Fix code format --- youtube_dl/extractor/viki.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 4d185c0e6..40a73f561 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -169,8 +169,7 @@ class VikiShowIE(InfoExtractor): show_json = self._download_json( 'http://api.viki.io/v4/containers/%s/episodes.json?app=100000a&per_page=999&sort=number&direction=asc' % show_id, - show_id, note='Retrieve show json', errnote='Unable to get show json' - ) + show_id, note='Retrieve show json', errnote='Unable to get show json') entries = [] for video in show_json['response']: video_id = video['id'] From 1c97b0a777f52c520587e93e7e61721fa6195977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 May 2015 20:00:40 +0600 Subject: [PATCH 0708/2721] [ooyala:external] Add extractor --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/ooyala.py | 221 +++++++++++++++++++------------ 2 files changed, 137 insertions(+), 89 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f293bc2a4..1731f4fb2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -376,7 +376,10 @@ from .nytimes import ( from .nuvid import NuvidIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE -from .ooyala import OoyalaIE +from .ooyala import ( + OoyalaIE, + OoyalaExternalIE, +) 
from .openfilm import OpenFilmIE from .orf import ( ORFTVthekIE, diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index c0e6d643d..9f4fe5b29 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -12,7 +12,100 @@ from ..utils import ( ) -class OoyalaIE(InfoExtractor): +class OoyalaBaseIE(InfoExtractor): + + def _extract_result(self, info, more_info): + embedCode = info['embedCode'] + video_url = info.get('ipad_url') or info['url'] + + if determine_ext(video_url) == 'm3u8': + formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') + else: + formats = [{ + 'url': video_url, + 'ext': 'mp4', + }] + + return { + 'id': embedCode, + 'title': unescapeHTML(info['title']), + 'formats': formats, + 'description': unescapeHTML(more_info['description']), + 'thumbnail': more_info['promo'], + } + + def _extract(self, player_url, video_id): + player = self._download_webpage(player_url, video_id) + mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', + player, 'mobile player url') + # Looks like some videos are only available for particular devices + # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 + # is only available for ipad) + # Working around with fetching URLs for all the devices found starting with 'unknown' + # until we succeed or eventually fail for each device. 
+ devices = re.findall(r'device\s*=\s*"([^"]+)";', player) + devices.remove('unknown') + devices.insert(0, 'unknown') + for device in devices: + mobile_player = self._download_webpage( + '%s&device=%s' % (mobile_url, device), video_id, + 'Downloading mobile player JS for %s device' % device) + videos_info = self._search_regex( + r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', + mobile_player, 'info', fatal=False, default=None) + if videos_info: + break + + if not videos_info: + formats = [] + auth_data = self._download_json( + 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode), + video_id) + + cur_auth_data = auth_data['authorization_data'][video_id] + + for stream in cur_auth_data['streams']: + formats.append({ + 'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), + 'ext': stream.get('delivery_type'), + 'format': stream.get('video_codec'), + 'format_id': stream.get('profile'), + 'width': int_or_none(stream.get('width')), + 'height': int_or_none(stream.get('height')), + 'abr': int_or_none(stream.get('audio_bitrate')), + 'vbr': int_or_none(stream.get('video_bitrate')), + }) + if formats: + return { + 'id': video_id, + 'formats': formats, + 'title': 'Ooyala video', + } + + if not cur_auth_data['authorized']: + raise ExtractorError(cur_auth_data['message'], expected=True) + + if not videos_info: + raise ExtractorError('Unable to extract info') + videos_info = videos_info.replace('\\"', '"') + videos_more_info = self._search_regex( + r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') + videos_info = json.loads(videos_info) + videos_more_info = json.loads(videos_more_info) + + if videos_more_info.get('lineup'): + videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] + return { + '_type': 'playlist', + 'id': video_id, + 'title': 
unescapeHTML(videos_more_info['title']), + 'entries': videos, + } + else: + return self._extract_result(videos_info[0], videos_more_info) + + +class OoyalaIE(OoyalaBaseIE): _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P.+?)(&|$)' _TESTS = [ @@ -57,95 +150,47 @@ class OoyalaIE(InfoExtractor): return cls.url_result(cls._url_for_embed_code(embed_code), ie=cls.ie_key()) - def _extract_result(self, info, more_info): - embedCode = info['embedCode'] - video_url = info.get('ipad_url') or info['url'] + def _real_extract(self, url): + embed_code = self._match_id(url) + player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code + return self._extract(player_url, embed_code) - if determine_ext(video_url) == 'm3u8': - formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') - else: - formats = [{ - 'url': video_url, - 'ext': 'mp4', - }] - return { - 'id': embedCode, - 'title': unescapeHTML(info['title']), - 'formats': formats, - 'description': unescapeHTML(more_info['description']), - 'thumbnail': more_info['promo'], - } +class OoyalaExternalIE(OoyalaBaseIE): + _VALID_URL = r'''(?x) + (?: + ooyalaexternal:| + https?://.+?\.ooyala\.com/.*?\bexternalId= + ) + (?P[^:]+) + : + (?P.+) + (?: + :| + .*?&pcode= + ) + (?P.+?) 
+ (&|$) + ''' + + _TEST = { + 'url': 'https://player.ooyala.com/player.js?externalId=espn:10365079&pcode=1kNG061cgaoolOncv54OAO1ceO-I&adSetCode=91cDU6NuXTGKz3OdjOxFdAgJVtQcKJnI&callback=handleEvents&hasModuleParams=1&height=968&playerBrandingId=7af3bd04449c444c964f347f11873075&targetReplaceId=videoPlayer&width=1656&wmode=opaque&allowScriptAccess=always', + 'info_dict': { + 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', + 'ext': 'mp4', + 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', + 'description': '', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - embedCode = mobj.group('id') - player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode - player = self._download_webpage(player_url, embedCode) - mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', - player, 'mobile player url') - # Looks like some videos are only available for particular devices - # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 - # is only available for ipad) - # Working around with fetching URLs for all the devices found starting with 'unknown' - # until we succeed or eventually fail for each device. 
- devices = re.findall(r'device\s*=\s*"([^"]+)";', player) - devices.remove('unknown') - devices.insert(0, 'unknown') - for device in devices: - mobile_player = self._download_webpage( - '%s&device=%s' % (mobile_url, device), embedCode, - 'Downloading mobile player JS for %s device' % device) - videos_info = self._search_regex( - r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', - mobile_player, 'info', fatal=False, default=None) - if videos_info: - break - - if not videos_info: - formats = [] - auth_data = self._download_json( - 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode), - embedCode) - - cur_auth_data = auth_data['authorization_data'][embedCode] - - for stream in cur_auth_data['streams']: - formats.append({ - 'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), - 'ext': stream.get('delivery_type'), - 'format': stream.get('video_codec'), - 'format_id': stream.get('profile'), - 'width': int_or_none(stream.get('width')), - 'height': int_or_none(stream.get('height')), - 'abr': int_or_none(stream.get('audio_bitrate')), - 'vbr': int_or_none(stream.get('video_bitrate')), - }) - if formats: - return { - 'id': embedCode, - 'formats': formats, - 'title': 'Ooyala video', - } - - if not cur_auth_data['authorized']: - raise ExtractorError(cur_auth_data['message'], expected=True) - - if not videos_info: - raise ExtractorError('Unable to extract info') - videos_info = videos_info.replace('\\"', '"') - videos_more_info = self._search_regex( - r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') - videos_info = json.loads(videos_info) - videos_more_info = json.loads(videos_more_info) - - if videos_more_info.get('lineup'): - videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] - return { - '_type': 'playlist', - 'id': embedCode, - 
'title': unescapeHTML(videos_more_info['title']), - 'entries': videos, - } - else: - return self._extract_result(videos_info[0], videos_more_info) + partner_id = mobj.group('partner_id') + video_id = mobj.group('id') + pcode = mobj.group('pcode') + player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode) + return self._extract(player_url, video_id) From 9354a5fad4521687eb9f08c1a42848621857400b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 May 2015 20:15:31 +0600 Subject: [PATCH 0709/2721] [ooyala] Fix unresolved reference --- youtube_dl/extractor/ooyala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 9f4fe5b29..a262a9f6d 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -59,7 +59,7 @@ class OoyalaBaseIE(InfoExtractor): if not videos_info: formats = [] auth_data = self._download_json( - 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode), + 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id), video_id) cur_auth_data = auth_data['authorization_data'][video_id] From ef2dcbe4adce4478d409397faaae7ec6453ecf7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 May 2015 21:07:29 +0600 Subject: [PATCH 0710/2721] [sbs] Fix extraction (Closes #5725) --- youtube_dl/extractor/sbs.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index b8775c2f9..3073e5e86 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -33,16 +33,18 @@ class SBSIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = 
mobj.group('id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - release_urls_json = js_to_json(self._search_regex( + player = self._search_regex( r'(?s)playerParams\.releaseUrls\s*=\s*(\{.*?\n\});\n', - webpage, '')) - release_urls = json.loads(release_urls_json) - theplatform_url = ( - release_urls.get('progressive') or release_urls.get('standard')) + webpage, 'player') + player = re.sub(r"'\s*\+\s*[\da-zA-Z_]+\s*\+\s*'", '', player) + + release_urls = self._parse_json(js_to_json(player), video_id) + + theplatform_url = release_urls.get('progressive') or release_urls['standard'] title = remove_end(self._og_search_title(webpage), ' (The Feed)') description = self._html_search_meta('description', webpage) @@ -52,7 +54,6 @@ class SBSIE(InfoExtractor): '_type': 'url_transparent', 'id': video_id, 'url': theplatform_url, - 'title': title, 'description': description, 'thumbnail': thumbnail, From 7e760fc18897663db7c0717434e28a8cca9f3810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 16 May 2015 21:14:19 +0600 Subject: [PATCH 0711/2721] [espn] Add extractor (#4396) Unfinished --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/espn.py | 55 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/espn.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1731f4fb2..6b19eb6f8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -141,6 +141,7 @@ from .engadget import EngadgetIE from .eporner import EpornerIE from .eroprofile import EroProfileIE from .escapist import EscapistIE +from .espn import ESPNIE from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py new file mode 100644 index 000000000..e6f8f0337 --- /dev/null +++ b/youtube_dl/extractor/espn.py @@ -0,0 
+1,55 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class ESPNIE(InfoExtractor): + _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P[^/]+)' + _WORKING = False + _TESTS = [{ + 'url': 'http://espn.go.com/video/clip?id=10365079', + 'info_dict': { + 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', + 'ext': 'mp4', + 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', + 'description': '', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', + 'only_matching': True, + }, { + 'url': 'http://espn.go.com/nba/recap?gameId=400793786', + 'only_matching': True, + }, { + 'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge', + 'only_matching': True, + }, { + 'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings', + 'only_matching': True, + }, { + 'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + r'class="video-play-button"[^>]+data-id="(\d+)', + webpage, 'video id') + + player = self._download_webpage( + 'https://espn.go.com/video/iframe/twitter/?id=%s' % video_id, video_id) + + pcode = self._search_regex( + r'["\']pcode=([^"\']+)["\']', player, 'pcode') + + return self.url_result( + 'ooyalaexternal:espn:%s:%s' % (video_id, pcode), + 'OoyalaExternal') From 4d52f2eb7f4b16ea5491f20abf0b29a1fcb24a02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 16 May 2015 18:38:28 +0200 Subject: [PATCH 0712/2721] [sbs] Remove unused import --- youtube_dl/extractor/sbs.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 3073e5e86..d4bd1a0d7 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import json import re from .common import InfoExtractor from ..utils import ( From 1c18de00192d195357989861563cc1fad9256128 Mon Sep 17 00:00:00 2001 From: ping Date: Sun, 17 May 2015 01:38:50 +0800 Subject: [PATCH 0713/2721] [viki] Add proper paging and include clips --- youtube_dl/extractor/viki.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 40a73f561..4d477b03c 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -93,7 +93,7 @@ class VikiIE(InfoExtractor): 'Video %s is blocked from your location.' % video_id, expected=True) else: - raise ExtractorError('Viki said: ' + err_msg) + raise ExtractorError('Viki said: %s %s' % (err_msg, url)) mobj = re.search( r']+type="(?P[^"]+)"[^>]+src="(?P[^"]+)"', info_webpage) if not mobj: @@ -157,7 +157,15 @@ class VikiShowIE(InfoExtractor): 'title': 'Boys Over Flowers', 'description': 'md5:ecd3cff47967fe193cff37c0bec52790', }, - 'playlist_count': 25, + 'playlist_count': 70, + }, { + 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete', + 'info_dict': { + 'id': '1354c', + 'title': 'Poor Nastya [COMPLETE]', + 'description': 'md5:05bf5471385aa8b21c18ad450e350525', + }, + 'playlist_count': 127, }] def _real_extract(self, url): @@ -167,13 +175,16 @@ class VikiShowIE(InfoExtractor): title = self._og_search_title(show_page) description = self._og_search_description(show_page) - show_json = self._download_json( - 'http://api.viki.io/v4/containers/%s/episodes.json?app=100000a&per_page=999&sort=number&direction=asc' % show_id, - show_id, note='Retrieve show json', errnote='Unable to get show json') entries = [] - for video in show_json['response']: - 
video_id = video['id'] - entries.append(self.url_result( - 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) + for video_type in ['episodes', 'clips']: + json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=25&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type) + while json_url is not None: + show_json = self._download_json( + json_url, show_id, note='Retrieve show json', errnote='Unable to get show json') + for video in show_json['response']: + video_id = video['id'] + entries.append(self.url_result( + 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) + json_url = show_json['pagination']['next'] return self.playlist_result(entries, show_id, title, description) From baa43cbaf01a575eacb8e1bb39c7200f68c36daa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 May 2015 02:59:35 +0600 Subject: [PATCH 0714/2721] [extractor/common] Relax valid url check verbosity --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 65bb77086..cecf917ff 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -786,8 +786,8 @@ class InfoExtractor(object): return True except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): - self.report_warning( - '%s URL is invalid, skipping' % item, video_id) + self.to_screen( + '%s: %s URL is invalid, skipping' % (video_id, item)) return False raise From bc0f937b55aae6ce731d259a7658b0281c2e62ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 May 2015 03:01:52 +0600 Subject: [PATCH 0715/2721] [tv2] Add extractor (#5724) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tv2.py | 93 ++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 youtube_dl/extractor/tv2.py diff --git a/youtube_dl/extractor/__init__.py 
b/youtube_dl/extractor/__init__.py index 6b19eb6f8..fb4f63ca3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -572,6 +572,7 @@ from .tumblr import TumblrIE from .tunein import TuneInIE from .turbo import TurboIE from .tutv import TutvIE +from .tv2 import TV2IE from .tv4 import TV4IE from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py new file mode 100644 index 000000000..2dcc0e971 --- /dev/null +++ b/youtube_dl/extractor/tv2.py @@ -0,0 +1,93 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + float_or_none, + parse_iso8601, +) + + +class TV2IE(InfoExtractor): + _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P\d+)' + _TEST = { + 'url': 'http://www.tv2.no/v/916509/', + 'md5': '9cb9e3410b18b515d71892f27856e9b1', + 'info_dict': { + 'id': '916509', + 'ext': 'flv', + 'title': 'Se Gryttens hyllest av Steven Gerrard', + 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.', + 'timestamp': 1431715610, + 'upload_date': '20150515', + 'duration': 156.967, + 'view_count': int, + 'categories': list, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + formats = [] + format_urls = [] + for protocol in ('HDS', 'HLS'): + data = self._download_json( + 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), + video_id, 'Downloading play JSON')['playback'] + for item in data['items']['item']: + video_url = item.get('url') + if not video_url or video_url in format_urls: + continue + format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat')) + if not self._is_valid_url(video_url, video_id, format_id): + continue + format_urls.append(video_url) + ext = determine_ext(video_url) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + 
video_url, video_id, f4m_id=format_id)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=format_id)) + elif ext == 'ism' or video_url.endswith('.ism/Manifest'): + pass + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'tbr': int_or_none(item.get('bitrate')), + 'filesize': int_or_none(item.get('fileSize')), + }) + self._sort_formats(formats) + + asset = self._download_json( + 'http://sumo.tv2.no/api/web/asset/%s.json' % video_id, + video_id, 'Downloading metadata JSON')['asset'] + + title = asset['title'] + description = asset.get('description') + timestamp = parse_iso8601(asset.get('createTime')) + duration = float_or_none(asset.get('accurateDuration') or asset.get('duration')) + view_count = int_or_none(asset.get('views')) + categories = asset.get('keywords', '').split(',') + + thumbnails = [{ + 'id': thumbnail.get('@type'), + 'url': thumbnail.get('url'), + } for _, thumbnail in asset.get('imageVersions', {}).items()] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': view_count, + 'categories': categories, + 'formats': formats, + } From 588b82bbf8c90981c54f180eca40e6c743f8f89f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 May 2015 03:32:53 +0600 Subject: [PATCH 0716/2721] [tv2:article] Add extractor (Closes #5724) --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/tv2.py | 33 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb4f63ca3..6f8c261d5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -572,7 +572,10 @@ from .tumblr import TumblrIE from .tunein import TuneInIE from .turbo import TurboIE from .tutv import TutvIE -from .tv2 import TV2IE 
+from .tv2 import ( + TV2IE, + TV2ArticleIE, +) from .tv4 import TV4IE from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py index 2dcc0e971..fa338b936 100644 --- a/youtube_dl/extractor/tv2.py +++ b/youtube_dl/extractor/tv2.py @@ -1,12 +1,15 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( determine_ext, int_or_none, float_or_none, parse_iso8601, + remove_end, ) @@ -91,3 +94,33 @@ class TV2IE(InfoExtractor): 'categories': categories, 'formats': formats, } + + +class TV2ArticleIE(InfoExtractor): + _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', + 'info_dict': { + 'id': '6930542', + 'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret', + 'description': 'md5:339573779d3eea3542ffe12006190954', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.tv2.no/a/6930542', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2') + for video_id in re.findall(r'data-assetid="(\d+)"', webpage)] + + title = remove_end(self._og_search_title(webpage), ' - TV2.no') + description = remove_end(self._og_search_description(webpage), ' - TV2.no') + + return self.playlist_result(entries, playlist_id, title, description) From 8da0e0e94682faa0463f33d991df70a2402b5a86 Mon Sep 17 00:00:00 2001 From: ping Date: Sun, 17 May 2015 06:19:38 +0800 Subject: [PATCH 0717/2721] [viki] Change IE name to channel, better message output --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/viki.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/__init__.py 
b/youtube_dl/extractor/__init__.py index cb6635610..21f7b7290 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -641,7 +641,7 @@ from .vine import ( ) from .viki import ( VikiIE, - VikiShowIE, + VikiChannelIE, ) from .vk import ( VKIE, diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 4d477b03c..9bdbdc3e4 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -147,8 +147,8 @@ class VikiIE(InfoExtractor): return res -class VikiShowIE(InfoExtractor): - IE_NAME = 'viki:show' +class VikiChannelIE(InfoExtractor): + IE_NAME = 'viki:channel' _VALID_URL = r'^https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' _TESTS = [{ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', @@ -167,6 +167,7 @@ class VikiShowIE(InfoExtractor): }, 'playlist_count': 127, }] + _PER_PAGE = 25 def _real_extract(self, url): show_id = self._match_id(url) @@ -177,10 +178,12 @@ class VikiShowIE(InfoExtractor): entries = [] for video_type in ['episodes', 'clips']: - json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=25&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type) + json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type, self._PER_PAGE) while json_url is not None: show_json = self._download_json( - json_url, show_id, note='Retrieve show json', errnote='Unable to get show json') + json_url, show_id, + note='Downloading %s json page #%s' % + (video_type, re.search(r'[?&]page=([0-9]+)', json_url).group(1))) for video in show_json['response']: video_id = video['id'] entries.append(self.url_result( From 725652e9247e1171110b624d748e20fa1c88260e Mon Sep 17 00:00:00 2001 From: Mister Hat Date: Sat, 16 May 2015 19:50:58 -0500 Subject: [PATCH 0718/2721] [karrierevideos] add support for www.karrierevideos.at (closes #5354) --- youtube_dl/extractor/__init__.py | 1 + 
youtube_dl/extractor/karrierevideos.py | 52 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/karrierevideos.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb4f63ca3..d131d3ec3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -244,6 +244,7 @@ from .kaltura import KalturaIE from .kanalplay import KanalPlayIE from .kankan import KankanIE from .karaoketv import KaraoketvIE +from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py new file mode 100644 index 000000000..59d29e845 --- /dev/null +++ b/youtube_dl/extractor/karrierevideos.py @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class KarriereVideosIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?karrierevideos\.at/berufsvideos/([a-z-]+)/(?P[a-z-]+)' + _TEST = { + 'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin', + 'info_dict': { + 'id': 'altenpflegerin', + 'ext': 'mp4', + 'title': 'AltenpflegerIn', + 'thumbnail': 're:^http://.*\.png\?v=[0-9]+', + 'description': 'md5:dbadd1259fde2159a9b28667cb664ae2' + }, + 'params': { + 'skip_download': 'requires rtmpdump' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + description = self._html_search_regex( + r'
    \n{0,}?\s{0,}

    (.*?)

    ', + webpage, 'description') + + playlist = self._html_search_regex(r'/config/video/(.*?)\.xml', webpage, 'playlist') + playlist = self._download_xml( + 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % playlist, + video_id) + + namespace = { + 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' + } + + item = playlist.find('tracklist/item') + streamer = item.find('jwplayer:streamer', namespace).text + + return { + 'id': video_id, + 'title': self._html_search_meta('title', webpage), + 'description': description, + 'thumbnail': 'http://www.karrierevideos.at' + self._html_search_meta('thumbnail', webpage), + 'protocol': 'rtmp', + 'url': streamer.replace('rtmpt', 'http'), + 'play_path': 'mp4:' + item.find('jwplayer:file', namespace).text, + 'tc_url': streamer, + 'ext': 'mp4' + } From ba9d16291b8ace3bd412bcfc0c128c047545e509 Mon Sep 17 00:00:00 2001 From: Mister Hat Date: Sun, 17 May 2015 03:35:08 -0500 Subject: [PATCH 0719/2721] manually specify namespace --- youtube_dl/extractor/karrierevideos.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index 59d29e845..a05e8ab76 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -29,15 +29,13 @@ class KarriereVideosIE(InfoExtractor): playlist = self._html_search_regex(r'/config/video/(.*?)\.xml', webpage, 'playlist') playlist = self._download_xml( - 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % playlist, + 'http://www.karrierevideos.at/player-playlist.xml.php?p=' + playlist, video_id) - namespace = { - 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' - } + namespace = 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' item = playlist.find('tracklist/item') - streamer = item.find('jwplayer:streamer', namespace).text + streamer = item.find('{%s}streamer' % namespace).text return { 'id': 
video_id, @@ -46,7 +44,7 @@ class KarriereVideosIE(InfoExtractor): 'thumbnail': 'http://www.karrierevideos.at' + self._html_search_meta('thumbnail', webpage), 'protocol': 'rtmp', 'url': streamer.replace('rtmpt', 'http'), - 'play_path': 'mp4:' + item.find('jwplayer:file', namespace).text, + 'play_path': 'mp4:' + item.find('{%s}file' % namespace).text, 'tc_url': streamer, 'ext': 'mp4' } From 4a5a898a8fa392d02102672f9767f33a39a73066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 17 May 2015 20:56:03 +0600 Subject: [PATCH 0720/2721] [YoutubeDL] Clarify incompatible formats merge message When `-f` is not specified it's misleading to see `You have requested ...` as user did not actually request any formats. --- youtube_dl/YoutubeDL.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5df889945..58b34e087 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1368,7 +1368,7 @@ class YoutubeDL(object): postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.' - ' The formats won\'t be merged') + ' The formats won\'t be merged.') else: postprocessors = [merger] @@ -1395,8 +1395,8 @@ class YoutubeDL(object): requested_formats = info_dict['requested_formats'] if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): info_dict['ext'] = 'mkv' - self.report_warning('You have requested formats incompatible for merge. 
' - 'The formats will be merged into mkv') + self.report_warning( + 'Requested formats are incompatible for merge and will be merged into mkv.') # Ensure filename always has a correct extension for successful merge filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) if os.path.exists(encodeFilename(filename)): From fc6e75dd57f3497b99def659b3d0f173b195b7d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 18 May 2015 11:21:09 +0200 Subject: [PATCH 0721/2721] [instagram] Only recognize https urls (fixes #5739) http urls redirect to them. --- youtube_dl/extractor/instagram.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 65f6ca103..b10755788 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -7,9 +7,9 @@ from ..utils import int_or_none class InstagramIE(InfoExtractor): - _VALID_URL = r'https?://instagram\.com/p/(?P[\da-zA-Z]+)' + _VALID_URL = r'https://instagram\.com/p/(?P[\da-zA-Z]+)' _TEST = { - 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc', + 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', 'info_dict': { 'id': 'aye83DjauH', @@ -41,11 +41,11 @@ class InstagramIE(InfoExtractor): class InstagramUserIE(InfoExtractor): - _VALID_URL = r'http://instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' + _VALID_URL = r'https://instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' _TEST = { - 'url': 'http://instagram.com/porsche', + 'url': 'https://instagram.com/porsche', 'info_dict': { 'id': 'porsche', 'title': 'porsche', From 5bdc520cf19f404247ec2be1ffc1e83449fa2375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 May 2015 21:23:05 +0600 Subject: [PATCH 0722/2721] [xminus] Fix extraction --- youtube_dl/extractor/xminus.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py index 8c6241aed..7c9d8af6f 100644 --- a/youtube_dl/extractor/xminus.py +++ b/youtube_dl/extractor/xminus.py @@ -43,7 +43,7 @@ class XMinusIE(InfoExtractor): r'minus_track\.dur_sec=\'([0-9]*?)\'', webpage, 'duration', fatal=False)) filesize_approx = parse_filesize(self._html_search_regex( - r'
    \s*([0-9.]+\s*[a-zA-Z][bB])', + r'
    ]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])', webpage, 'approximate filesize', fatal=False)) tbr = int_or_none(self._html_search_regex( r'
    \s*([0-9]+)\s*kbps', @@ -58,7 +58,7 @@ class XMinusIE(InfoExtractor): description = re.sub(' *\r *', '\n', description) enc_token = self._html_search_regex( - r'minus_track\.tkn="(.+?)"', webpage, 'enc_token') + r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token') token = ''.join( c if pos == 3 else compat_chr(compat_ord(c) - 1) for pos, c in enumerate(reversed(enc_token))) From 4f514c7e88d2ce8ebe9c2478183e8797cfb2a4c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 May 2015 21:29:41 +0600 Subject: [PATCH 0723/2721] [wimp] Fix youtube extraction (Closes #5690) --- youtube_dl/extractor/wimp.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index d6dec25ca..f69d46a28 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -37,7 +37,8 @@ class WimpIE(InfoExtractor): video_id = mobj.group(1) webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL') + [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"], + webpage, 'video URL') if YoutubeIE.suitable(video_url): self.to_screen('Found YouTube video') return { From 2328f2fe684f9a9025217c6f149e92a403a4c437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 May 2015 21:34:20 +0600 Subject: [PATCH 0724/2721] [vulture] Fix extraction --- youtube_dl/extractor/vulture.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py index 1eb24a3d6..faa167e65 100644 --- a/youtube_dl/extractor/vulture.py +++ b/youtube_dl/extractor/vulture.py @@ -44,7 +44,7 @@ class VultureIE(InfoExtractor): query_webpage = self._download_webpage( query_url, display_id, note='Downloading query page') params_json = self._search_regex( - r'(?sm)new 
MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n', + r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n', query_webpage, 'player params') params = json.loads(params_json) From 5d8dcb5342c97b05c037c8c4e80002540db261b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 May 2015 21:39:15 +0600 Subject: [PATCH 0725/2721] [vuclip] Fix extraction --- youtube_dl/extractor/vuclip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index c3fde53f5..a6d9b5fee 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -49,7 +49,7 @@ class VuClipIE(InfoExtractor): links_code = self._search_regex( r'''(?xs) (?: - | + | \s*
    ) (.*?) From 484c9d2d5b669220c24c865947c3f65049916b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 18 May 2015 21:43:54 +0600 Subject: [PATCH 0726/2721] [vier] Fix extraction --- youtube_dl/extractor/vier.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 619039e51..15377097e 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -38,11 +38,14 @@ class VierIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'"nid"\s*:\s*"(\d+)"', webpage, 'video id') + [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], + webpage, 'video id') application = self._search_regex( - r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod') + [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], + webpage, 'application', default='vier_vod') filename = self._search_regex( - r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename') + [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], + webpage, 'filename') playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') From 2aa64b89b3ac8f387d4c0c27ce7de64bc0ff68de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 18 May 2015 17:58:53 +0200 Subject: [PATCH 0727/2721] tox: Pass HOME environment variable Since version 2.0 it only passes a limited set of variables and we need HOME for the tests --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 00c6e00e3..cd805fe8a 100644 --- a/tox.ini +++ b/tox.ini @@ -4,6 +4,8 @@ envlist = py26,py27,py33,py34 deps = nose coverage +# We need a valid $HOME for test_compat_expanduser +passenv = HOME defaultargs = test --exclude test_download.py --exclude test_age_restriction.py --exclude 
test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py From 1b0427e6c433c0b6db5e210db6e3173e19e702ed Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 19 May 2015 00:45:01 +0800 Subject: [PATCH 0728/2721] [utils] Support TTML without default namespace In a strict sense such TTML is invalid, but Yahoo uses it. --- test/test_utils.py | 15 +++++++++++++++ youtube_dl/utils.py | 9 ++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b40107037..e13e11b59 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -621,6 +621,21 @@ Line ''' self.assertEqual(dfxp2srt(dfxp_data), srt_data) + dfxp_data_no_default_namespace = ''' + + +
    +

    The first line

    +
    + +
    ''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The first line + +''' + self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ed9ed9ed6..507f07383 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1848,9 +1848,9 @@ def dfxp2srt(dfxp_data): out = str_or_empty(node.text) for child in node: - if child.tag == _x('ttml:br'): + if child.tag in (_x('ttml:br'), 'br'): out += '\n' + str_or_empty(child.tail) - elif child.tag == _x('ttml:span'): + elif child.tag in (_x('ttml:span'), 'span'): out += str_or_empty(parse_node(child)) else: out += str_or_empty(xml.etree.ElementTree.tostring(child)) @@ -1859,7 +1859,10 @@ def dfxp2srt(dfxp_data): dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) out = [] - paras = dfxp.findall(_x('.//ttml:p')) + paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') + + if not paras: + raise ValueError('Invalid dfxp/TTML subtitle') for para, index in zip(paras, itertools.count(1)): begin_time = parse_dfxp_time_expr(para.attrib['begin']) From ecee5724110847b832a6074c66ca4a63758100f4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 19 May 2015 00:50:24 +0800 Subject: [PATCH 0729/2721] [yahoo] Add support for closed captions (closes #5714) --- youtube_dl/extractor/yahoo.py | 18 ++++++++++++++++++ youtube_dl/utils.py | 1 + 2 files changed, 19 insertions(+) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index bf4e659ac..f9afbdbab 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -15,6 +15,7 @@ from ..utils import ( unescapeHTML, ExtractorError, int_or_none, + mimetype2ext, ) from .nbc import NBCSportsVPlayerIE @@ -236,6 +237,22 @@ class YahooIE(InfoExtractor): self._sort_formats(formats) + closed_captions = self._html_search_regex( + r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', + default='[]') + + cc_json 
= self._parse_json(closed_captions, video_id, fatal=False) + subtitles = {} + if cc_json: + for closed_caption in cc_json: + lang = closed_caption['lang'] + if lang not in subtitles: + subtitles[lang] = [] + subtitles[lang].append({ + 'url': closed_caption['url'], + 'ext': mimetype2ext(closed_caption['content_type']), + }) + return { 'id': video_id, 'display_id': display_id, @@ -244,6 +261,7 @@ class YahooIE(InfoExtractor): 'description': clean_html(meta['description']), 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), 'duration': int_or_none(meta.get('duration')), + 'subtitles': subtitles, } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 507f07383..52d198fa3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1665,6 +1665,7 @@ def mimetype2ext(mt): return { 'x-ms-wmv': 'wmv', 'x-mp4-fragmented': 'mp4', + 'ttml+xml': 'ttml', }.get(res, res) From b813d8caf1b23821036b77b851e42ba0a0ad35a7 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 19 May 2015 01:01:42 +0800 Subject: [PATCH 0730/2721] [qqmusic] Unescape '\\n' in description (#5705) --- youtube_dl/extractor/qqmusic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 13113820b..b540033e2 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -26,7 +26,7 @@ class QQMusicIE(InfoExtractor): 'title': '可惜没如果', 'upload_date': '20141227', 'creator': '林俊杰', - 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30', + 'description': 'md5:d327722d0361576fde558f1ac68a7065', } }] @@ -60,6 +60,8 @@ class QQMusicIE(InfoExtractor): lrc_content = self._html_search_regex( r'
    ]*>([^<>]+)
    ', detail_info_page, 'LRC lyrics', default=None) + if lrc_content: + lrc_content = lrc_content.replace('\\n', '\n') guid = self.m_r_get_ruin() From d9d747a06ab3b4c36c6063074ffb42aeb185431f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 19 May 2015 21:28:41 +0600 Subject: [PATCH 0731/2721] [ultimedia] Fix extraction --- youtube_dl/extractor/ultimedia.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py index 96c809eaf..c4751050e 100644 --- a/youtube_dl/extractor/ultimedia.py +++ b/youtube_dl/extractor/ultimedia.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, qualities, @@ -44,9 +45,9 @@ class UltimediaIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - deliver_url = self._search_regex( - r']+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"', - webpage, 'deliver URL') + deliver_url = self._proto_relative_url(self._search_regex( + r']+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"', + webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':') deliver_page = self._download_webpage( deliver_url, video_id, 'Downloading iframe page') @@ -57,7 +58,8 @@ class UltimediaIE(InfoExtractor): player = self._parse_json( self._search_regex( - r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'), + r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", + deliver_page, 'player'), video_id) quality = qualities(['flash', 'html5']) From f670ef1c8ebe0329a68b3a3d5c2b7e07ae5c9425 Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 20 May 2015 13:51:43 +0800 Subject: [PATCH 0732/2721] [dramafever] Add new extractor for dramafever.com --- youtube_dl/extractor/__init__.py | 4 + youtube_dl/extractor/dramafever.py | 131 
+++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 youtube_dl/extractor/dramafever.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6f8c261d5..ca857a75f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -112,6 +112,10 @@ from .dfb import DFBIE from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE +from .dramafever import ( + DramaFeverIE, + DramaFeverSeriesIE, +) from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE from .drtuber import DrTuberIE diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py new file mode 100644 index 000000000..8fac99cc5 --- /dev/null +++ b/youtube_dl/extractor/dramafever.py @@ -0,0 +1,131 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class DramaFeverIE(InfoExtractor): + IE_NAME = 'dramafever' + _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)/' + _TESTS = [{ + 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', + 'info_dict': { + 'id': '4512.1', + 'ext': 'flv', + 'title': 'Cooking with Shin 4512.1', + 'upload_date': '20140702', + 'description': 'Served at all special occasions and featured in the hit drama Heirs, Shin cooks Red Bean Rice.', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url).replace("/", ".") + + consumer_secret = self._get_consumer_secret(video_id) + + ep_json = self._download_json( + "http://www.dramafever.com/amp/episode/feed.json?guid=%s" % video_id, + video_id, note='Downloading episode metadata', + errnote="Video may not be available for your location")["channel"]["item"] + + title = ep_json["media-group"]["media-title"] + description = ep_json["media-group"]["media-description"] + thumbnail = ep_json["media-group"]["media-thumbnail"]["@attributes"]["url"] + duration = 
int(ep_json["media-group"]["media-content"][0]["@attributes"]["duration"]) + mobj = re.match(r"([0-9]{4})-([0-9]{2})-([0-9]{2})", ep_json["pubDate"]) + upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) if mobj is not None else None + + formats = [] + for vid_format in ep_json["media-group"]["media-content"]: + src = vid_format["@attributes"]["url"] + if '.f4m' in src: + formats.extend(self._extract_f4m_formats(src, video_id)) + + self._sort_formats(formats) + video_subtitles = self.extract_subtitles(video_id, consumer_secret) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'formats': formats, + 'subtitles': video_subtitles, + } + + def _get_consumer_secret(self, video_id): + df_js = self._download_webpage( + "http://www.dramafever.com/static/126960d/v2/js/plugins/jquery.threadedcomments.js", video_id) + return self._search_regex(r"'cs': '([0-9a-zA-Z]+)'", df_js, "cs") + + def _get_episodes(self, series_id, consumer_secret, episode_filter=None): + _PAGE_SIZE = 60 + + curr_page = 1 + max_pages = curr_page + 1 + results = [] + while max_pages >= curr_page: + page_url = "http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d" % \ + (consumer_secret, series_id, _PAGE_SIZE, curr_page) + series = self._download_json( + page_url, series_id, note="Downloading series json page #%d" % curr_page) + max_pages = series['num_pages'] + results.extend([ep for ep in series['value'] if episode_filter is None or episode_filter(ep)]) + curr_page += 1 + return results + + def _get_subtitles(self, video_id, consumer_secret): + + def match_episode(ep): + return ep['guid'] == video_id + + res = None + info = self._get_episodes( + video_id.split(".")[0], consumer_secret, episode_filter=match_episode) + if len(info) == 1 and info[0]['subfile'] != '': + res = {'en': [{'url': info[0]['subfile'], 'ext': 'srt'}]} + return res + + 
+class DramaFeverSeriesIE(DramaFeverIE): + IE_NAME = 'dramafever:series' + _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)/\d*[a-zA-Z_][a-zA-Z0-9_]*/' + _TESTS = [{ + 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', + 'info_dict': { + 'id': '4512', + 'title': 'Cooking with Shin', + 'description': 'Professional chef and cooking instructor Shin Kim takes some of the delicious dishes featured in your favorite dramas and shows you how to make them right at home.', + }, + 'playlist_count': 4, + }, { + 'url': 'http://www.dramafever.com/drama/124/IRIS/', + 'info_dict': { + 'id': '124', + 'title': 'IRIS', + 'description': 'Lee Byung Hun and Kim Tae Hee star in this powerhouse drama and ratings megahit of action, intrigue and romance.', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + series_id = self._match_id(url) + consumer_secret = self._get_consumer_secret(series_id) + + series_json = self._download_json( + "http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s" % (consumer_secret, series_id), + series_id, note='Downloading series metadata')["series"][series_id] + + title = series_json["name"] + description = series_json["description_short"] + + episodes = self._get_episodes(series_id, consumer_secret) + entries = [] + for ep in episodes: + entries.append(self.url_result( + 'http://www.dramafever.com%s' % ep['episode_url'], 'DramaFever', ep['guid'])) + return self.playlist_result(entries, series_id, title, description) From 051df9ad99d0a29d9eb984970e3e431795b6e445 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 20 May 2015 14:08:23 +0800 Subject: [PATCH 0733/2721] [letv/sohu] Skip tests relying on external proxies The proxy is currently broken. 
See #5655 and zhuzhuor/Unblock-Youku#427 --- youtube_dl/extractor/letv.py | 4 +--- youtube_dl/extractor/sohu.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 1484ac0d2..da896caf1 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -50,9 +50,7 @@ class LetvIE(InfoExtractor): 'title': '与龙共舞 完整版', 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', }, - 'params': { - 'cn_verification_proxy': 'http://proxy.uku.im:8888' - }, + 'skip': 'Only available in China', }] @staticmethod diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index eab4adfca..29bd9ce6f 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -23,9 +23,7 @@ class SohuIE(InfoExtractor): 'ext': 'mp4', 'title': 'MV:Far East Movement《The Illest》', }, - 'params': { - 'cn_verification_proxy': 'proxy.uku.im:8888' - } + 'skip': 'On available in China', }, { 'url': 'http://tv.sohu.com/20150305/n409385080.shtml', 'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a', From 137597b0ea88a92d174341b44b8f395b8897a2bf Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 20 May 2015 15:15:28 +0800 Subject: [PATCH 0734/2721] [dramafever] Streamline code --- youtube_dl/extractor/dramafever.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 8fac99cc5..40787ffcd 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -80,12 +80,11 @@ class DramaFeverIE(InfoExtractor): def _get_subtitles(self, video_id, consumer_secret): - def match_episode(ep): - return ep['guid'] == video_id - res = None info = self._get_episodes( - video_id.split(".")[0], consumer_secret, episode_filter=match_episode) + video_id.split(".")[0], consumer_secret, + episode_filter=lambda x: x['guid'] == video_id) + if len(info) == 1 and info[0]['subfile'] != '': res = {'en': 
[{'url': info[0]['subfile'], 'ext': 'srt'}]} return res From 2632941f327c8b013e5fbc736317fc897876ab73 Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 20 May 2015 15:53:45 +0800 Subject: [PATCH 0735/2721] [soompi] Add new extractor for tv.soompi.com --- youtube_dl/extractor/__init__.py | 4 + youtube_dl/extractor/soompi.py | 130 +++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 youtube_dl/extractor/soompi.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6f8c261d5..2a5cf9547 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -482,6 +482,10 @@ from .smotri import ( from .snotr import SnotrIE from .sockshare import SockshareIE from .sohu import SohuIE +from .soompi import ( + SoompiIE, + SoompiShowIE, +) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py new file mode 100644 index 000000000..5ecf40b7f --- /dev/null +++ b/youtube_dl/extractor/soompi.py @@ -0,0 +1,130 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json +import base64 +import xml.etree.ElementTree + +# Soompi uses the same subtitle encryption as crunchyroll +from .crunchyroll import CrunchyrollIE + + +class SoompiIE(CrunchyrollIE): + IE_NAME = 'soompi' + _VALID_URL = r'^https?://tv\.soompi\.com/en/watch/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://tv.soompi.com/en/watch/23363', + 'info_dict': { + 'id': '23363', + 'ext': 'mp4', + 'title': 'Liar Game CM1', + 'description': '15sec' + }, + 'params': { + 'skip_download': True, + }, + }] + + def _get_episodes(self, webpage, episode_filter=None): + episodes = json.loads( + self._search_regex(r'\s+VIDEOS\s+= (\[.+?\]);', webpage, "episodes meta")) + return [ep for ep in episodes if episode_filter is None or episode_filter(ep)] + + def _get_subtitles(self, video_id, show_format_xml): + subtitles = {} + subtitle_info_nodes = 
show_format_xml.findall('./{default}preload/subtitles/subtitle') + subtitle_nodes = show_format_xml.findall('./{default}preload/subtitle') + + sub_langs = {} + for i in subtitle_info_nodes: + sub_langs[i.attrib["id"]] = i.attrib["title"] + + for s in subtitle_nodes: + lang_code = sub_langs.get(s.attrib["id"], None) + if lang_code is None: + continue + + sub_id = int(s.attrib["id"]) + iv = base64.b64decode(s.find("iv").text) + data = base64.b64decode(s.find("data").text) + subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8') + sub_root = xml.etree.ElementTree.fromstring(subtitle) + + subtitles[lang_code] = [{ + 'ext': 'srt', 'data': self._convert_subtitles_to_srt(sub_root) + }, { + 'ext': 'ass', 'data': self._convert_subtitles_to_ass(sub_root) + }] + return subtitles + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + url, video_id, note="Downloading episode page", + errnote="Video may not be available for your location") + vid_formats = re.findall(r"\?quality=q([0-9]+)", webpage) + + show_meta = json.loads( + self._search_regex(r'\s+var show = (\{.+?\});', webpage, "show meta")) + episodes = self._get_episodes( + webpage, episode_filter=lambda x: x['id'] == video_id) + + title = episodes[0]["name"] + description = episodes[0]["description"] + duration = int(episodes[0]["duration"]) + slug = show_meta["slug"] + + formats = [] + show_format_xml = None + for vf in vid_formats: + show_format_url = "http://tv.soompi.com/en/show/%s/%s-config.xml?mode=hls&quality=q%s" \ + % (slug, video_id, vf) + show_format_xml = self._download_xml( + show_format_url, video_id, note="Downloading q%s show xml" % vf) + avail_formats = self._extract_m3u8_formats( + show_format_xml.find('./{default}preload/stream_info/file').text, + video_id, ext="mp4", m3u8_id=vf, preference=int(vf)) + formats.extend(avail_formats) + self._sort_formats(formats) + + subtitles = self.extract_subtitles(video_id, show_format_xml) + + return 
{ + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles + } + + +class SoompiShowIE(SoompiIE): + IE_NAME = 'soompi:show' + _VALID_URL = r'^https?://tv\.soompi\.com/en/shows/(?P[0-9a-zA-Z\-_]+)' + _TESTS = [{ + 'url': 'http://tv.soompi.com/en/shows/liar-game', + 'info_dict': { + 'id': 'liar-game', + 'title': 'Liar Game', + 'description': 'md5:52c02bce0c1a622a95823591d0589b66', + }, + 'playlist_count': 14, + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + + webpage = self._download_webpage(url, show_id, note="Downloading show page") + title = self._og_search_title(webpage).replace("SoompiTV | ", "") + description = self._og_search_description(webpage) + + episodes = self._get_episodes(webpage) + entries = [] + for ep in episodes: + entries.append(self.url_result( + 'http://tv.soompi.com/en/watch/%s' % ep['id'], 'Soompi', ep['id'])) + + return self.playlist_result(entries, show_id, title, description) From 0b9f7cd074786abafcd35b26db4ecb4d92814393 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 20 May 2015 10:01:48 +0200 Subject: [PATCH 0736/2721] release 2015.05.20 --- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 43fbe8b1d..a4879bd9a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -142,6 +142,7 @@ - **Eporner** - **EroProfile** - **Escapist** + - **ESPN** (Currently broken) - **EveryonesMixtape** - **exfm**: ex.fm - **ExpoTV** @@ -338,6 +339,7 @@ - **OktoberfestTV** - **on.aol.com** - **Ooyala** + - **OoyalaExternal** - **OpenFilm** - **orf:fm4**: radio FM4 - **orf:iptv**: iptv.ORF.at @@ -451,6 +453,7 @@ - **Spike** - **Sport5** - **SportBox** + - **SportBoxEmbed** - **SportDeutschland** - **Srf** - **SRMediathek**: Saarländischer Rundfunk @@ -510,6 +513,8 @@ - **Turbo** - **Tutv** - **tv.dfb.de** + - 
**TV2** + - **TV2Article** - **TV4**: tv4.se and tv4play.se - **tvigle**: Интернет-телевидение Tvigle.ru - **tvp.pl** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 38f00bc9b..b33385153 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.15' +__version__ = '2015.05.20' From 5137adb94dcce98a3c14fb3892c5c72f70ff34ea Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 20 May 2015 16:16:10 +0800 Subject: [PATCH 0737/2721] [soompi] Switch to non-geoblocked test video --- youtube_dl/extractor/soompi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py index 5ecf40b7f..4726872dc 100644 --- a/youtube_dl/extractor/soompi.py +++ b/youtube_dl/extractor/soompi.py @@ -14,12 +14,12 @@ class SoompiIE(CrunchyrollIE): IE_NAME = 'soompi' _VALID_URL = r'^https?://tv\.soompi\.com/en/watch/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://tv.soompi.com/en/watch/23363', + 'url': 'http://tv.soompi.com/en/watch/29235', 'info_dict': { - 'id': '23363', + 'id': '29235', 'ext': 'mp4', - 'title': 'Liar Game CM1', - 'description': '15sec' + 'title': 'Episode 1096', + 'description': '2015-05-20' }, 'params': { 'skip_download': True, From b0d619fde2b187f2b36b077a1eb11d766429f88c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 20 May 2015 21:28:04 +0600 Subject: [PATCH 0738/2721] [viki:channel] Extract title from JSON --- youtube_dl/extractor/viki.py | 40 +++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 9bdbdc3e4..fc585c299 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -23,7 +23,7 @@ class VikiIE(InfoExtractor): # iPad2 _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 
Mobile/8F191 Safari/6533.18.5' - _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P[0-9]+v)' + _VALID_URL = r'https?://(?:www\.)?viki\.com/videos/(?P[0-9]+v)' _TESTS = [{ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 'info_dict': { @@ -149,7 +149,7 @@ class VikiIE(InfoExtractor): class VikiChannelIE(InfoExtractor): IE_NAME = 'viki:channel' - _VALID_URL = r'^https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' + _VALID_URL = r'https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' _TESTS = [{ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', 'info_dict': { @@ -167,27 +167,35 @@ class VikiChannelIE(InfoExtractor): }, 'playlist_count': 127, }] + _API_BASE = 'http://api.viki.io/v4/containers' + _APP = '100000a' _PER_PAGE = 25 def _real_extract(self, url): - show_id = self._match_id(url) - show_page = self._download_webpage(url, show_id, 'Download show page') + channel_id = self._match_id(url) - title = self._og_search_title(show_page) - description = self._og_search_description(show_page) + channel = self._download_json( + '%s/%s.json?app=%s' % (self._API_BASE, channel_id, self._APP), + channel_id, 'Downloading channel JSON') + + titles = channel['titles'] + title = titles.get('en') or titles[titles.keys()[0]] + + descriptions = channel['descriptions'] + description = descriptions.get('en') or descriptions[descriptions.keys()[0]] entries = [] - for video_type in ['episodes', 'clips']: - json_url = 'http://api.viki.io/v4/containers/%s/%s.json?app=100000a&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (show_id, video_type, self._PER_PAGE) - while json_url is not None: - show_json = self._download_json( - json_url, show_id, - note='Downloading %s json page #%s' % - (video_type, re.search(r'[?&]page=([0-9]+)', json_url).group(1))) - for video in show_json['response']: + for video_type in ('episodes', 'clips'): + page_url = '%s/%s/%s.json?app=%s&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (self._API_BASE, channel_id, 
video_type, self._APP, self._PER_PAGE) + while page_url: + page = self._download_json( + page_url, channel_id, + 'Downloading %s JSON page #%s' + % (video_type, re.search(r'[?&]page=([0-9]+)', page_url).group(1))) + for video in page['response']: video_id = video['id'] entries.append(self.url_result( 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) - json_url = show_json['pagination']['next'] + page_url = page['pagination']['next'] - return self.playlist_result(entries, show_id, title, description) + return self.playlist_result(entries, channel_id, title, description) From 1a83c731bd58ed85f6f7695cee9c88d09a224bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 01:44:05 +0600 Subject: [PATCH 0739/2721] [viki] Switch extraction to API --- youtube_dl/extractor/viki.py | 230 +++++++++++++++++++++-------------- 1 file changed, 142 insertions(+), 88 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index fc585c299..234649ca8 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -1,29 +1,64 @@ from __future__ import unicode_literals import re +import time +import hmac +import hashlib -from ..compat import ( - compat_urlparse, - compat_urllib_request, -) from ..utils import ( ExtractorError, - unescapeHTML, - unified_strdate, - US_RATINGS, - determine_ext, - mimetype2ext, + int_or_none, + parse_age_limit, + parse_iso8601, ) from .common import InfoExtractor -class VikiIE(InfoExtractor): +class VikiBaseIE(InfoExtractor): + _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com' + _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s' + + _APP = '65535a' + _APP_VERSION = '2.2.5.1428709186' + _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)' + + def _prepare_call(self, path, timestamp=None): + path += '?' if '?' 
not in path else '&' + if not timestamp: + timestamp = int(time.time()) + query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp) + sig = hmac.new( + self._APP_SECRET.encode('ascii'), + query.encode('ascii'), + hashlib.sha1 + ).hexdigest() + return self._API_URL_TEMPLATE % (query, sig) + + def _call_api(self, path, video_id, note, timestamp=None): + resp = self._download_json( + self._prepare_call(path, timestamp), video_id, note) + + error = resp.get('error') + if error: + if error == 'invalid timestamp': + resp = self._download_json( + self._prepare_call(path, int(resp['current_timestamp'])), + video_id, '%s (retry)' % note) + error = resp.get('error') + if error: + self._raise_error(resp['error']) + + return resp + + def _raise_error(self, error): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error), + expected=True) + + +class VikiIE(VikiBaseIE): IE_NAME = 'viki' - - # iPad2 - _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5' - - _VALID_URL = r'https?://(?:www\.)?viki\.com/videos/(?P[0-9]+v)' + _VALID_URL = r'https?://(?:www\.)?viki\.com/(?:videos|player)/(?P[0-9]+v)' _TESTS = [{ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 'info_dict': { @@ -37,115 +72,134 @@ class VikiIE(InfoExtractor): }, 'skip': 'Blocked in the US', }, { + # clip 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference', - 'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c', + 'md5': '86c0b5dbd4d83a6611a79987cc7a1989', 'info_dict': { 'id': '1067139v', 'ext': 'mp4', + 'title': "'The Avengers: Age of Ultron' Press Conference", 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea', + 'duration': 352, + 'timestamp': 1430380829, 'upload_date': '20150430', - 'title': '\'The Avengers: Age of Ultron\' Press Conference', + 'uploader': 'Arirang TV', + 'like_count': int, + 'age_limit': 0, } }, { 'url': 
'http://www.viki.com/videos/1048879v-ankhon-dekhi', 'info_dict': { 'id': '1048879v', 'ext': 'mp4', - 'upload_date': '20140820', - 'description': 'md5:54ff56d51bdfc7a30441ec967394e91c', 'title': 'Ankhon Dekhi', + 'duration': 6512, + 'timestamp': 1408532356, + 'upload_date': '20140820', + 'uploader': 'Spuul', + 'like_count': int, + 'age_limit': 13, }, 'params': { - # requires ffmpeg + # m3u8 download 'skip_download': True, } + }, { + # episode + 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', + 'md5': '190f3ef426005ba3a080a63325955bc3', + 'info_dict': { + 'id': '44699v', + 'ext': 'mp4', + 'title': 'Boys Over Flowers - Episode 1', + 'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2', + 'duration': 4155, + 'timestamp': 1270496524, + 'upload_date': '20100405', + 'uploader': 'group8', + 'like_count': int, + 'age_limit': 13, + } + }, { + 'url': 'http://www.viki.com/player/44699v', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) + streams = self._call_api( + 'videos/%s/streams.json' % video_id, video_id, + 'Downloading video streams JSON') - uploader_m = re.search( - r'Broadcast Network: \s*([^<]*)<', webpage) - if uploader_m is None: - uploader = None - else: - uploader = uploader_m.group(1).strip() + formats = [] + for format_id, stream_dict in streams.items(): + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + for protocol, format_dict in stream_dict.items(): + if format_id == 'm3u8': + formats = self._extract_m3u8_formats( + format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol) + else: + formats.append({ + 'url': format_dict['url'], + 'format_id': '%s-%s' % (format_id, protocol), + 'height': height, + }) + self._sort_formats(formats) - rating_str = 
self._html_search_regex( - r'Rating: \s*([^<]*)<', webpage, - 'rating information', default='').strip() - age_limit = US_RATINGS.get(rating_str) + video = self._call_api( + 'videos/%s.json' % video_id, video_id, 'Downloading video JSON') - req = compat_urllib_request.Request( - 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id) - req.add_header('User-Agent', self._USER_AGENT) - info_webpage = self._download_webpage( - req, video_id, note='Downloading info page') - err_msg = self._html_search_regex(r']+class="video-error[^>]+>(.+)
    ', info_webpage, 'error message', default=None) - if err_msg: - if 'not available in your region' in err_msg: - raise ExtractorError( - 'Video %s is blocked from your location.' % video_id, - expected=True) - else: - raise ExtractorError('Viki said: %s %s' % (err_msg, url)) - mobj = re.search( - r']+type="(?P[^"]+)"[^>]+src="(?P[^"]+)"', info_webpage) - if not mobj: - raise ExtractorError('Unable to find video URL') - video_url = unescapeHTML(mobj.group('url')) - video_ext = mimetype2ext(mobj.group('mime_type')) + title = None + titles = video.get('titles') + if titles: + title = titles.get('en') or titles[titles.keys()[0]] + if not title: + title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id + container_titles = video.get('container', {}).get('titles') + if container_titles: + container_title = container_titles.get('en') or container_titles[titles.keys()[0]] + title = '%s - %s' % (container_title, title) - if determine_ext(video_url) == 'm3u8': - formats = self._extract_m3u8_formats( - video_url, video_id, ext=video_ext) - else: - formats = [{ - 'url': video_url, - 'ext': video_ext, - }] + descriptions = video.get('descriptions') + description = descriptions.get('en') or descriptions[titles.keys()[0]] if descriptions else None - upload_date_str = self._html_search_regex( - r'"created_at":"([^"]+)"', info_webpage, 'upload date') - upload_date = ( - unified_strdate(upload_date_str) - if upload_date_str is not None - else None - ) + duration = int_or_none(video.get('duration')) + timestamp = parse_iso8601(video.get('created_at')) + uploader = video.get('author') + like_count = int_or_none(video.get('likes', {}).get('count')) + age_limit = parse_age_limit(video.get('rating')) - # subtitles - video_subtitles = self.extract_subtitles(video_id, info_webpage) + thumbnails = [] + for thumbnail_id, thumbnail in video.get('images', {}).items(): + thumbnails.append({ + 'id': thumbnail_id, + 'url': 
thumbnail.get('url'), + }) + + subtitles = {} + for subtitle_lang, _ in video.get('subtitle_completions', {}).items(): + subtitles[subtitle_lang] = [{ + 'ext': subtitles_format, + 'url': self._prepare_call( + 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)), + } for subtitles_format in ('srt', 'vtt')] return { 'id': video_id, 'title': title, - 'formats': formats, 'description': description, - 'thumbnail': thumbnail, - 'age_limit': age_limit, + 'duration': duration, + 'timestamp': timestamp, 'uploader': uploader, - 'subtitles': video_subtitles, - 'upload_date': upload_date, + 'like_count': like_count, + 'age_limit': age_limit, + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, } - def _get_subtitles(self, video_id, info_webpage): - res = {} - for sturl_html in re.findall(r'[a-z]+)\.vtt', sturl) - if not m: - continue - res[m.group('lang')] = [{ - 'url': compat_urlparse.urljoin('http://www.viki.com', sturl), - 'ext': 'vtt', - }] - return res - class VikiChannelIE(InfoExtractor): IE_NAME = 'viki:channel' From ac20d95f9766aa130748aac07fa90ee5dfa566d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 01:56:02 +0600 Subject: [PATCH 0740/2721] [viki] Add support for youtube externals --- youtube_dl/extractor/viki.py | 70 ++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 234649ca8..68d5cac6e 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -120,6 +120,23 @@ class VikiIE(VikiBaseIE): 'like_count': int, 'age_limit': 13, } + }, { + # youtube external + 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', + 'md5': '216d1afdc0c64d1febc1e9f2bd4b864b', + 'info_dict': { + 'id': '50562v', + 'ext': 'mp4', + 'title': 'Poor Nastya [COMPLETE] - Episode 1', + 'description': '', + 'duration': 607, + 'timestamp': 1274949505, + 
'upload_date': '20101213', + 'uploader': 'ad14065n', + 'uploader_id': 'ad14065n', + 'like_count': int, + 'age_limit': 13, + } }, { 'url': 'http://www.viki.com/player/44699v', 'only_matching': True, @@ -128,26 +145,6 @@ class VikiIE(VikiBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - streams = self._call_api( - 'videos/%s/streams.json' % video_id, video_id, - 'Downloading video streams JSON') - - formats = [] - for format_id, stream_dict in streams.items(): - height = self._search_regex( - r'^(\d+)[pP]$', format_id, 'height', default=None) - for protocol, format_dict in stream_dict.items(): - if format_id == 'm3u8': - formats = self._extract_m3u8_formats( - format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol) - else: - formats.append({ - 'url': format_dict['url'], - 'format_id': '%s-%s' % (format_id, protocol), - 'height': height, - }) - self._sort_formats(formats) - video = self._call_api( 'videos/%s.json' % video_id, video_id, 'Downloading video JSON') @@ -186,7 +183,7 @@ class VikiIE(VikiBaseIE): 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)), } for subtitles_format in ('srt', 'vtt')] - return { + result = { 'id': video_id, 'title': title, 'description': description, @@ -196,10 +193,39 @@ class VikiIE(VikiBaseIE): 'like_count': like_count, 'age_limit': age_limit, 'thumbnails': thumbnails, - 'formats': formats, 'subtitles': subtitles, } + streams = self._call_api( + 'videos/%s/streams.json' % video_id, video_id, + 'Downloading video streams JSON') + + if 'external' in streams: + result.update({ + '_type': 'url_transparent', + 'url': streams['external']['url'], + }) + return result + + formats = [] + for format_id, stream_dict in streams.items(): + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + for protocol, format_dict in stream_dict.items(): + if format_id == 'm3u8': + formats = self._extract_m3u8_formats( + format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' 
% protocol) + else: + formats.append({ + 'url': format_dict['url'], + 'format_id': '%s-%s' % (format_id, protocol), + 'height': height, + }) + self._sort_formats(formats) + + result['formats'] = formats + return result + class VikiChannelIE(InfoExtractor): IE_NAME = 'viki:channel' From bc56355ec6bc823fe96e31688cd3123dc18ae627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 02:08:13 +0600 Subject: [PATCH 0741/2721] [viki:channel] Switch to API --- youtube_dl/extractor/viki.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 68d5cac6e..071e280fb 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -4,6 +4,7 @@ import re import time import hmac import hashlib +import itertools from ..utils import ( ExtractorError, @@ -227,7 +228,7 @@ class VikiIE(VikiBaseIE): return result -class VikiChannelIE(InfoExtractor): +class VikiChannelIE(VikiBaseIE): IE_NAME = 'viki:channel' _VALID_URL = r'https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' _TESTS = [{ @@ -247,16 +248,15 @@ class VikiChannelIE(InfoExtractor): }, 'playlist_count': 127, }] - _API_BASE = 'http://api.viki.io/v4/containers' - _APP = '100000a' + _PER_PAGE = 25 def _real_extract(self, url): channel_id = self._match_id(url) - channel = self._download_json( - '%s/%s.json?app=%s' % (self._API_BASE, channel_id, self._APP), - channel_id, 'Downloading channel JSON') + channel = self._call_api( + 'containers/%s.json' % channel_id, channel_id, + 'Downloading channel JSON') titles = channel['titles'] title = titles.get('en') or titles[titles.keys()[0]] @@ -266,16 +266,16 @@ class VikiChannelIE(InfoExtractor): entries = [] for video_type in ('episodes', 'clips'): - page_url = '%s/%s/%s.json?app=%s&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (self._API_BASE, channel_id, video_type, self._APP, self._PER_PAGE) - while page_url: - page = 
self._download_json( - page_url, channel_id, - 'Downloading %s JSON page #%s' - % (video_type, re.search(r'[?&]page=([0-9]+)', page_url).group(1))) + for page_num in itertools.count(1): + page = self._call_api( + 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d' + % (channel_id, video_type, self._PER_PAGE, page_num), channel_id, + 'Downloading %s JSON page #%d' % (video_type, page_num)) for video in page['response']: video_id = video['id'] entries.append(self.url_result( - 'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id)) - page_url = page['pagination']['next'] + 'http://www.viki.com/videos/%s' % video_id, 'Viki')) + if not page['pagination']['next']: + break return self.playlist_result(entries, channel_id, title, description) From d01924f48810db69d572bc121ab98021f04ac957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 02:30:04 +0600 Subject: [PATCH 0742/2721] [viki:channel] Extend matching URLs and extract movies --- youtube_dl/extractor/viki.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 071e280fb..3acb481f9 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -230,7 +230,7 @@ class VikiIE(VikiBaseIE): class VikiChannelIE(VikiBaseIE): IE_NAME = 'viki:channel' - _VALID_URL = r'https?://(?:www\.)?viki\.com/tv/(?P[0-9]+c)' + _VALID_URL = r'https?://(?:www\.)?viki\.com/(?:tv|news|movies|artists)/(?P[0-9]+c)' _TESTS = [{ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', 'info_dict': { @@ -247,6 +247,15 @@ class VikiChannelIE(VikiBaseIE): 'description': 'md5:05bf5471385aa8b21c18ad450e350525', }, 'playlist_count': 127, + }, { + 'url': 'http://www.viki.com/news/24569c-showbiz-korea', + 'only_matching': True, + }, { + 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005', + 'only_matching': True, + }, { + 'url': 
'http://www.viki.com/artists/2141c-shinee', + 'only_matching': True, }] _PER_PAGE = 25 @@ -265,7 +274,7 @@ class VikiChannelIE(VikiBaseIE): description = descriptions.get('en') or descriptions[descriptions.keys()[0]] entries = [] - for video_type in ('episodes', 'clips'): + for video_type in ('episodes', 'clips', 'movies'): for page_num in itertools.count(1): page = self._call_api( 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d' From 4d8ee01389c4229f14fad45f0aa7b033a2509aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 02:38:43 +0600 Subject: [PATCH 0743/2721] [viki] Fix typo --- youtube_dl/extractor/viki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 3acb481f9..0ec8ef0ef 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -157,7 +157,7 @@ class VikiIE(VikiBaseIE): title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id container_titles = video.get('container', {}).get('titles') if container_titles: - container_title = container_titles.get('en') or container_titles[titles.keys()[0]] + container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]] title = '%s - %s' % (container_title, title) descriptions = video.get('descriptions') From 4d2f42361e02bb67de7c2017c6817b46ff3b2bd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 21 May 2015 11:42:20 +0200 Subject: [PATCH 0744/2721] [viki] remove unused import --- youtube_dl/extractor/viki.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 0ec8ef0ef..fe7229952 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import re import time import hmac import 
hashlib From e7752cd57853718c6875b02517613d14c4c7221d Mon Sep 17 00:00:00 2001 From: frenchy1983 Date: Thu, 21 May 2015 11:47:16 +0200 Subject: [PATCH 0745/2721] [TNAFlix] Allow dot (and more) in cat_id and display_id URLs with dots were raising a "UnsupportedError: Unsupported URL" error. --- youtube_dl/extractor/tnaflix.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index d48cbbf14..725edd3c7 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -10,23 +10,23 @@ from ..utils import ( class TNAFlixIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P[\w-]+)/(?P[\w-]+)/video(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P[^/]+)/(?P[^/]+)/video(?P\d+)' _TITLE_REGEX = r'(.+?) - TNAFlix Porn Videos' _DESCRIPTION_REGEX = r'
<h3 itemprop="description">([^<]+)</h3>
    ' _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"' _TEST = { - 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', - 'md5': 'ecf3498417d09216374fc5907f9c6ec0', + 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', + 'md5': '6c431ea56756497e227fb3f01a687869', 'info_dict': { - 'id': '553878', - 'display_id': 'Carmella-Decesare-striptease', + 'id': '358632', + 'display_id': 'bunzHD-Ms.Donk', 'ext': 'mp4', - 'title': 'Carmella Decesare - striptease', - 'description': '', + 'title': 'bunzHD Ms.Donk', + 'description': 'bubble booty ebony teen goddess Ms.Donk has a firm ass and acts like she is shy but really she is a freak in the sheets watch her 20 min XX rated vid at bunzHD.com click on the catalog link', 'thumbnail': 're:https?://.*\.jpg$', - 'duration': 91, + 'duration': 394, 'age_limit': 18, } } From 6ad9cb224a7d9156109fe0b0100d277b954063d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 21 May 2015 12:02:53 +0200 Subject: [PATCH 0746/2721] [mitele] It now uses m3u8 (#5764) It should also be possible to use Adobe HDS, but it would require more work. 
--- youtube_dl/extractor/mitele.py | 9 +++++++-- youtube_dl/extractor/telecinco.py | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index d8897eb90..7091f3335 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -20,7 +20,6 @@ class MiTeleIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', - 'md5': '6a75fe9d0d3275bead0cb683c616fddb', 'info_dict': { 'id': '0fce117d', 'ext': 'mp4', @@ -29,6 +28,10 @@ class MiTeleIE(InfoExtractor): 'display_id': 'programa-144', 'duration': 2913, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -56,12 +59,14 @@ class MiTeleIE(InfoExtractor): episode, transform_source=strip_jsonp ) + formats = self._extract_m3u8_formats( + token_info['tokenizedUrl'], episode, ext='mp4') return { 'id': embed_data['videoId'], 'display_id': episode, 'title': info_el.find('title').text, - 'url': token_info['tokenizedUrl'], + 'formats': formats, 'description': get_element_by_attribute('class', 'text', webpage), 'thumbnail': info_el.find('thumb').text, 'duration': parse_duration(info_el.find('duration').text), diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 251a68680..a0c744fd1 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -16,6 +16,10 @@ class TelecincoIE(MiTeleIE): 'title': 'Con Martín Berasategui, hacer un bacalao al ...', 'duration': 662, }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', 'only_matching': True, From 663004ac2b001b9be03bd951d539a62cf83c58ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 22:06:25 +0600 Subject: [PATCH 0747/2721] [options] 
Clarify `--metadata-from-title` additional templates --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 22dbc3aec..dd07266b7 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -713,7 +713,7 @@ def parseOpts(overrideArguments=None): help='Parse additional metadata like song title / artist from the video title. ' 'The format syntax is the same as --output, ' 'the parsed parameters replace existing values. ' - 'Additional templates: %(album), %(artist). ' + 'Additional templates: %(album)s, %(artist)s. ' 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like ' '"Coldplay - Paradise"') postproc.add_option( From 53de95da5e40aa1a465668977e507ccc914099f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 22:27:22 +0600 Subject: [PATCH 0748/2721] [viki] Extend _VALID_URLs --- youtube_dl/extractor/viki.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index fe7229952..7f2fb1ca8 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -15,6 +15,7 @@ from .common import InfoExtractor class VikiBaseIE(InfoExtractor): + _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/' _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com' _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s' @@ -58,7 +59,7 @@ class VikiBaseIE(InfoExtractor): class VikiIE(VikiBaseIE): IE_NAME = 'viki' - _VALID_URL = r'https?://(?:www\.)?viki\.com/(?:videos|player)/(?P[0-9]+v)' + _VALID_URL = r'%s(?:videos|player)/(?P[0-9]+v)' % VikiBaseIE._VALID_URL_BASE _TESTS = [{ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14', 'info_dict': { @@ -229,7 +230,7 @@ class VikiIE(VikiBaseIE): class VikiChannelIE(VikiBaseIE): IE_NAME = 'viki:channel' - _VALID_URL = 
r'https?://(?:www\.)?viki\.com/(?:tv|news|movies|artists)/(?P[0-9]+c)' + _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P[0-9]+c)' % VikiBaseIE._VALID_URL_BASE _TESTS = [{ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', 'info_dict': { From 5cd47a5e4f54033bcf6d80908e00eff4c75a51c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 21 May 2015 23:58:46 +0600 Subject: [PATCH 0749/2721] [videott] Fix for python 3.2 --- youtube_dl/extractor/videott.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index ececc7ee0..591024ead 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -43,7 +43,7 @@ class VideoTtIE(InfoExtractor): formats = [ { - 'url': base64.b64decode(res['u']).decode('utf-8'), + 'url': base64.b64decode(res['u'].encode('utf-8')).decode('utf-8'), 'ext': 'flv', 'format_id': res['l'], } for res in settings['res'] if res['u'] From 06947add03b6b619292812a771993d3365b0e7e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 May 2015 00:03:47 +0600 Subject: [PATCH 0750/2721] [chilloutzone] Fix for python 3.2 --- youtube_dl/extractor/chilloutzone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py index c922f6959..0206d96db 100644 --- a/youtube_dl/extractor/chilloutzone.py +++ b/youtube_dl/extractor/chilloutzone.py @@ -57,7 +57,7 @@ class ChilloutzoneIE(InfoExtractor): base64_video_info = self._html_search_regex( r'var cozVidData = "(.+?)";', webpage, 'video data') - decoded_video_info = base64.b64decode(base64_video_info).decode("utf-8") + decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8') video_info_dict = json.loads(decoded_video_info) # get video information from dict From 878563c847fa5248eedbd44187536dec04643eaf Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 May 2015 00:06:10 +0600 Subject: [PATCH 0751/2721] [aes] Fix for python 3.2 --- youtube_dl/aes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index 07224d508..7817adcfd 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -152,7 +152,7 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = bytes_to_intlist(base64.b64decode(data)) + data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) password = bytes_to_intlist(password.encode('utf-8')) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) From afe8b594be53161f68189e15a65b4e9c6eba0b35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 May 2015 00:09:15 +0600 Subject: [PATCH 0752/2721] [rtve.es:alacarta] Fix for python 3.2 --- youtube_dl/extractor/rtve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 849300140..82cd98ac7 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -17,7 +17,7 @@ from ..utils import ( def _decrypt_url(png): - encrypted_data = base64.b64decode(png) + encrypted_data = base64.b64decode(png.encode('utf-8')) text_index = encrypted_data.find(b'tEXt') text_chunk = encrypted_data[text_index - 4:] length = struct_unpack('!I', text_chunk[:4])[0] From 43150d7ac36efda7bc60c694b8a18e1f720da04b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 May 2015 00:10:05 +0600 Subject: [PATCH 0753/2721] [shared] Fix for python 3.2 --- youtube_dl/extractor/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 26ced716e..9f3e944e7 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -47,7 +47,7 @@ class SharedIE(InfoExtractor): video_url = 
self._html_search_regex( r'data-url="([^"]+)"', video_page, 'video URL') title = base64.b64decode(self._html_search_meta( - 'full:title', webpage, 'title')).decode('utf-8') + 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') filesize = int_or_none(self._html_search_meta( 'full:size', webpage, 'file size', fatal=False)) thumbnail = self._html_search_regex( From 0459432d962bf358566340eed00f6c1c56b7b732 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 May 2015 00:10:53 +0600 Subject: [PATCH 0754/2721] [shared] Fix for python 3.2 --- youtube_dl/extractor/tutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index 4de0aac52..fad720b68 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -26,7 +26,7 @@ class TutvIE(InfoExtractor): data_content = self._download_webpage( 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') - video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') + video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8') return { 'id': internal_id, From 77d9cb2f04462677f2a36f487c20e7a7992a0a32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 22 May 2015 00:45:33 +0600 Subject: [PATCH 0755/2721] [sportbox] Fix extraction --- youtube_dl/extractor/sportbox.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 8686f9d11..2ab3489e4 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -7,7 +7,7 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( parse_duration, - parse_iso8601, + unified_strdate, ) @@ -20,11 +20,9 @@ class SportBoxIE(InfoExtractor): 'id': '80822', 'ext': 'mp4', 'title': 
'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', - 'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed', + 'description': 'md5:3d72dc4a006ab6805d82f037fdc637ad', 'thumbnail': 're:^https?://.*\.jpg$', - 'timestamp': 1411896237, 'upload_date': '20140928', - 'duration': 4846, }, 'params': { # m3u8 download @@ -48,17 +46,13 @@ class SportBoxIE(InfoExtractor): r'src="/?(vdl/player/[^"]+)"', webpage, 'player') title = self._html_search_regex( - r'

    ([^<]+)

    ', webpage, 'title') - description = self._html_search_regex( - r'(?s)
    (.+?)
    ', - webpage, 'description', fatal=False) + [r'"nodetitle"\s*:\s*"([^"]+)"', r'class="node-header_{1,2}title">([^<]+)'], + webpage, 'title') + description = self._og_search_description(webpage) or self._html_search_meta( + 'description', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) - timestamp = parse_iso8601(self._search_regex( - r'([^<]+)', - webpage, 'timestamp', fatal=False)) - duration = parse_duration(self._html_search_regex( - r'', - webpage, 'duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'dateCreated', webpage, 'upload date')) return { '_type': 'url_transparent', @@ -67,8 +61,7 @@ class SportBoxIE(InfoExtractor): 'title': title, 'description': description, 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, + 'upload_date': upload_date, } From 8a278a1d7ef6134a5ac6b7dd31e3458d05f71225 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 22 May 2015 13:26:50 +0800 Subject: [PATCH 0756/2721] [nba] Fix duration extraction (fixes #5777) --- youtube_dl/extractor/nba.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 862b706bf..944096e1c 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -22,6 +22,18 @@ class NBAIE(InfoExtractor): }, { 'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/', 'only_matching': True, + }, { + 'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba', + 'info_dict': { + 'id': '0041400301-cle-atl-recap.nba', + 'ext': 'mp4', + 'title': 'NBA GAME TIME | Video: Hawks vs. 
Cavaliers Game 1', + 'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d', + 'duration': 228, + }, + 'params': { + 'skip_download': True, + } }] def _real_extract(self, url): @@ -35,8 +47,12 @@ class NBAIE(InfoExtractor): self._og_search_title(webpage, default=shortened_video_id), ' : NBA.com') description = self._og_search_description(webpage) - duration = parse_duration( - self._html_search_meta('duration', webpage, 'duration')) + duration_str = self._html_search_meta( + 'duration', webpage, 'duration', default=None) + if not duration_str: + duration_str = self._html_search_regex( + r'Duration:\s*(\d+:\d+)', webpage, 'duration', fatal=False) + duration = parse_duration(duration_str) return { 'id': shortened_video_id, From ed5a637d62e8ede4a8cef75df4e5f341e3c667a1 Mon Sep 17 00:00:00 2001 From: frenchy1983 Date: Fri, 22 May 2015 09:29:35 +0200 Subject: [PATCH 0757/2721] [TNAFlix] Restore test See dstftw's comment in #5772 --- youtube_dl/extractor/empflix.py | 26 ++++++++++++++------------ youtube_dl/extractor/tnaflix.py | 32 +++++++++++++++++++------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index 70f8efe27..0dc947c1d 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -10,16 +10,18 @@ class EMPFlixIE(TNAFlixIE): _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"' _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"' - _TEST = { - 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', - 'md5': 'b1bc15b6412d33902d6e5952035fcabc', - 'info_dict': { - 'id': '33051', - 'display_id': 'Amateur-Finger-Fuck', - 'ext': 'mp4', - 'title': 'Amateur Finger Fuck', - 'description': 'Amateur solo finger fucking.', - 'thumbnail': 're:https?://.*\.jpg$', - 'age_limit': 18, + _TESTS = [ + { + 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', + 'md5': 'b1bc15b6412d33902d6e5952035fcabc', + 'info_dict': { + 
'id': '33051', + 'display_id': 'Amateur-Finger-Fuck', + 'ext': 'mp4', + 'title': 'Amateur Finger Fuck', + 'description': 'Amateur solo finger fucking.', + 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, + } } - } + ] diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 725edd3c7..79496039d 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -16,20 +16,26 @@ class TNAFlixIE(InfoExtractor): _DESCRIPTION_REGEX = r'

    ([^<]+)

    ' _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"' - _TEST = { - 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', - 'md5': '6c431ea56756497e227fb3f01a687869', - 'info_dict': { - 'id': '358632', - 'display_id': 'bunzHD-Ms.Donk', - 'ext': 'mp4', - 'title': 'bunzHD Ms.Donk', - 'description': 'bubble booty ebony teen goddess Ms.Donk has a firm ass and acts like she is shy but really she is a freak in the sheets watch her 20 min XX rated vid at bunzHD.com click on the catalog link', - 'thumbnail': 're:https?://.*\.jpg$', - 'duration': 394, - 'age_limit': 18, + _TESTS = [ + { + 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', + 'md5': 'ecf3498417d09216374fc5907f9c6ec0', + 'info_dict': { + 'id': '553878', + 'display_id': 'Carmella-Decesare-striptease', + 'ext': 'mp4', + 'title': 'Carmella Decesare - striptease', + 'description': '', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 91, + 'age_limit': 18, + } + }, + { + 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', + 'matching_only': True, } - } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From ba6454761687e099f960b50cb50a9b87f4ec6d17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 22 May 2015 11:35:09 +0200 Subject: [PATCH 0758/2721] [sportbox] Remove unused import --- youtube_dl/extractor/sportbox.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 2ab3489e4..86d509ae5 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - parse_duration, unified_strdate, ) From 79979c689713fd28e8fdf08bd71eecb6798f23d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 22 May 2015 16:14:55 
+0200 Subject: [PATCH 0759/2721] Clarify that --dump-pages encodes the pages using base64 (#5781) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index dd07266b7..5a2315bd9 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -537,7 +537,7 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '--dump-pages', '--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, - help='Print downloaded pages to debug problems (very verbose)') + help='Print downloaded pages encoded using base64 to debug problems (very verbose)') verbosity.add_option( '--write-pages', action='store_true', dest='write_pages', default=False, From 69e0f1b445388e4b6f45868d53780d6f8937f56e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 May 2015 00:08:10 +0600 Subject: [PATCH 0760/2721] Credit @ping for viki:channel, qqmusic:toplist --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 267b8da1e..ebed7ebb3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -124,3 +124,4 @@ Mohammad Teimori Pabandi Roman Le Négrate Matthias Küch Julian Richen +Ping O. 
From 685c74d315a54154c5a1d9ecee8b212dbee94bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 May 2015 01:01:47 +0600 Subject: [PATCH 0761/2721] [rutv] Extend embed URL (Closes #5782) --- youtube_dl/extractor/rutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index 55604637d..d9df06861 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -104,7 +104,7 @@ class RUTVIE(InfoExtractor): @classmethod def _extract_url(cls, webpage): mobj = re.search( - r']+?src=(["\'])(?Phttps?://player\.rutv\.ru/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) + r']+?src=(["\'])(?Phttps?://player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) if mobj: return mobj.group('url') From d386878af96f368ba4c2fc8bc9b078a69b79fdf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 May 2015 21:25:53 +0600 Subject: [PATCH 0762/2721] [prosiebensat1] Add support for .at domain names (Closes #5786) --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7cc799664..255d4abc1 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -17,7 +17,7 @@ from ..utils import ( class ProSiebenSat1IE(InfoExtractor): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|ran|the-voice-of-germany)\.de|fem\.com)/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at)|ran\.de|fem\.com)/(?P.+)' _TESTS = [ { From abca34cbc04693662d913e19634c06c214a237f6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 May 2015 02:04:02 +0800 Subject: [PATCH 
0763/2721] [cnn] Relax _VALID_URL again (fixes #5737) The problem is the same as test:CNN_1, so I didn't add the test case --- youtube_dl/extractor/cnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index 5efc5f4fe..3b1bd4033 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -12,7 +12,7 @@ from ..utils import ( class CNNIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/ - (?P.+?/(?P[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))''' + (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' _TESTS = [{ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', From 9bf87ae3aaac81df3efb92fd0a3247ccb522de2a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 24 May 2015 02:36:47 +0800 Subject: [PATCH 0764/2721] [nextmedia] Merge AppleDailyRealtimeNewsIE and AppleDailyAnimationNewsIE --- youtube_dl/extractor/__init__.py | 3 +-- youtube_dl/extractor/nextmedia.py | 37 +++++++++++++------------------ 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 24efb7ce5..8bb3926a0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -338,8 +338,7 @@ from .newstube import NewstubeIE from .nextmedia import ( NextMediaIE, NextMediaActionNewsIE, - AppleDailyRealtimeNewsIE, - AppleDailyAnimationNewsIE + AppleDailyIE, ) from .nfb import NFBIE from .nfl import NFLIE diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index 02dba4ef6..c75ccafc7 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -89,8 +89,8 @@ class NextMediaActionNewsIE(NextMediaIE): return self._extract_from_nextmedia_page(news_id, url, article_page) -class AppleDailyRealtimeNewsIE(NextMediaIE): - _VALID_URL = 
r'http://(www|ent).appledaily.com.tw/(realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' +class AppleDailyIE(NextMediaIE): + _VALID_URL = r'http://(www|ent).appledaily.com.tw/(animation|realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -99,7 +99,7 @@ class AppleDailyRealtimeNewsIE(NextMediaIE): 'ext': 'mp4', 'title': '周亭羽走過摩鐵陰霾2男陪吃 九把刀孤寒看醫生', 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'md5:b23787119933404ce515c6356a8c355c', + 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4', 'upload_date': '20150128', } }, { @@ -110,26 +110,10 @@ class AppleDailyRealtimeNewsIE(NextMediaIE): 'ext': 'mp4', 'title': '不滿被踩腳 山東兩大媽一路打下車', 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'md5:2648aaf6fc4f401f6de35a91d111aa1d', + 'description': 'md5:175b4260c1d7c085993474217e4ab1b4', 'upload_date': '20150128', } - }] - - _URL_PATTERN = r'\{url: \'(.+)\'\}' - - def _fetch_title(self, page): - return self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title') - - def _fetch_thumbnail(self, page): - return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False) - - def _fetch_timestamp(self, page): - return None - - -class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE): - _VALID_URL = 'http://www.appledaily.com.tw/animation/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' 
- _TESTS = [{ + }, { 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671', 'md5': '03df296d95dedc2d5886debbb80cb43f', 'info_dict': { @@ -156,8 +140,17 @@ class AppleDailyAnimationNewsIE(AppleDailyRealtimeNewsIE): ] }] + _URL_PATTERN = r'\{url: \'(.+)\'\}' + def _fetch_title(self, page): - return self._html_search_meta('description', page, 'news title') + return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or + self._html_search_meta('description', page, 'news title')) + + def _fetch_thumbnail(self, page): + return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False) + + def _fetch_timestamp(self, page): + return None def _fetch_description(self, page): return self._html_search_meta('description', page, 'news description') From 30455ce2554d00489901a398d457fac89456fe49 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 24 May 2015 02:42:01 +0800 Subject: [PATCH 0765/2721] [nextmedia] Extend and reorder _VALID_URL --- youtube_dl/extractor/nextmedia.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index c75ccafc7..d1b7cff4c 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -90,7 +90,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): - _VALID_URL = r'http://(www|ent).appledaily.com.tw/(animation|realtimenews|enews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' + _VALID_URL = r'http://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' 
_TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', @@ -138,6 +138,9 @@ class AppleDailyIE(NextMediaIE): 'expected_warnings': [ 'video thumbnail', ] + }, { + 'url': 'http://www.appledaily.com.tw/appledaily/article/supplement/20140417/35770334/', + 'only_matching': True, }] _URL_PATTERN = r'\{url: \'(.+)\'\}' From 1335c3aca8f0cbddc0c521c73579eec2b9a5643c Mon Sep 17 00:00:00 2001 From: Sergey M? <dstftw@gmail.com> Date: Sun, 24 May 2015 01:21:18 +0600 Subject: [PATCH 0766/2721] [drtv] Improve extraction (Closes #5792) --- youtube_dl/extractor/drtv.py | 37 +++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index f25ab319e..baa24c6d1 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,8 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor, ExtractorError -from ..utils import parse_iso8601 +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + parse_iso8601, +) class DRTVIE(InfoExtractor): @@ -60,19 +63,31 @@ class DRTVIE(InfoExtractor): restricted_to_denmark = asset['RestrictedToDenmark'] spoken_subtitles = asset['Target'] == 'SpokenSubtitles' for link in asset['Links']: - target = link['Target'] uri = link['Uri'] + target = link['Target'] format_id = target - preference = -1 if target == 'HDS' else -2 + preference = None if spoken_subtitles: - preference -= 2 + preference = -1 format_id += '-spoken-subtitles' - formats.append({ - 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, - 'format_id': format_id, - 'ext': link['FileFormat'], - 'preference': preference, - }) + if target == 'HDS': + formats.extend(self._extract_f4m_formats( + uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43', + video_id, preference, f4m_id=format_id)) + elif target == 'HLS': 
+ formats.extend(self._extract_m3u8_formats( + uri, video_id, 'mp4', preference=preference, + m3u8_id=format_id)) + else: + bitrate = link.get('Bitrate') + if bitrate: + format_id += '-%s' % bitrate + formats.append({ + 'url': uri, + 'format_id': format_id, + 'tbr': bitrate, + 'ext': link.get('FileFormat'), + }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): LANGS = { From 71646e465348b25962a15f9a567f134514bde30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 May 2015 04:14:01 +0600 Subject: [PATCH 0767/2721] [YoutubeDL] Initialize `files_to_delete` (Closes #5797) --- youtube_dl/YoutubeDL.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 58b34e087..d1953c18f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1527,6 +1527,7 @@ class YoutubeDL(object): pps_chain.extend(ie_info['__postprocessors']) pps_chain.extend(self._pps) for pp in pps_chain: + files_to_delete = [] try: files_to_delete, info = pp.run(info) except PostProcessingError as e: From 1807ae22dd93646ea4d7ba4bd28087bf1ef4857c Mon Sep 17 00:00:00 2001 From: WassimAttar <wassim.attar@free.fr> Date: Sun, 24 May 2015 10:37:05 +0200 Subject: [PATCH 0768/2721] chmod error After installing youtube-dl with this method sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+xr /usr/local/bin/youtube-dl When i try to use it, i get this error python: can't open file '/usr/local/bin/youtube-dl': [Errno 13] Permission denied The correct chmod is a+xr --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3d9436456..a29cccb3f 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,12 @@ youtube-dl - download videos from youtube.com or other video platforms To install it right away for all UNIX users (Linux, OS X, etc.), type: sudo curl 
https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl - sudo chmod a+x /usr/local/bin/youtube-dl + sudo chmod a+xr /usr/local/bin/youtube-dl If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl - sudo chmod a+x /usr/local/bin/youtube-dl + sudo chmod a+xr /usr/local/bin/youtube-dl Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). From 23905927e19280d9217ecad377ef26ea9d5793fe Mon Sep 17 00:00:00 2001 From: "Sergey M." <dstftw@gmail.com> Date: Sun, 24 May 2015 18:32:04 +0600 Subject: [PATCH 0769/2721] [README.md] Keep more idiomatic rwx order --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a29cccb3f..e51bb5343 100644 --- a/README.md +++ b/README.md @@ -17,12 +17,12 @@ youtube-dl - download videos from youtube.com or other video platforms To install it right away for all UNIX users (Linux, OS X, etc.), type: sudo curl https://yt-dl.org/latest/youtube-dl -o /usr/local/bin/youtube-dl - sudo chmod a+xr /usr/local/bin/youtube-dl + sudo chmod a+rx /usr/local/bin/youtube-dl If you do not have curl, you can alternatively use a recent wget: sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl - sudo chmod a+xr /usr/local/bin/youtube-dl + sudo chmod a+rx /usr/local/bin/youtube-dl Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). 
From abac15f3c6915d176c37f7aa748b8a0f03db82a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 May 2015 19:08:22 +0600 Subject: [PATCH 0770/2721] [tnaflix] Do not capture `cat_id` --- youtube_dl/extractor/tnaflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 79496039d..59af9aba0 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -10,7 +10,7 @@ from ..utils import ( class TNAFlixIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[^/]+)/(?P<display_id>[^/]+)/video(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)' _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos' _DESCRIPTION_REGEX = r'

    ([^<]+)

    ' From 34fb7e46ad3fa1a04635fa4876401aac881bb39b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 24 May 2015 19:10:03 +0600 Subject: [PATCH 0771/2721] [empflix] Relax _VALID_URL --- youtube_dl/extractor/empflix.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index 0dc947c1d..9a5a8f4bb 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -4,7 +4,7 @@ from .tnaflix import TNAFlixIE class EMPFlixIE(TNAFlixIE): - _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P[0-9a-zA-Z-]+)-(?P[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P.+?)-(?P[0-9]+)\.html' _TITLE_REGEX = r'name="title" value="(?P[^"]*)"' _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"' @@ -23,5 +23,9 @@ class EMPFlixIE(TNAFlixIE): 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } + }, + { + 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', + 'matching_only': True, } ] From d78c834ead934e5532d2f5bc221bb11eedaef0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 May 2015 20:04:13 +0600 Subject: [PATCH 0772/2721] [karrierevideos] Improve and simplify --- youtube_dl/extractor/karrierevideos.py | 96 +++++++++++++++++++------- 1 file changed, 71 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py index a05e8ab76..bed94bc93 100644 --- a/youtube_dl/extractor/karrierevideos.py +++ b/youtube_dl/extractor/karrierevideos.py @@ -1,50 +1,96 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + fix_xml_ampersands, + float_or_none, + xpath_with_ns, + xpath_text, +) class KarriereVideosIE(InfoExtractor): - _VALID_URL = 
r'http://(?:www\.)?karrierevideos\.at/berufsvideos/([a-z-]+)/(?P<id>[a-z-]+)' - _TEST = { + _VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)' + _TESTS = [{ 'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin', 'info_dict': { - 'id': 'altenpflegerin', - 'ext': 'mp4', + 'id': '32c91', + 'ext': 'flv', 'title': 'AltenpflegerIn', - 'thumbnail': 're:^http://.*\.png\?v=[0-9]+', - 'description': 'md5:dbadd1259fde2159a9b28667cb664ae2' + 'description': 'md5:dbadd1259fde2159a9b28667cb664ae2', + 'thumbnail': 're:^http://.*\.png', }, 'params': { - 'skip_download': 'requires rtmpdump' + # rtmp download + 'skip_download': True, } - } + }, { + # broken ampersands + 'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun', + 'info_dict': { + 'id': '5sniu', + 'ext': 'flv', + 'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"', + 'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33', + 'thumbnail': 're:^http://.*\.png', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = (self._html_search_meta('title', webpage, default=None) or + self._search_regex(r'<h1 class="title">([^<]+)</h1>')) + + video_id = self._search_regex( + r'/config/video/(.+?)\.xml', webpage, 'video id') + playlist = self._download_xml( + 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id, + video_id, transform_source=fix_xml_ampersands) + + NS_MAP = { + 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' + } + + def ns(path): + return xpath_with_ns(path, NS_MAP) + + item = playlist.find('./tracklist/item') + video_file = xpath_text( + item, ns('./jwplayer:file'), 'video url', fatal=True) + streamer = xpath_text( + item, ns('./jwplayer:streamer'), 'streamer', fatal=True) + + uploader = xpath_text( + item, 
ns('./jwplayer:author'), 'uploader') + duration = float_or_none( + xpath_text(item, ns('./jwplayer:duration'), 'duration')) + description = self._html_search_regex( - r'<div class="leadtext">\n{0,}?\s{0,}<p>(.*?)</p>', + r'(?s)<div class="leadtext">(.+?)</div>', webpage, 'description') - playlist = self._html_search_regex(r'/config/video/(.*?)\.xml', webpage, 'playlist') - playlist = self._download_xml( - 'http://www.karrierevideos.at/player-playlist.xml.php?p=' + playlist, - video_id) - - namespace = 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' - - item = playlist.find('tracklist/item') - streamer = item.find('{%s}streamer' % namespace).text + thumbnail = self._html_search_meta( + 'thumbnail', webpage, 'thumbnail') + if thumbnail: + thumbnail = compat_urlparse.urljoin(url, thumbnail) return { 'id': video_id, - 'title': self._html_search_meta('title', webpage), + 'url': streamer.replace('rtmpt', 'rtmp'), + 'play_path': 'mp4:%s' % video_file, + 'ext': 'flv', + 'title': title, 'description': description, - 'thumbnail': 'http://www.karrierevideos.at' + self._html_search_meta('thumbnail', webpage), - 'protocol': 'rtmp', - 'url': streamer.replace('rtmpt', 'http'), - 'play_path': 'mp4:' + item.find('{%s}file' % namespace).text, - 'tc_url': streamer, - 'ext': 'mp4' + 'thumbnail': thumbnail, + 'uploader': uploader, + 'duration': duration, } From 63f3cab4aee5da45dc9b91a9661d5d52b5a72ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 May 2015 21:09:08 +0600 Subject: [PATCH 0773/2721] [rtbf] Fix extraction (Closes #5803) --- youtube_dl/extractor/rtbf.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py index dce64e151..5a381d9ce 100644 --- a/youtube_dl/extractor/rtbf.py +++ b/youtube_dl/extractor/rtbf.py @@ -1,10 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import json - 
from .common import InfoExtractor +from ..utils import ( + int_or_none, + unescapeHTML, +) class RTBFIE(InfoExtractor): @@ -16,25 +17,24 @@ class RTBFIE(InfoExtractor): 'id': '1921274', 'ext': 'mp4', 'title': 'Les Diables au coeur (épisode 2)', - 'description': 'Football - Diables Rouges', 'duration': 3099, - 'timestamp': 1398456336, - 'upload_date': '20140425', } } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) - page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id) + webpage = self._download_webpage( + 'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id) - data = json.loads(self._html_search_regex( - r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data'] + data = self._parse_json( + unescapeHTML(self._search_regex( + r'data-video="([^"]+)"', webpage, 'data video')), + video_id) video_url = data.get('downloadUrl') or data.get('url') - if data['provider'].lower() == 'youtube': + if data.get('provider').lower() == 'youtube': return self.url_result(video_url, 'Youtube') return { @@ -42,8 +42,8 @@ class RTBFIE(InfoExtractor): 'url': video_url, 'title': data['title'], 'description': data.get('description') or data.get('subtitle'), - 'thumbnail': data['thumbnail']['large'], + 'thumbnail': data.get('thumbnail'), 'duration': data.get('duration') or data.get('realDuration'), - 'timestamp': data['created'], - 'view_count': data['viewCount'], + 'timestamp': int_or_none(data.get('created')), + 'view_count': int_or_none(data.get('viewCount')), } From 2ad5708c43a8672da547fa279e71b20c327793d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 24 May 2015 21:25:00 +0600 Subject: [PATCH 0774/2721] [arte:future] Switch to `search_regex` for now (Closes #5801) --- youtube_dl/extractor/arte.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 8273bd6c9..fce38248d 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -195,7 +195,9 @@ class ArteTVFutureIE(ArteTVPlus7IE): def _real_extract(self, url): anchor_id, lang = self._extract_url_info(url) webpage = self._download_webpage(url, anchor_id) - row = get_element_by_id(anchor_id, webpage) + row = self._search_regex( + r'(?s)id="%s"[^>]*>.+?(<div[^>]*arte_vp_url[^>]*>)' % anchor_id, + webpage, 'row') return self._extract_from_webpage(row, anchor_id, lang) From 04b3b3df05a26a361441754afeb7ff24d0c1f559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 22 May 2015 11:58:52 +0200 Subject: [PATCH 0775/2721] [youtube] Remove the nondash formats (fixes #5774) Since we use fixed values for some fields like width and height they can be wrong, and would get picked by some formats filters. For example for https://www.youtube.com/watch?v=EQCrhbBxsjA the biggest height is 720 and for nondash formats it's set to 1440, so -f 'bestvideo[height>=1200]+bestaudio' would incorrectly pick the nondash format, instead it should report that the requested format is not available. --- youtube_dl/extractor/youtube.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1f9940cf5..0301682b8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1126,12 +1126,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.report_warning( 'Skipping DASH manifest: %r' % e, video_id) else: - # Hide the formats we found through non-DASH + # Remove the formats we found through non-DASH, they + # contain less info and it can be wrong, because we use + # fixed values (for example the resolution). See + # https://github.com/rg3/youtube-dl/issues/5774 for an + # example. 
dash_keys = set(df['format_id'] for df in dash_formats) - for f in formats: - if f['format_id'] in dash_keys: - f['format_id'] = 'nondash-%s' % f['format_id'] - f['preference'] = f.get('preference', 0) - 10000 + formats = [f for f in formats if f['format_id'] not in dash_keys] formats.extend(dash_formats) # Check for malformed aspect ratio From 4b4e1af059c0922da9770a79a68a471277303f21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 24 May 2015 18:46:29 +0200 Subject: [PATCH 0776/2721] [arte] Remove unused import --- youtube_dl/extractor/arte.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index fce38248d..76de24477 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -7,7 +7,6 @@ from .common import InfoExtractor from ..utils import ( find_xpath_attr, unified_strdate, - get_element_by_id, get_element_by_attribute, int_or_none, qualities, From d41ebe146b693011eb1020ca9cd935e7db892d0b Mon Sep 17 00:00:00 2001 From: Sergey M? 
<dstftw@gmail.com> Date: Sun, 24 May 2015 23:57:47 +0600 Subject: [PATCH 0777/2721] [tenplay] Fix formats and modernize (Closes #5806) --- youtube_dl/extractor/tenplay.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py index 466155ef8..f6694149b 100644 --- a/youtube_dl/extractor/tenplay.py +++ b/youtube_dl/extractor/tenplay.py @@ -2,6 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, +) class TenPlayIE(InfoExtractor): @@ -49,18 +53,23 @@ class TenPlayIE(InfoExtractor): if protocol == 'rtmp': url = url.replace('&mp4:', '') + tbr = int_or_none(rendition.get('encodingRate'), 1000) + formats.append({ - 'format_id': '_'.join(['rtmp', rendition['videoContainer'].lower(), rendition['videoCodec'].lower()]), - 'width': rendition['frameWidth'], - 'height': rendition['frameHeight'], - 'tbr': rendition['encodingRate'] / 1024, - 'filesize': rendition['size'], + 'format_id': '_'.join( + ['rtmp', rendition['videoContainer'].lower(), + rendition['videoCodec'].lower(), '%sk' % tbr]), + 'width': int_or_none(rendition['frameWidth']), + 'height': int_or_none(rendition['frameHeight']), + 'tbr': tbr, + 'filesize': int_or_none(rendition['size']), 'protocol': protocol, 'ext': ext, 'vcodec': rendition['videoCodec'].lower(), 'container': rendition['videoContainer'].lower(), 'url': url, }) + self._sort_formats(formats) return { 'id': video_id, @@ -74,8 +83,8 @@ class TenPlayIE(InfoExtractor): 'url': json['thumbnailURL'] }], 'thumbnail': json['videoStillURL'], - 'duration': json['length'] / 1000, - 'timestamp': float(json['creationDate']) / 1000, - 'uploader': json['customFields']['production_company_distributor'] if 'production_company_distributor' in json['customFields'] else 'TENplay', - 'view_count': json['playsTotal'] + 'duration': float_or_none(json.get('length'), 1000), + 'timestamp': 
float_or_none(json.get('creationDate'), 1000), + 'uploader': json.get('customFields', {}).get('production_company_distributor') or 'TENplay', + 'view_count': int_or_none(json.get('playsTotal')), } From 7198063d96003050eccb0ea59cc938f0388c0606 Mon Sep 17 00:00:00 2001 From: Mister Hat <misterhat144@gmail.com> Date: Sun, 24 May 2015 15:26:59 -0500 Subject: [PATCH 0778/2721] [pinkbike] new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pinkbike.py | 78 ++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 youtube_dl/extractor/pinkbike.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 79bcd9106..80bec39da 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -394,6 +394,7 @@ from .pbs import PBSIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .pinkbike import PinkbikeIE from .planetaplay import PlanetaPlayIE from .pladform import PladformIE from .played import PlayedIE diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py new file mode 100644 index 000000000..4a15c1835 --- /dev/null +++ b/youtube_dl/extractor/pinkbike.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class PinkbikeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pinkbike\.com/video/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.pinkbike.com/video/402811/', + 'md5': '4814b8ca7651034cd87e3361d5c2155a', + 'info_dict': { + 'id': '402811', + 'ext': 'mp4', + 'title': 'Brandon Semenuk - RAW 100', + 'thumbnail': 're:^https?://.*\.jpg$', + 'location': 'Victoria, British Columbia, Canada', + 'uploader_id': 'revelco', + 'upload_date': '20150406', + 'description': 'Official release: www.redbull.ca/rupertwalker', + 'duration': '100' + } + }, { + 'url': 
'http://www.pinkbike.com/video/406629/', + 'md5': 'c7a3e19a2bd5cde5a1cda6b2b46caa74', + 'info_dict': { + 'id': '406629', + 'ext': 'mp4', + 'title': 'Chromag: Reece Wallace in Utah', + 'thumbnail': 're:^https?://.*\.jpg$', + 'location': 'Whistler, British Columbia, Canada', + 'uploader_id': 'Chromagbikes', + 'upload_date': '20150505', + 'description': 'Reece Wallace shredding Virgin, Utah. Video by Virtu Media.', + 'duration': '180' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<title>(.*?)', webpage, 'title') + title = title[:-len(' Video - Pinkbike')] + + description = self._html_search_meta('description', webpage, 'description') + description = description[len(title + '. '):] + + uploader_id = self._html_search_regex(r'un:\s*"(.*?)"', webpage, 'uploader_id') + + upload_date = self._html_search_regex( + r'class="fullTime"\s*title="([0-9]{4}(?:-[0-9]{2}){2})"', + webpage, 'upload_date') + upload_date = upload_date.replace('-', '') + + location = self._html_search_regex( + r'
    Location
    \n?\s*
    \n?(.*?)\s*', + webpage) + + formats = [{'url': fmt[1], 'height': fmt[0]} for fmt in formats] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': self._html_search_meta('video:duration', webpage, 'duration'), + 'thumbnail': self._html_search_meta('og:image', webpage, 'thumbnail'), + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'location': location, + 'formats': formats + } From 2c935c0c7224a3332ff9f0fd83e8c074cfbe2c9d Mon Sep 17 00:00:00 2001 From: Mister Hat Date: Sun, 24 May 2015 16:30:03 -0500 Subject: [PATCH 0779/2721] [pinkbike] converted duration to int --- youtube_dl/extractor/pinkbike.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py index 4a15c1835..66605ddbe 100644 --- a/youtube_dl/extractor/pinkbike.py +++ b/youtube_dl/extractor/pinkbike.py @@ -20,7 +20,7 @@ class PinkbikeIE(InfoExtractor): 'uploader_id': 'revelco', 'upload_date': '20150406', 'description': 'Official release: www.redbull.ca/rupertwalker', - 'duration': '100' + 'duration': 100 } }, { 'url': 'http://www.pinkbike.com/video/406629/', @@ -34,7 +34,7 @@ class PinkbikeIE(InfoExtractor): 'uploader_id': 'Chromagbikes', 'upload_date': '20150505', 'description': 'Reece Wallace shredding Virgin, Utah. 
Video by Virtu Media.', - 'duration': '180' + 'duration': 180 } }] @@ -69,7 +69,7 @@ class PinkbikeIE(InfoExtractor): 'id': video_id, 'title': title, 'description': description, - 'duration': self._html_search_meta('video:duration', webpage, 'duration'), + 'duration': int(self._html_search_meta('video:duration', webpage, 'duration')), 'thumbnail': self._html_search_meta('og:image', webpage, 'thumbnail'), 'uploader_id': uploader_id, 'upload_date': upload_date, From 680f9744c4e010ad5111c7711c58c341d5ba24dd Mon Sep 17 00:00:00 2001 From: Mister Hat Date: Sun, 24 May 2015 16:45:10 -0500 Subject: [PATCH 0780/2721] [pinkbike] used proper conversion methods --- youtube_dl/extractor/pinkbike.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py index 66605ddbe..45c0b1377 100644 --- a/youtube_dl/extractor/pinkbike.py +++ b/youtube_dl/extractor/pinkbike.py @@ -4,6 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_end, + remove_start +) class PinkbikeIE(InfoExtractor): @@ -43,10 +48,13 @@ class PinkbikeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'(.*?)', webpage, 'title') - title = title[:-len(' Video - Pinkbike')] + title = remove_end(title, ' Video - Pinkbike') description = self._html_search_meta('description', webpage, 'description') - description = description[len(title + '. '):] + description = remove_start(description, title + '. 
') + + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration')) uploader_id = self._html_search_regex(r'un:\s*"(.*?)"', webpage, 'uploader_id') @@ -63,13 +71,13 @@ class PinkbikeIE(InfoExtractor): r'', webpage) - formats = [{'url': fmt[1], 'height': fmt[0]} for fmt in formats] + formats = [{'url': fmt[1], 'height': int_or_none(fmt[0])} for fmt in formats] return { 'id': video_id, 'title': title, 'description': description, - 'duration': int(self._html_search_meta('video:duration', webpage, 'duration')), + 'duration': duration, 'thumbnail': self._html_search_meta('og:image', webpage, 'thumbnail'), 'uploader_id': uploader_id, 'upload_date': upload_date, From b885bae6340b2aa9406501250fdebfbeea54e5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 25 May 2015 04:53:53 +0600 Subject: [PATCH 0781/2721] Credit @misterhat for karrierevideos (#5729) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index ebed7ebb3..3410e1fb9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -125,3 +125,4 @@ Roman Le Négrate Matthias Küch Julian Richen Ping O. 
+Mister Hat From c6bbdadd79fac001cde15e8fd118b9535427474d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 25 May 2015 21:22:13 +0600 Subject: [PATCH 0782/2721] [odnoklassniki] Support extraction from metadata URL (Closes #5813) --- youtube_dl/extractor/odnoklassniki.py | 33 ++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index fbc521d1a..691f503f5 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( unified_strdate, int_or_none, @@ -11,8 +12,9 @@ from ..utils import ( class OdnoklassnikiIE(InfoExtractor): - _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P\d+)' + _VALID_URL = r'https?://(?:odnoklassniki|ok)\.ru/(?:video|web-api/video/moviePlayer)/(?P[\d-]+)' _TESTS = [{ + # metadata in JSON 'url': 'http://ok.ru/video/20079905452', 'md5': '8e24ad2da6f387948e7a7d44eb8668fe', 'info_dict': { @@ -20,11 +22,22 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'mp4', 'title': 'Культура меняет нас (прекрасный ролик!))', 'duration': 100, - 'upload_date': '20141207', 'uploader_id': '330537914540', 'uploader': 'Виталий Добровольский', 'like_count': int, - 'age_limit': 0, + }, + }, { + # metadataUrl + 'url': 'http://ok.ru/video/63567059965189-0', + 'md5': '9676cf86eff5391d35dea675d224e131', + 'info_dict': { + 'id': '63567059965189-0', + 'ext': 'mp4', + 'title': 'Девушка без комплексов ...', + 'duration': 191, + 'uploader_id': '534380003155', + 'uploader': 'Андрей Мещанинов', + 'like_count': int, }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', @@ -41,7 +54,15 @@ class OdnoklassnikiIE(InfoExtractor): r'data-attributes="([^"]+)"', webpage, 'player')), video_id) - metadata = 
self._parse_json(player['flashvars']['metadata'], video_id) + flashvars = player['flashvars'] + + metadata = flashvars.get('metadata') + if metadata: + metadata = self._parse_json(metadata, video_id) + else: + metadata = self._download_json( + compat_urllib_parse.unquote(flashvars['metadataUrl']), + video_id, 'Downloading metadata JSON') movie = metadata['movie'] title = movie['title'] @@ -53,11 +74,11 @@ class OdnoklassnikiIE(InfoExtractor): uploader = author.get('name') upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date')) + 'ya:ovs:upload_date', webpage, 'upload date', default=None)) age_limit = None adult = self._html_search_meta( - 'ya:ovs:adult', webpage, 'age limit') + 'ya:ovs:adult', webpage, 'age limit', default=None) if adult: age_limit = 18 if adult == 'true' else 0 From ba2df04b41b62d08e1fd0efaaaf104467133e9a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 25 May 2015 21:27:43 +0600 Subject: [PATCH 0783/2721] [odnoklassniki] Make URL explicit --- youtube_dl/extractor/odnoklassniki.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 691f503f5..6c7149fe3 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -47,7 +47,8 @@ class OdnoklassnikiIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'http://ok.ru/video/%s' % video_id, video_id) player = self._parse_json( unescapeHTML(self._search_regex( From 5d0a33eebcae821ac5d1124043a5ad77a58fa291 Mon Sep 17 00:00:00 2001 From: Alexander Kirk Date: Mon, 25 May 2015 20:12:18 +0200 Subject: [PATCH 0784/2721] rtlnow is now hosted at nowtv.de --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/nowtv.py | 90 ++++++++++++++++ youtube_dl/extractor/rtlnow.py | 174 
------------------------------- 3 files changed, 91 insertions(+), 175 deletions(-) create mode 100644 youtube_dl/extractor/nowtv.py delete mode 100644 youtube_dl/extractor/rtlnow.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 79bcd9106..bfd07392e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -355,6 +355,7 @@ from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE from .novamov import NovaMovIE from .nowness import NownessIE +from .nowtv import NowTvIE from .nowvideo import NowVideoIE from .npo import ( NPOIE, @@ -438,7 +439,6 @@ from .roxwel import RoxwelIE from .rtbf import RTBFIE from .rte import RteIE from .rtlnl import RtlNlIE -from .rtlnow import RTLnowIE from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py new file mode 100644 index 000000000..bf97fe7f4 --- /dev/null +++ b/youtube_dl/extractor/nowtv.py @@ -0,0 +1,90 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + qualities, + unified_strdate, + int_or_none, +) + +class NowTvIE(InfoExtractor): + """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" + _VALID_URL = r'''(?x) + (?:https?://)? 
+ ( + (?:www\.)?nowtv\.de + /(rtl|rtl2|rtlnitro||superrtl|ntv|vox)(?P/.*?)/player + )''' + + _TESTS = [ + { + 'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player', + 'info_dict': { + 'id': '128953', + 'ext': 'mp4', + 'title': 'B\u00fcro-Fall \/ Chihuahua \'Joel\'', + 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de', + 'upload_date': '2015-05-23 19:10:00', + 'duration': '00:51:32', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Only works from Germany', + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + info_url = 'https://api.nowtv.de/v3/movies' + mobj.group('path') + '?fields=*,format,files,breakpoints,paymentPaytypes,trailers' + info = self._download_json(info_url, None) + + video_id = info['id'] + title = info['title'] + description = info['articleShort'] + duration = info['duration'] + upload_date = unified_strdate(info['broadcastStartDate']) + free = info['free'] + station = info['format']['station'] + thumbnail = info['format']['defaultImage169Logo'] + + if station == 'rtl': + base_url = 'http://hls.fra.rtlnow.de/hls-vod-enc/' + elif station == 'rtl2': + base_url = 'http://hls.fra.rtl2now.de/hls-vod-enc/' + elif station == 'vox': + base_url = 'http://hls.fra.voxnow.de/hls-vod-enc/' + elif station == 'nitro': + base_url = 'http://hls.fra.rtlnitronow.de/hls-vod-enc/' + elif station == 'ntv': + base_url = 'http://hls.fra.n-tvnow.de/hls-vod-enc/' + elif station == 'superrtl': + base_url = 'http://hls.fra.superrtlnow.de/hls-vod-enc/' + + formats = [] + for item in info['files']['items']: + if item['type'] != 'video/x-abr': + continue + + fmt = { + 'url': base_url + item['path'] + '.m3u8', + 'tbr': int_or_none(item['bitrate']), + 'ext': 'mp4', + 'format_id': int_or_none(item['id']), + } + formats.append(fmt) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': 
duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py deleted file mode 100644 index 785a8045e..000000000 --- a/youtube_dl/extractor/rtlnow.py +++ /dev/null @@ -1,174 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - clean_html, - unified_strdate, - int_or_none, -) - - -class RTLnowIE(InfoExtractor): - """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" - _VALID_URL = r'''(?x) - (?:https?://)? - (?P - (?P - rtl-now\.rtl\.de| - rtl2now\.rtl2\.de| - (?:www\.)?voxnow\.de| - (?:www\.)?rtlnitronow\.de| - (?:www\.)?superrtlnow\.de| - (?:www\.)?n-tvnow\.de) - /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\? - (?:container_id|film_id)=(?P[0-9]+)& - player=1(?:&season=[0-9]+)?(?:&.*)? - )''' - - _TESTS = [ - { - 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', - 'info_dict': { - 'id': '90419', - 'ext': 'flv', - 'title': 'Ahornallee - Folge 1 - Der Einzug', - 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de', - 'upload_date': '20070416', - 'duration': 1685, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Only works from Germany', - }, - { - 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', - 'info_dict': { - 'id': '69756', - 'ext': 'flv', - 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', - 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0', - 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', - 'upload_date': '20120519', - 'duration': 1245, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Only works from Germany', - }, - { - 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', - 'info_dict': { - 'id': '13883', - 'ext': 'flv', - 'title': 
'Voxtours - Südafrika-Reporter II', - 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00', - 'upload_date': '20090627', - 'duration': 1800, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1', - 'info_dict': { - 'id': '99205', - 'ext': 'flv', - 'title': 'Medicopter 117 - Angst!', - 'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin', - 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg', - 'upload_date': '20080928', - 'duration': 2691, - }, - 'params': { - 'skip_download': True, - }, - }, - { - 'url': 'http://rtl-now.rtl.de/der-bachelor/folge-4.php?film_id=188729&player=1&season=5', - 'info_dict': { - 'id': '188729', - 'ext': 'flv', - 'upload_date': '20150204', - 'description': 'md5:5e1ce23095e61a79c166d134b683cecc', - 'title': 'Der Bachelor - Folge 4', - } - }, { - 'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0', - 'only_matching': True, - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_page_url = 'http://%s/' % mobj.group('domain') - video_id = mobj.group('video_id') - - webpage = self._download_webpage('http://' + mobj.group('url'), video_id) - - mobj = re.search(r'(?s)
    (.*?)
    ', webpage) - if mobj: - raise ExtractorError(clean_html(mobj.group(1)), expected=True) - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage, default=None) - - upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date')) - - mobj = re.search(r'', webpage) - duration = int(mobj.group('seconds')) if mobj else None - - playerdata_url = self._html_search_regex( - r"'playerdata': '(?P[^']+)'", webpage, 'playerdata_url') - - playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML') - - videoinfo = playerdata.find('./playlist/videoinfo') - - formats = [] - for filename in videoinfo.findall('filename'): - mobj = re.search(r'(?Prtmpe://(?:[^/]+/){2})(?P.+)', filename.text) - if mobj: - fmt = { - 'url': mobj.group('url'), - 'play_path': 'mp4:' + mobj.group('play_path'), - 'page_url': video_page_url, - 'player_url': video_page_url + 'includes/vodplayer.swf', - } - else: - mobj = re.search(r'.*/(?P[^/]+)/videos/(?P.+)\.f4m', filename.text) - if mobj: - fmt = { - 'url': 'rtmpe://fms.rtl.de/' + mobj.group('hoster'), - 'play_path': 'mp4:' + mobj.group('play_path'), - 'page_url': url, - 'player_url': video_page_url + 'includes/vodplayer.swf', - } - else: - fmt = { - 'url': filename.text, - } - fmt.update({ - 'width': int_or_none(filename.get('width')), - 'height': int_or_none(filename.get('height')), - 'vbr': int_or_none(filename.get('bitrate')), - 'ext': 'flv', - }) - formats.append(fmt) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'upload_date': upload_date, - 'duration': duration, - 'formats': formats, - } From 42833b44b5e3810a2875dfb130aefbf5db057c1e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 26 May 2015 13:32:43 +0800 Subject: [PATCH 0785/2721] [tf1] Extend _VALID_URL (fixes #5819) --- youtube_dl/extractor/tf1.py | 9 ++++++--- 1 file changed, 6 
insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 025d0877c..656410528 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,8 +6,8 @@ from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = r'http://(?:videos\.tf1|www\.tfou)\.fr/.*?-(?P\d+)(?:-\d+)?\.html' - _TESTS = { + _VALID_URL = r'http://(?:videos\.tf1|www\.tfou|www\.tf1)\.fr/.*?-(?P\d+)(?:-\d+)?\.html' + _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { 'id': '10635995', @@ -32,7 +32,10 @@ class TF1IE(InfoExtractor): # Sometimes wat serves the whole file with the --test option 'skip_download': True, }, - } + }, { + 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From db3ca36403930063b7df3b228a3f297bf278b43c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 26 May 2015 13:37:15 +0800 Subject: [PATCH 0786/2721] [facebook] Move the title extraction warning below (fixes #5820) --- youtube_dl/extractor/facebook.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 937b28fcc..e8d682716 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -50,7 +50,10 @@ class FacebookIE(InfoExtractor): 'id': '274175099429670', 'ext': 'mp4', 'title': 'Facebook video #274175099429670', - } + }, + 'expected_warnings': [ + 'title' + ] }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -150,11 +153,11 @@ class FacebookIE(InfoExtractor): video_title = self._html_search_regex( r'

    ([^<]*)

    ', webpage, 'title', - fatal=False) + default=None) if not video_title: video_title = self._html_search_regex( r'(?s)(.*?)', - webpage, 'alternative title', default=None) + webpage, 'alternative title', fatal=False) video_title = limit_length(video_title, 80) if not video_title: video_title = 'Facebook video #%s' % video_id From efec4358b9b8ed5726c1f7d7939e8bce49f9100c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 26 May 2015 13:54:41 +0800 Subject: [PATCH 0787/2721] [cinemassacre] Support an alternative form of screenwavemedia URL fixes #5821 --- youtube_dl/extractor/cinemassacre.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py index cf0a7551b..c949a4814 100644 --- a/youtube_dl/extractor/cinemassacre.py +++ b/youtube_dl/extractor/cinemassacre.py @@ -60,6 +60,17 @@ class CinemassacreIE(InfoExtractor): 'uploader_id': 'Cinemassacre', 'title': 'AVGN: McKids', } + }, + { + 'url': 'http://cinemassacre.com/2015/05/25/mario-kart-64-nintendo-64-james-mike-mondays/', + 'md5': '1376908e49572389e7b06251a53cdd08', + 'info_dict': { + 'id': 'Cinemassacre-555779690c440', + 'ext': 'mp4', + 'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). 
Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!', + 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays', + 'upload_date': '20150525', + } } ] @@ -72,7 +83,7 @@ class CinemassacreIE(InfoExtractor): playerdata_url = self._search_regex( [ - r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', + r'src="(http://(?:player2\.screenwavemedia\.com|player\.screenwavemedia\.com/play)/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', r']+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"', ], webpage, 'player data URL', default=None) From ff305edd645d6f4307faa9307dea91694a1d217d Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Tue, 26 May 2015 13:43:00 +0300 Subject: [PATCH 0788/2721] [sockshare] Remove extractor Haywire since last October. --- youtube_dl/extractor/sockshare.py | 83 ------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 youtube_dl/extractor/sockshare.py diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py deleted file mode 100644 index b5fa6f1da..000000000 --- a/youtube_dl/extractor/sockshare.py +++ /dev/null @@ -1,83 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from ..compat import ( - compat_urllib_parse, - compat_urllib_request, -) -from ..utils import ( - determine_ext, - ExtractorError, -) - -from .common import InfoExtractor - - -class SockshareIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P[0-9A-Za-z]+)' - _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.
    ' - _TEST = { - 'url': 'http://www.sockshare.com/file/437BE28B89D799D7', - 'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd', - 'info_dict': { - 'id': '437BE28B89D799D7', - 'title': 'big_buck_bunny_720p_surround.avi', - 'ext': 'avi', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - url = 'http://sockshare.com/file/%s' % video_id - webpage = self._download_webpage(url, video_id) - - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - confirm_hash = self._html_search_regex(r'''(?x)(.+)', - r'var name = "([^"]+)";'), - webpage, 'title', default=None) - thumbnail = self._html_search_regex( - r' Date: Tue, 26 May 2015 13:44:46 +0300 Subject: [PATCH 0789/2721] [firedrive] Remove extractor (Closes #3870) Haywire since last October. --- youtube_dl/extractor/firedrive.py | 80 ------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 youtube_dl/extractor/firedrive.py diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py deleted file mode 100644 index 3191116d9..000000000 --- a/youtube_dl/extractor/firedrive.py +++ /dev/null @@ -1,80 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, - compat_urllib_request, -) -from ..utils import ( - ExtractorError, -) - - -class FiredriveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \ - '(?:file|embed)/(?P[0-9a-zA-Z]+)' - _FILE_DELETED_REGEX = r'
    ' - - _TESTS = [{ - 'url': 'https://www.firedrive.com/file/FEB892FA160EBD01', - 'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970', - 'info_dict': { - 'id': 'FEB892FA160EBD01', - 'ext': 'flv', - 'title': 'bbb_theora_486kbit.flv', - 'thumbnail': 're:^http://.*\.jpg$', - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - url = 'http://firedrive.com/file/%s' % video_id - webpage = self._download_webpage(url, video_id) - - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - fields = dict(re.findall(r'''(?x)(.+)
    ', - webpage, 'title') - thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, - 'thumbnail', fatal=False) - if thumbnail is not None: - thumbnail = 'http:' + thumbnail - - ext = self._search_regex(r'type:\s?\'([^\']+)\',', - webpage, 'extension', fatal=False) - video_url = self._search_regex( - r'file:\s?loadURL\(\'(http[^\']+)\'\),', webpage, 'file url') - - formats = [{ - 'format_id': 'sd', - 'url': video_url, - 'ext': ext, - }] - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } From 544a8693b7d9321d776987a5104889056955daa2 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Tue, 26 May 2015 13:53:14 +0300 Subject: [PATCH 0790/2721] Remove Firedrive and Sockshare imports Oops --- youtube_dl/extractor/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 79bcd9106..80c9cb107 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -149,7 +149,6 @@ from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE -from .firedrive import FiredriveIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE @@ -480,7 +479,6 @@ from .smotri import ( SmotriBroadcastIE, ) from .snotr import SnotrIE -from .sockshare import SockshareIE from .sohu import SohuIE from .soundcloud import ( SoundcloudIE, From 7d65242dc3c0c3306b775f0663d325ba55b62379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 26 May 2015 22:12:26 +0600 Subject: [PATCH 0791/2721] [dailymotion:user] Process user home as user (Closes #5823) --- youtube_dl/extractor/dailymotion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index db10b8d00..d8f9eb13f 100644 --- a/youtube_dl/extractor/dailymotion.py +++ 
b/youtube_dl/extractor/dailymotion.py @@ -225,7 +225,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P[^/]+)' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', @@ -239,7 +239,8 @@ class DailymotionUserIE(DailymotionPlaylistIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user = mobj.group('user') - webpage = self._download_webpage(url, user) + webpage = self._download_webpage( + 'https://www.dailymotion.com/user/%s' % user, user) full_user = unescapeHTML(self._html_search_regex( r'' % re.escape(user), webpage, 'user')) From 5406af92bc3f88b9fe4f26fe20bdaaf0b4968c32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 26 May 2015 22:16:47 +0600 Subject: [PATCH 0792/2721] [dailymotion:user] Fix _VALID_URL --- youtube_dl/extractor/dailymotion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index d8f9eb13f..70aa4333c 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -225,7 +225,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:(?:old/)?user/)?(?P[^/]+)$' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', From 7a372b64dfa69d5b2cfd1514b89e8fc0ab7e5874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 May 2015 01:41:00 +0600 Subject: [PATCH 
0793/2721] [pornhub] Do not modify aes key string (Closes #5824) --- youtube_dl/extractor/pornhub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0c8b731cf..daa284ea2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -71,7 +71,8 @@ class PornHubIE(InfoExtractor): video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) if webpage.find('"encrypted":true') != -1: - password = compat_urllib_parse.unquote_plus(self._html_search_regex(r'"video_title":"([^"]+)', webpage, 'password')) + password = compat_urllib_parse.unquote_plus( + self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) formats = [] From 1434184c577953ff6fe558ccc6751697791f4076 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 May 2015 01:42:53 +0600 Subject: [PATCH 0794/2721] [spankwire] Do not modify aes key string --- youtube_dl/extractor/spankwire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index b936202f6..06d6e6640 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -71,7 +71,7 @@ class SpankwireIE(InfoExtractor): compat_urllib_parse.unquote, re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage))) if webpage.find('flashvars\.encrypted = "true"') != -1: - password = self._html_search_regex( + password = self._search_regex( r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ') video_urls = list(map( From b535170b218131afd1165776e611691479627ce8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 27 May 2015 04:14:24 +0800 Subject: [PATCH 0795/2721] [bilibili] Skip assertion if HQ videos not available --- 
youtube_dl/extractor/bilibili.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 7ca835e31..a8bea2c10 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -80,9 +80,11 @@ class BiliBiliIE(InfoExtractor): note='Downloading HQ video info', fatal=False, ) - hq_durls = hq_doc.findall('./durl') if hq_doc is not False else itertools.repeat(None) - - assert len(lq_durls) == len(hq_durls) + if hq_doc is not False: + hq_durls = hq_doc.findall('./durl') + assert len(lq_durls) == len(hq_durls) + else: + hq_durls = itertools.repeat(None) i = 1 for lq_durl, hq_durl in zip(lq_durls, hq_durls): From 6d00a2dcd110f12a0aa110f5479df76613792fbd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 27 May 2015 04:23:21 +0800 Subject: [PATCH 0796/2721] [bilibili] Catch API call failures JSON are returned in a failed API call --- youtube_dl/extractor/bilibili.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index a8bea2c10..2103ed73a 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals import re import itertools +import json +import xml.etree.ElementTree as ET from .common import InfoExtractor from ..utils import ( @@ -67,11 +69,19 @@ class BiliBiliIE(InfoExtractor): entries = [] - lq_doc = self._download_xml( + lq_page = self._download_webpage( 'http://interface.bilibili.com/v_cdn_play?appkey=1&cid=%s' % cid, video_id, note='Downloading LQ video info' ) + try: + err_info = json.loads(lq_page) + raise ExtractorError( + 'BiliBili said: ' + err_info['error_text'], expected=True) + except ValueError: + pass + + lq_doc = ET.fromstring(lq_page) lq_durls = lq_doc.findall('./durl') hq_doc = self._download_xml( From c23848b3c5244f8ef1501adfd04a32111b12d7ff Mon Sep 17 00:00:00 
2001 From: Yen Chi Hsuan Date: Wed, 27 May 2015 14:20:29 +0800 Subject: [PATCH 0797/2721] [naver] Enhanced error detection --- youtube_dl/extractor/naver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index c10405f04..fdd825784 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -35,7 +35,7 @@ class NaverIE(InfoExtractor): webpage) if m_id is None: m_error = re.search( - r'(?s)
    \s*(?:)?\s*

    (?P.+?)

    \s*
    ', + r'(?s)
    \s*(?:)?\s*

    (?P.+?)

    \s*
    ', webpage) if m_error: raise ExtractorError(clean_html(m_error.group('msg')), expected=True) From f8d5e1cfb5d9a8c946a966452d9b86c45182a952 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 27 May 2015 14:44:08 +0800 Subject: [PATCH 0798/2721] [naver] Fix video url (fixes #5809) RTMP urls in test:naver does not work. Need more investigation. --- youtube_dl/extractor/naver.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index fdd825784..925967753 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_urllib_parse, + compat_urlparse, ) from ..utils import ( ExtractorError, @@ -16,7 +17,7 @@ from ..utils import ( class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://tvcast.naver.com/v/81652', 'info_dict': { 'id': '81652', @@ -25,7 +26,18 @@ class NaverIE(InfoExtractor): 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', 'upload_date': '20130903', }, - } + }, { + 'url': 'http://tvcast.naver.com/v/395837', + 'md5': '638ed4c12012c458fefcddfd01f173cd', + 'info_dict': { + 'id': '395837', + 'ext': 'mp4', + 'title': '9년이 지나도 아픈 기억, 전효성의 아버지', + 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7', + 'upload_date': '20150519', + }, + 'skip': 'Georestricted', + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -58,14 +70,18 @@ class NaverIE(InfoExtractor): formats = [] for format_el in urls.findall('EncodingOptions/EncodingOption'): domain = format_el.find('Domain').text + uri = format_el.find('uri').text f = { - 'url': domain + format_el.find('uri').text, + 'url': compat_urlparse.urljoin(domain, uri), 'ext': 'mp4', 'width': int(format_el.find('width').text), 'height': int(format_el.find('height').text), } if 
domain.startswith('rtmp'): + # urlparse does not support custom schemes + # https://bugs.python.org/issue18828 f.update({ + 'url': domain + uri, 'ext': 'flv', 'rtmp_protocol': '1', # rtmpt }) From f9f3e3df9a9fff1b00c7184234c4f607ea3cec81 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 27 May 2015 14:51:18 +0800 Subject: [PATCH 0799/2721] [teamcoco] Use determine_ext to determine the video type Some videos does not contain a 'type' field (#5798) --- youtube_dl/extractor/teamcoco.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 56be52638..b2a4b1fc0 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -10,6 +10,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, qualities, + determine_ext, ) from ..compat import compat_ord @@ -108,7 +109,7 @@ class TeamcocoIE(InfoExtractor): formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) for filed in data['files']: - if filed['type'] == 'hls': + if determine_ext(filed['url']) == 'm3u8': formats.extend(self._extract_m3u8_formats( filed['url'], video_id, ext='mp4')) else: From f0bfaa2d7d9563975f1f6effa75d28dcdb1c23ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 27 May 2015 15:23:34 +0800 Subject: [PATCH 0800/2721] [nrk] Update subtitles test Subtitle conversion routine is removed, so the subtitles are TTML now. 
See 1c7e2e64f6328024711d5fa999d4498396f4cb5c --- test/test_subtitles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 891ee620b..c4e3adb67 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -266,7 +266,7 @@ class TestNRKSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), set(['no'])) - self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a') + self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2') class TestRaiSubtitles(BaseTestSubtitles): From bf24c3d01798fad0a8344a642eb5d46231fd78c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 May 2015 21:25:07 +0600 Subject: [PATCH 0801/2721] [facebook] Improve title regex (Closes #5816) --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index e8d682716..82dc27bc6 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -152,7 +152,7 @@ class FacebookIE(InfoExtractor): raise ExtractorError('Cannot find video formats') video_title = self._html_search_regex( - r'

    ([^<]*)

    ', webpage, 'title', + r']*class="uiHeaderTitle"[^>]*>([^<]*)', webpage, 'title', default=None) if not video_title: video_title = self._html_search_regex( From d90b3854ca9e8602f440cc9439e1cba240192286 Mon Sep 17 00:00:00 2001 From: PeterDing Date: Thu, 28 May 2015 00:37:00 +0800 Subject: [PATCH 0802/2721] [porn91] Add new extractor for 91porn.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/porn91.py | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/porn91.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 80c9cb107..d20ad286d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -400,6 +400,7 @@ from .playfm import PlayFMIE from .playvid import PlayvidIE from .playwire import PlaywireIE from .podomatic import PodomaticIE +from .porn91 import Porn91IE from .pornhd import PornHdIE from .pornhub import ( PornHubIE, diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py new file mode 100644 index 000000000..af06af2b7 --- /dev/null +++ b/youtube_dl/extractor/porn91.py @@ -0,0 +1,62 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re +import json + +from ..compat import compat_urllib_parse +from .common import InfoExtractor +from ..utils import ExtractorError + + +class Porn91IE(InfoExtractor): + IE_NAME = '91porn' + _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P[\w\d]+)' + + _TEST = { + 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', + 'md5': '6df8f6d028bc8b14f5dbd73af742fb20', + 'info_dict': { + 'id': '7e42283b4f5ab36da134', + 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', + 'ext': 'mp4' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id + self._set_cookie('91porn.com', 'language', 'cn_CN') + webpage = 
self._download_webpage(url, video_id, "get HTML content") + title = re.search( + r'
    (.+?)
    ', + webpage, + re.DOTALL) + assert title + title = title.group(1).replace('\n', '') + + # get real url + n1 = re.search(r'so.addVariable\(\'file\',\'(\d+)\'', webpage) + n2 = re.search(r'so.addVariable\(\'seccode\',\'(.+?)\'', webpage) + n3 = re.search(r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage) + if not (n1 and n2 and n3): + raise ExtractorError("You are Blocked by Server.") + + url_params = compat_urllib_parse.urlencode({ + 'VID': n1.group(1), + 'mp4': '1', + 'seccode': n2.group(1), + 'max_vid': n3.group(1), + }) + t_url = 'http://91porn.com/getfile.php?' + url_params + info_cn = self._download_webpage(t_url, video_id, "get real video_url") + video_url = re.search(r'file=(http.+?)&', info_cn).group(1) + + info = { + 'id': video_id, + 'title': title, + 'url': video_url, + } + + return info From b25b645d5106e5b2bf33c640813fe744b63f4730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 27 May 2015 23:20:32 +0600 Subject: [PATCH 0803/2721] [nowtv] Improve and simplify --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/nowtv.py | 197 ++++++++++++++++++++++--------- 2 files changed, 139 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bfd07392e..17248ccea 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -355,7 +355,7 @@ from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE from .novamov import NovaMovIE from .nowness import NownessIE -from .nowtv import NowTvIE +from .nowtv import NowTVIE from .nowvideo import NowVideoIE from .npo import ( NPOIE, diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index bf97fe7f4..5c91acec6 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -1,90 +1,169 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str 
from ..utils import ( ExtractorError, - qualities, - unified_strdate, int_or_none, + parse_iso8601, + parse_duration, + remove_start, ) -class NowTvIE(InfoExtractor): - """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW""" - _VALID_URL = r'''(?x) - (?:https?://)? - ( - (?:www\.)?nowtv\.de - /(rtl|rtl2|rtlnitro||superrtl|ntv|vox)(?P/.*?)/player - )''' - _TESTS = [ - { - 'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player', - 'info_dict': { - 'id': '128953', - 'ext': 'mp4', - 'title': 'B\u00fcro-Fall \/ Chihuahua \'Joel\'', - 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de', - 'upload_date': '2015-05-23 19:10:00', - 'duration': '00:51:32', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Only works from Germany', +class NowTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?Prtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P.+?)/player' + + _TESTS = [{ + # rtl + 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/player', + 'info_dict': { + 'id': '203519', + 'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', + 'ext': 'mp4', + 'title': 'Die neuen Bauern und eine Hochzeit', + 'description': 'md5:e234e1ed6d63cf06be5c070442612e7e', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432580700, + 'upload_date': '20150525', + 'duration': 2786, }, - ] + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # rtl2 + 'url': 'http://www.nowtv.de/rtl2/berlin-tag-nacht/berlin-tag-nacht-folge-934/player', + 'info_dict': { + 'id': '203481', + 'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934', + 'ext': 'mp4', + 'title': 'Berlin - Tag & Nacht (Folge 934)', + 'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432666800, + 'upload_date': '20150526', + 'duration': 2641, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # superrtl + 
'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player', + 'info_dict': { + 'id': '99205', + 'display_id': 'medicopter-117/angst', + 'ext': 'mp4', + 'title': 'Angst!', + 'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1222632900, + 'upload_date': '20080928', + 'duration': 3025, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # ntv + 'url': 'http://www.nowtv.de/ntv/ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch/player', + 'info_dict': { + 'id': '203521', + 'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch', + 'ext': 'mp4', + 'title': 'Thema u.a.: Der erste Blick: Die Apple Watch', + 'description': 'md5:4312b6c9d839ffe7d8caf03865a531af', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432751700, + 'upload_date': '20150527', + 'duration': 1083, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # vox + 'url': 'http://www.nowtv.de/vox/der-hundeprofi/buero-fall-chihuahua-joel/player', + 'info_dict': { + 'id': '128953', + 'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel', + 'ext': 'mp4', + 'title': "Büro-Fall / Chihuahua 'Joel'", + 'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432408200, + 'upload_date': '20150523', + 'duration': 3092, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - info_url = 'https://api.nowtv.de/v3/movies' + mobj.group('path') + '?fields=*,format,files,breakpoints,paymentPaytypes,trailers' - info = self._download_json(info_url, None) + display_id = mobj.group('id') + station = mobj.group('station') - video_id = info['id'] - title = info['title'] - description = info['articleShort'] - duration = info['duration'] - upload_date = unified_strdate(info['broadcastStartDate']) - free = info['free'] - station = info['format']['station'] - 
thumbnail = info['format']['defaultImage169Logo'] + info = self._download_json( + 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files,breakpoints,paymentPaytypes,trailers,pictures' % display_id, + display_id) - if station == 'rtl': - base_url = 'http://hls.fra.rtlnow.de/hls-vod-enc/' - elif station == 'rtl2': - base_url = 'http://hls.fra.rtl2now.de/hls-vod-enc/' - elif station == 'vox': - base_url = 'http://hls.fra.voxnow.de/hls-vod-enc/' - elif station == 'nitro': - base_url = 'http://hls.fra.rtlnitronow.de/hls-vod-enc/' - elif station == 'ntv': - base_url = 'http://hls.fra.n-tvnow.de/hls-vod-enc/' - elif station == 'superrtl': - base_url = 'http://hls.fra.superrtlnow.de/hls-vod-enc/' + video_id = compat_str(info['id']) + + if info.get('geoblocked'): + raise ExtractorError( + 'Video %s is not available from your location due to geo restriction' % video_id, + expected=True) + + f = info.get('format', {}) + station = f.get('station') or station + + STATIONS = { + 'rtl': 'rtlnow', + 'rtl2': 'rtl2now', + 'vox': 'voxnow', + 'nitro': 'rtlnitronow', + 'ntv': 'n-tvnow', + 'superrtl': 'superrtlnow' + } formats = [] for item in info['files']['items']: - if item['type'] != 'video/x-abr': - continue - - fmt = { - 'url': base_url + item['path'] + '.m3u8', - 'tbr': int_or_none(item['bitrate']), + item_path = remove_start(item['path'], '/') + tbr = int_or_none(item['bitrate']) + m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path) + m3u8_url = m3u8_url.replace('now/', 'now/videos/') + formats.append({ + 'url': m3u8_url, + 'format_id': '%s-%sk' % (item['id'], tbr), 'ext': 'mp4', - 'format_id': int_or_none(item['id']), - } - formats.append(fmt) + 'tbr': tbr, + }) self._sort_formats(formats) + title = info['title'] + description = info.get('articleLong') or info.get('articleShort') + timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') + duration = parse_duration(info.get('duration')) + thumbnail = f.get('defaultImage169Format') or 
f.get('defaultImage169Logo') + return { 'id': video_id, + 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, - 'upload_date': upload_date, + 'timestamp': timestamp, 'duration': duration, 'formats': formats, } From 703d78bbf5edf73f60447ac273c0d303d28cc340 Mon Sep 17 00:00:00 2001 From: PeterDing Date: Thu, 28 May 2015 01:37:24 +0800 Subject: [PATCH 0804/2721] [porn91] change re to _search_regex --- youtube_dl/extractor/porn91.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index af06af2b7..f3a97df64 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from ..compat import compat_urllib_parse from .common import InfoExtractor @@ -29,30 +28,32 @@ class Porn91IE(InfoExtractor): url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id self._set_cookie('91porn.com', 'language', 'cn_CN') webpage = self._download_webpage(url, video_id, "get HTML content") - title = re.search( - r'
    (.+?)
    ', - webpage, - re.DOTALL) + title = self._search_regex( + r'
    (?P.+?)</div>', + webpage, 'title', flags=re.DOTALL) assert title - title = title.group(1).replace('\n', '') + title = title.replace('\n', '') # get real url - n1 = re.search(r'so.addVariable\(\'file\',\'(\d+)\'', webpage) - n2 = re.search(r'so.addVariable\(\'seccode\',\'(.+?)\'', webpage) - n3 = re.search(r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage) + n1 = self._search_regex( + r'so.addVariable\(\'file\',\'(?P<n1>\d+)\'', webpage, 'n1') + n2 = self._search_regex( + r'so.addVariable\(\'seccode\',\'(?P<n2>.+?)\'', webpage, 'n2') + n3 = self._search_regex( + r'so.addVariable\(\'max_vid\',\'(?P<n3>\d+)\'', webpage, 'n3') if not (n1 and n2 and n3): raise ExtractorError("You are Blocked by Server.") - url_params = compat_urllib_parse.urlencode({ - 'VID': n1.group(1), + 'VID': n1, 'mp4': '1', - 'seccode': n2.group(1), - 'max_vid': n3.group(1), + 'seccode': n2, + 'max_vid': n3, }) t_url = 'http://91porn.com/getfile.php?' + url_params info_cn = self._download_webpage(t_url, video_id, "get real video_url") - video_url = re.search(r'file=(http.+?)&', info_cn).group(1) + video_url = self._search_regex(r'file=(?P<url>http.+?)&', info_cn, 'url') + # construct info info = { 'id': video_id, 'title': title, From 9b254aa177d58b7b4c4f44dce8c38fa7978c7df6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 27 May 2015 23:41:43 +0600 Subject: [PATCH 0805/2721] [nowtv] Add non-free video check --- youtube_dl/extractor/nowtv.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 5c91acec6..295168432 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -120,10 +120,15 @@ class NowTVIE(InfoExtractor): video_id = compat_str(info['id']) - if info.get('geoblocked'): - raise ExtractorError( - 'Video %s is not available from your location due to geo restriction' % video_id, - expected=True) + files = info['files'] + 
if not files: + if info.get('geoblocked', False): + raise ExtractorError( + 'Video %s is not available from your location due to geo restriction' % video_id, + expected=True) + if not info.get('free', True): + raise ExtractorError( + 'Video %s is not available for free' % video_id, expected=True) f = info.get('format', {}) station = f.get('station') or station @@ -138,7 +143,7 @@ class NowTVIE(InfoExtractor): } formats = [] - for item in info['files']['items']: + for item in files['items']: item_path = remove_start(item['path'], '/') tbr = int_or_none(item['bitrate']) m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path) From ff4a1279f2d40fdba3287d4e7949bd8caa89eb04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 May 2015 01:15:04 +0600 Subject: [PATCH 0806/2721] [nowtv] Do not request unnecessary metadata --- youtube_dl/extractor/nowtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 295168432..d39bbde99 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -115,7 +115,7 @@ class NowTVIE(InfoExtractor): station = mobj.group('station') info = self._download_json( - 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files,breakpoints,paymentPaytypes,trailers,pictures' % display_id, + 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id, display_id) video_id = compat_str(info['id']) From 9e0b5791281c68e5773555688928184064396011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 28 May 2015 01:26:14 +0600 Subject: [PATCH 0807/2721] [nowtv] Add test for rtlnitro --- youtube_dl/extractor/nowtv.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index d39bbde99..173e46cd8 100644 --- a/youtube_dl/extractor/nowtv.py +++ 
b/youtube_dl/extractor/nowtv.py @@ -53,6 +53,24 @@ class NowTVIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + # rtlnitro + 'url': 'http://www.nowtv.de/rtlnitro/alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00/player', + 'info_dict': { + 'id': '165780', + 'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00', + 'ext': 'mp4', + 'title': 'Hals- und Beinbruch', + 'description': 'md5:b50d248efffe244e6f56737f0911ca57', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1432415400, + 'upload_date': '20150523', + 'duration': 2742, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { # superrtl 'url': 'http://www.nowtv.de/superrtl/medicopter-117/angst/player', From f9355dc989362f31d8e21ccb8fa765546a2360f2 Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Thu, 28 May 2015 17:00:09 +0800 Subject: [PATCH 0808/2721] [youku] update youku --- youtube_dl/extractor/youku.py | 248 ++++++++++++++++++++++------------ 1 file changed, 162 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 97b98bbe8..8d86c3f45 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,123 +1,199 @@ # coding: utf-8 - from __future__ import unicode_literals -import math -import random import re -import time +import base64 from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) - +from ..utils import ExtractorError class YoukuIE(InfoExtractor): + IE_NAME = 'youku' _VALID_URL = r'''(?x) (?: http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| youku:) (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|) ''' + _TEST = { - 'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html', - 'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b', - 'params': { - 'test': False - }, - 'info_dict': { - 'id': 'XNDgyMDQ2NTQw_part00', - 'ext': 'flv', - 'title': 'youtube-dl test video "\'/\\ä↭𝕐' - } + 
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', + 'md5': '5f3af4192eabacc4501508d54a8cabd7', + 'info_dict': { + 'id': 'XMTc1ODE5Njcy', + 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', + 'ext': 'flv' + } } - def _gen_sid(self): - nowTime = int(time.time() * 1000) - random1 = random.randint(1000, 1998) - random2 = random.randint(1000, 9999) + def construct_video_urls(self, data1, data2): + # get sid, token + def yk_t(s1, s2): + ls = list(range(256)) + t = 0 + for i in range(256): + t = (t + ls[i] + ord(s1[i%len(s1)])) % 256 + ls[i], ls[t] = ls[t], ls[i] + s, x, y = '', 0, 0 + for i in range(len(s2)): + y = (y + 1) % 256 + x = (x + ls[y]) % 256 + ls[x], ls[y] = ls[y], ls[x] + s += chr((s2[i] ^ ls[(ls[x]+ls[y]) % 256])) + return s - return "%d%d%d" % (nowTime, random1, random2) + sid, token = yk_t( + 'becaf9be', base64.b64decode(bytes(data2['ep'], 'ascii')) + ).split('_') - def _get_file_ID_mix_string(self, seed): - mixed = [] - source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890") - seed = float(seed) - for i in range(len(source)): - seed = (seed * 211 + 30031) % 65536 - index = math.floor(seed / 65536 * len(source)) - mixed.append(source[int(index)]) - source.remove(source[int(index)]) - # return ''.join(mixed) - return mixed + # get oip + oip = data2['ip'] - def _get_file_id(self, fileId, seed): - mixed = self._get_file_ID_mix_string(seed) - ids = fileId.split('*') - realId = [] - for ch in ids: - if ch: - realId.append(mixed[int(ch)]) - return ''.join(realId) + # get fileid + string_ls = list( + 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890') + shuffled_string_ls = [] + seed = data1['seed'] + N = len(string_ls) + for ii in range(N): + seed = (seed * 0xd3 + 0x754f) % 0x10000 + idx = seed * len(string_ls) // 0x10000 + shuffled_string_ls.append(string_ls[idx]) + del string_ls[idx] + + fileid_dict = {} + for format in data1['streamtypes']: + streamfileid = [ + int(i) for i in 
data1['streamfileids'][format].strip('*').split('*')] + fileid = ''.join( + [shuffled_string_ls[i] for i in streamfileid]) + fileid_dict[format] = fileid[:8] + '%s' + fileid[10:] + + def get_fileid(format, n): + fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2) + return fileid + + # get ep + def generate_ep(format, n): + fileid = get_fileid(format, n) + ep_t = yk_t( + 'bf7e5f01', + bytes('%s_%s_%s' % (sid, fileid, token), 'ascii')) + ep = base64.b64encode(bytes(ep_t, 'latin')).decode() + ep = ep.replace('+', '%2B') + ep = ep.replace('/', '%2F') + ep = ep.replace('=', '%2D') + return ep + + # generate video_urls + video_urls_dict = {} + for format in data1['streamtypes']: + video_urls = [] + for dt in data1['segs'][format]: + n = str(int(dt['no'])) + video_url = \ + 'http://k.youku.com/player/getFlvPath/' + \ + 'sid/' + sid + \ + '_' + str(int(n)+1).zfill(2) + \ + '/st/' + self.parse_ext_l(format) + \ + '/fileid/' + get_fileid(format, n) + '?' + \ + 'K=' + str(dt['k']) + \ + '&hd=' + self.get_hd(format) + \ + '&myp=0' + \ + '&ts=' + str(dt['seconds']) + \ + '&ypp=0&ctype=12&ev=1' + \ + '&token=' + str(token) + \ + '&oip=' + str(oip) + \ + '&ep=' + generate_ep(format, n) + video_urls.append(video_url) + video_urls_dict[format] = video_urls + + return video_urls_dict + + def get_hd(self, fm): + hd_id_dict = { + 'flv': '0', + 'mp4': '1', + 'hd2': '2', + 'hd3': '3', + '3gp': '0', + '3gphd': '1' + } + return hd_id_dict[fm] + + def parse_ext_l(self, fm): + ext_dict = { + 'flv': 'flv', + 'mp4': 'mp4', + 'hd2': 'flv', + 'hd3': 'flv', + '3gp': 'flv', + '3gphd': 'mp4', + } + return ext_dict[fm] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id + # request basic data + data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id + data2_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id - config 
= self._download_json(info_url, video_id) + raw_data1 = self._download_json(data1_url, video_id) + raw_data2 = self._download_json(data2_url, video_id) + data1 = raw_data1['data'][0] + data2 = raw_data2['data'][0] - error_code = config['data'][0].get('error_code') + error_code = data1.get('error_code') if error_code: # -8 means blocked outside China. - error = config['data'][0].get('error') # Chinese and English, separated by newline. - raise ExtractorError(error or 'Server reported error %i' % error_code, - expected=True) + # Chinese and English, separated by newline. + error = data1.get('error') + raise ExtractorError( + error or 'Server reported error %i' % + error_code, + expected=True) - video_title = config['data'][0]['title'] - seed = config['data'][0]['seed'] + title = data1['title'] - format = self._downloader.params.get('format', None) - supported_format = list(config['data'][0]['streamfileids'].keys()) + # generate video_urls_dict + video_urls_dict = self.construct_video_urls(data1, data2) - # TODO proper format selection - if format is None or format == 'best': - if 'hd2' in supported_format: - format = 'hd2' - else: - format = 'flv' - ext = 'flv' - elif format == 'worst': - format = 'mp4' - ext = 'mp4' - else: - format = 'flv' - ext = 'flv' + # construct info + entries = [] + for fm in data1['streamtypes']: + #formats = [] + video_urls = video_urls_dict[fm] + for i in range(len(video_urls)): + if len(entries) < i+1: + entries.append({'formats': []}) + entries[i]['formats'].append( + { + 'url': video_urls[i], + 'format_id': fm, + 'ext': self.parse_ext_l(fm), + 'filesize': int(data1['segs'][fm][i]['size']) + } + ) - fileid = config['data'][0]['streamfileids'][format] - keys = [s['k'] for s in config['data'][0]['segs'][format]] - # segs is usually a dictionary, but an empty *list* if an error occured. 
- - files_info = [] - sid = self._gen_sid() - fileid = self._get_file_id(fileid, seed) - - # column 8,9 of fileid represent the segment number - # fileid[7:9] should be changed - for index, key in enumerate(keys): - temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) - download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) + for i in range(len(entries)): + entries[i].update( + { + 'id': '_part%d' % (i+1), + 'title': title, + } + ) + if len(entries) > 1: info = { - 'id': '%s_part%02d' % (video_id, index), - 'url': download_url, - 'uploader': None, - 'upload_date': None, - 'title': video_title, - 'ext': ext, + '_type': 'multi_video', + 'id': video_id, + 'title': title, + 'entries': entries, } - files_info.append(info) + else: + info = entries[0] + info['id'] = video_id - return files_info + return info From ca45246627f5a67a7c82cd40a11e5c4ff5f68871 Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Thu, 28 May 2015 21:04:58 +0800 Subject: [PATCH 0809/2721] [youku] compatible for python > 3.3 or > 2.7 --- youtube_dl/extractor/youku.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 8d86c3f45..7a07c8a5f 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import sys +pyvs = sys.version_info[0] import re import base64 @@ -34,16 +36,23 @@ class YoukuIE(InfoExtractor): for i in range(256): t = (t + ls[i] + ord(s1[i%len(s1)])) % 256 ls[i], ls[t] = ls[t], ls[i] - s, x, y = '', 0, 0 + s = '' if pyvs == 3 else b'' + x, y = 0, 0 for i in range(len(s2)): y = (y + 1) % 256 x = (x + ls[y]) % 256 ls[x], ls[y] = ls[y], ls[x] - s += chr((s2[i] ^ ls[(ls[x]+ls[y]) % 256])) + if isinstance(s2[i], int): + s += chr(s2[i] ^ ls[(ls[x]+ls[y]) % 256]) + else: + s += chr(ord(s2[i]) ^ 
ls[(ls[x]+ls[y]) % 256]) return s sid, token = yk_t( - 'becaf9be', base64.b64decode(bytes(data2['ep'], 'ascii')) + 'becaf9be', + base64.b64decode(bytes(data2['ep'], 'ascii')) \ + if pyvs == 3 \ + else base64.b64decode(data2['ep']) ).split('_') # get oip @@ -78,8 +87,15 @@ class YoukuIE(InfoExtractor): fileid = get_fileid(format, n) ep_t = yk_t( 'bf7e5f01', - bytes('%s_%s_%s' % (sid, fileid, token), 'ascii')) - ep = base64.b64encode(bytes(ep_t, 'latin')).decode() + bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') \ + if pyvs == 3 \ + else ('%s_%s_%s' % (sid, fileid, token)) + ) + ep = base64.b64encode( + bytes(ep_t, 'latin') \ + if pyvs == 3 \ + else ep_t + ).decode() ep = ep.replace('+', '%2B') ep = ep.replace('/', '%2F') ep = ep.replace('=', '%2D') From 806598b94dec1268566ae71d671116060f7971d6 Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Fri, 29 May 2015 08:21:24 +0800 Subject: [PATCH 0810/2721] [porn91] the one that _search_regex returns not needs to be checked --- youtube_dl/extractor/porn91.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index f3a97df64..b62eec92d 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -31,7 +31,6 @@ class Porn91IE(InfoExtractor): title = self._search_regex( r'<div id="viewvideo-title">(?P<title>.+?)</div>', webpage, 'title', flags=re.DOTALL) - assert title title = title.replace('\n', '') # get real url @@ -41,8 +40,6 @@ class Porn91IE(InfoExtractor): r'so.addVariable\(\'seccode\',\'(?P<n2>.+?)\'', webpage, 'n2') n3 = self._search_regex( r'so.addVariable\(\'max_vid\',\'(?P<n3>\d+)\'', webpage, 'n3') - if not (n1 and n2 and n3): - raise ExtractorError("You are Blocked by Server.") url_params = compat_urllib_parse.urlencode({ 'VID': n1, 'mp4': '1', From 1498940b10a3f43490c05045ebe7a517267a2bff Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Fri, 29 May 2015 10:13:09 +0800 Subject: [PATCH 
0811/2721] [youku] compare bytes and str for compatible; use compat_urllib_parse for making video_url --- youtube_dl/extractor/youku.py | 38 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 7a07c8a5f..063f2e10e 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals -import sys -pyvs = sys.version_info[0] import re import base64 from .common import InfoExtractor from ..utils import ExtractorError +from ..compat import compat_urllib_parse + +bytes_is_str = (bytes == str) # for compatible + class YoukuIE(InfoExtractor): IE_NAME = 'youku' _VALID_URL = r'''(?x) @@ -36,7 +38,7 @@ class YoukuIE(InfoExtractor): for i in range(256): t = (t + ls[i] + ord(s1[i%len(s1)])) % 256 ls[i], ls[t] = ls[t], ls[i] - s = '' if pyvs == 3 else b'' + s = '' if not bytes_is_str else b'' x, y = 0, 0 for i in range(len(s2)): y = (y + 1) % 256 @@ -51,7 +53,7 @@ class YoukuIE(InfoExtractor): sid, token = yk_t( 'becaf9be', base64.b64decode(bytes(data2['ep'], 'ascii')) \ - if pyvs == 3 \ + if not bytes_is_str \ else base64.b64decode(data2['ep']) ).split('_') @@ -88,17 +90,14 @@ class YoukuIE(InfoExtractor): ep_t = yk_t( 'bf7e5f01', bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') \ - if pyvs == 3 \ + if not bytes_is_str \ else ('%s_%s_%s' % (sid, fileid, token)) ) ep = base64.b64encode( bytes(ep_t, 'latin') \ - if pyvs == 3 \ + if not bytes_is_str \ else ep_t ).decode() - ep = ep.replace('+', '%2B') - ep = ep.replace('/', '%2F') - ep = ep.replace('=', '%2D') return ep # generate video_urls @@ -107,20 +106,25 @@ class YoukuIE(InfoExtractor): video_urls = [] for dt in data1['segs'][format]: n = str(int(dt['no'])) + param = { + 'K': dt['k'], + 'hd': self.get_hd(format), + 'myp': 0, + 'ts': dt['seconds'], + 'ypp': 0, + 'ctype': 12, + 'ev': 1, + 'token': token, + 'oip': oip, + 'ep': 
generate_ep(format, n) + } video_url = \ 'http://k.youku.com/player/getFlvPath/' + \ 'sid/' + sid + \ '_' + str(int(n)+1).zfill(2) + \ '/st/' + self.parse_ext_l(format) + \ '/fileid/' + get_fileid(format, n) + '?' + \ - 'K=' + str(dt['k']) + \ - '&hd=' + self.get_hd(format) + \ - '&myp=0' + \ - '&ts=' + str(dt['seconds']) + \ - '&ypp=0&ctype=12&ev=1' + \ - '&token=' + str(token) + \ - '&oip=' + str(oip) + \ - '&ep=' + generate_ep(format, n) + compat_urllib_parse.urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls From 84e1e036c2cb7311cdea14763bec3322403a8d54 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 29 May 2015 12:44:31 +0800 Subject: [PATCH 0812/2721] [senate] Extend _VALID_URL (fixes #5836) --- youtube_dl/extractor/senateisvp.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index d3b8a1be4..9c53704ea 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor): ["arch", "", "http://ussenate-f.akamaihd.net/"] ] _IE_NAME = 'senate.gov' - _VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)' + _VALID_URL = r'http://www\.senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'info_dict': { @@ -72,12 +72,16 @@ class SenateISVPIE(InfoExtractor): 'ext': 'mp4', 'title': 'Integrated Senate Video Player' } + }, { + # From http://www.c-span.org/video/?96791-1 + 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', + 'only_matching': True, }] @staticmethod def _search_iframe_url(webpage): mobj = re.search( - r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]", 
+ r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", webpage) if mobj: return mobj.group('url') From eb6cb9fbe934fe99a35af22065cf91063d416c12 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 29 May 2015 07:52:17 +0200 Subject: [PATCH 0813/2721] release 2015.05.29 --- README.md | 4 ++-- docs/supportedsites.md | 9 ++++----- youtube_dl/version.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e51bb5343..f3d83c89f 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,7 @@ which means you can modify it, redistribute it or use it however you like. --no-progress Do not print progress bar --console-title Display progress in console titlebar -v, --verbose Print various debugging information - --dump-pages Print downloaded pages to debug problems (very verbose) + --dump-pages Print downloaded pages encoded using base64 to debug problems (very verbose) --write-pages Write downloaded intermediary pages to files in the current directory to debug problems --print-traffic Display sent and read HTTP traffic -C, --call-home Contact the youtube-dl server for debugging @@ -220,7 +220,7 @@ which means you can modify it, redistribute it or use it however you like. --embed-thumbnail Embed thumbnail in the audio as cover art --add-metadata Write metadata to the video file --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed - parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - + parameters replace existing values. Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise" --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --fixup POLICY Automatically correct known faults of the file. 
One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a4879bd9a..a421ae62b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -26,8 +26,7 @@ - **anitube.se** - **AnySex** - **Aparat** - - **AppleDailyAnimationNews** - - **AppleDailyRealtimeNews** + - **AppleDaily** - **AppleTrailers** - **archive.org**: archive.org videos - **ARD** @@ -152,7 +151,6 @@ - **fc2** - **fernsehkritik.tv** - **fernsehkritik.tv:postecke** - - **Firedrive** - **Firstpost** - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) @@ -230,6 +228,7 @@ - **KanalPlay**: Kanal 5/9/11 Play - **Kankan** - **Karaoketv** + - **KarriereVideos** - **keek** - **KeezMovies** - **KhanAcademy** @@ -322,6 +321,7 @@ - **NosVideo** - **novamov**: NovaMov - **Nowness** + - **NowTV** - **nowvideo**: NowVideo - **npo.nl** - **npo.nl:live** @@ -393,7 +393,6 @@ - **Rte** - **rtl.nl**: rtl.nl and rtlxl.nl - **RTL2** - - **RTLnow** - **RTP** - **RTS**: RTS.ch - **rtve.es:alacarta**: RTVE a la carta @@ -431,7 +430,6 @@ - **smotri:community**: Smotri.com community videos - **smotri:user**: Smotri.com user videos - **Snotr** - - **Sockshare** - **Sohu** - **soundcloud** - **soundcloud:playlist** @@ -564,6 +562,7 @@ - **vier:videos** - **Viewster** - **viki** + - **viki:channel** - **vimeo** - **vimeo:album** - **vimeo:channel** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b33385153..653710131 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.20' +__version__ = '2015.05.29' From d6aa68ce75ef4f4d27fbf7103edfda1f92ba70b2 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 29 May 2015 12:47:20 +0200 Subject: [PATCH 0814/2721] [postprocessor/embedthumbnail] embed mp4 too (fixes #5840) --- youtube_dl/postprocessor/embedthumbnail.py | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py index 8f825f785..774494efd 100644 --- a/youtube_dl/postprocessor/embedthumbnail.py +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -49,7 +49,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - elif info['ext'] == 'm4a': + elif info['ext'] in ['m4a', 'mp4']: if not check_executable('AtomicParsley', ['-v']): raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.') @@ -82,6 +82,6 @@ class EmbedThumbnailPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) else: - raise EmbedThumbnailPPError('Only mp3 and m4a are supported for thumbnail embedding for now.') + raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.') return [], info From 605ec701b7b4cd120a9acb33bfcc4306719b59b4 Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Fri, 29 May 2015 23:32:04 +0800 Subject: [PATCH 0815/2721] [iqiyi] Add new extractor for iqiyi.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/iqiyi.py | 214 +++++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 youtube_dl/extractor/iqiyi.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 80c9cb107..85c1b1a3a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -229,6 +229,7 @@ from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import IPrimaIE +from .iqiyi import IqiyiIE from .ivi import ( IviIE, IviCompilationIE diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py new file mode 100644 index 000000000..d96d13225 --- /dev/null +++ 
b/youtube_dl/extractor/iqiyi.py @@ -0,0 +1,214 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_chr, + compat_parse_qs, + compat_urllib_parse, + compat_urllib_request, + compat_urlparse, + compat_str, +) + +from ..utils import ExtractorError + +import re +import time +import json +import uuid +import math +import random +import zlib +import hashlib + +class IqiyiIE(InfoExtractor): + IE_NAME = 'iqiyi' + + _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' + + _TEST = { + 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', + 'md5': '260f0f59686e65e886995d0ba791ab83', + 'info_dict': { + 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', + 'title': '美国德州空中惊现奇异云团 酷似UFO', + 'ext': 'f4v' + } + } + + def construct_video_urls(self, data, video_id, _uuid): + def do_xor(x, y): + a = y % 3 + if a == 1: + return x ^ 121 + if a == 2: + return x ^ 72 + return x ^ 103 + + def get_encode_code(l): + a = 0 + b = l.split('-') + c = len(b) + s = '' + for i in range(c - 1, -1, -1): + a = do_xor(int(b[c-i-1], 16), i) + s += chr(a) + return s[::-1] + + def get_path_key(x): + mg = ')(*&^flash@#$%a' + tm = self._download_json( + 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t'] + t = str(int(math.floor(int(tm)/(600.0)))) + return hashlib.md5( + (t+mg+x).encode('utf8')).hexdigest() + + video_urls_dict = {} + for i in data['vp']['tkl'][0]['vs']: + if 0 < int(i['bid']) <= 10: + format_id = self.get_format(i['bid']) + + video_urls_info = i['fs'] + if not i['fs'][0]['l'].startswith('/'): + t = get_encode_code(i['fs'][0]['l']) + if t.endswith('mp4'): + video_urls_info = i['flvs'] + + video_urls = [] + for ii in video_urls_info: + vl = ii['l'] + if not vl.startswith('/'): + vl = get_encode_code(vl) + key = get_path_key( + vl.split('/')[-1].split('.')[0]) + filesize = ii['b'] + base_url = data['vp']['du'].split('/') + base_url.insert(-1, key) + base_url = '/'.join(base_url) + param = { + 'su': _uuid, 
+ 'qyid': uuid.uuid4().hex, + 'client': '', + 'z': '', + 'bt': '', + 'ct': '', + 'tn': str(int(time.time())) + } + api_video_url = base_url + vl + '?' + \ + compat_urllib_parse.urlencode(param) + js = self._download_json(api_video_url, video_id) + video_url = js['l'] + video_urls.append( + (video_url, filesize)) + + video_urls_dict[format_id] = video_urls + return video_urls_dict + + def get_format(self, bid): + bid_dict = { + '1': 'standard', + '2': 'high', + '3': 'super', + '4': 'suprt-high', + '5': 'fullhd', + '10': '4k' + } + return bid_dict[str(bid)] + + def get_raw_data(self, tvid, video_id, enc_key, _uuid): + tm = str(int(time.time())) + param = { + 'key': 'fvip', + 'src': hashlib.md5(b'youtube-dl').hexdigest(), + 'tvId': tvid, + 'vid': video_id, + 'vinfo': 1, + 'tm': tm, + 'enc': hashlib.md5( + (enc_key + tm + tvid).encode('utf8')).hexdigest(), + 'qyid': _uuid, + 'tn': random.random(), + 'um': 0, + 'authkey': hashlib.md5( + (tm + tvid).encode('utf8')).hexdigest() + } + + api_url = 'http://cache.video.qiyi.com/vms' + '?' 
+ \ + compat_urllib_parse.urlencode(param) + raw_data = self._download_json(api_url, video_id) + return raw_data + + def get_enc_key(self, swf_url, video_id): + req = self._request_webpage( + swf_url, video_id, note='download swf content') + cn = req.read() + cn = zlib.decompress(cn[8:]) + pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv') + enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8') + return enc_key + + def _real_extract(self, url): + webpage = self._download_webpage( + url, 'temp_id', note='download video page') + tvid = self._search_regex( + r'tvId ?= ?(\'|\")(?P<tvid>\d+)', webpage, 'tvid', flags=re.I, group='tvid') + video_id = self._search_regex( + r'videoId ?= ?(\'|\")(?P<video_id>[a-z\d]+)', + webpage, 'video_id', flags=re.I, group='video_id') + swf_url = self._search_regex( + r'(?P<swf>http://.+?MainPlayer.+?\.swf)', webpage, 'swf') + _uuid = uuid.uuid4().hex + + enc_key = self.get_enc_key(swf_url, video_id) + + raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) + assert raw_data['code'] == 'A000000' + if not raw_data['data']['vp']['tkl']: + raise ExtractorError('No support iQiqy VIP video') + + data = raw_data['data'] + + title = data['vi']['vn'] + + # generate video_urls_dict + video_urls_dict = self.construct_video_urls(data, video_id, _uuid) + + # construct info + entries = [] + for format_id in video_urls_dict: + video_urls = video_urls_dict[format_id] + for i, video_url_info in enumerate(video_urls): + if len(entries) < i+1: + entries.append({'formats': []}) + entries[i]['formats'].append( + { + 'url': video_url_info[0], + 'filesize': video_url_info[-1], + 'format_id': format_id, + } + ) + + for i in range(len(entries)): + entries[i].update( + { + 'id': '_part%d' % (i+1), + 'title': title, + } + ) + + if len(entries) > 1: + info = { + '_type': 'multi_video', + 'id': video_id, + 'title': title, + 'entries': entries, + } + else: + info = entries[0] + info['id'] = video_id + info['title'] = title + + return info From 
08f7db20c16743a2bd3040eb7dac11d675011eef Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Sat, 30 May 2015 10:03:32 +0800 Subject: [PATCH 0816/2721] [youku] change format_id --- youtube_dl/extractor/youku.py | 37 +++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 063f2e10e..aed6b960a 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -132,26 +132,37 @@ class YoukuIE(InfoExtractor): def get_hd(self, fm): hd_id_dict = { - 'flv': '0', - 'mp4': '1', - 'hd2': '2', - 'hd3': '3', - '3gp': '0', - '3gphd': '1' + 'flv' : '0', + 'mp4' : '1', + 'hd2' : '2', + 'hd3' : '3', + '3gp' : '0', + '3gphd' : '1' } return hd_id_dict[fm] def parse_ext_l(self, fm): ext_dict = { - 'flv': 'flv', - 'mp4': 'mp4', - 'hd2': 'flv', - 'hd3': 'flv', - '3gp': 'flv', - '3gphd': 'mp4', + 'flv' : 'flv', + 'mp4' : 'mp4', + 'hd2' : 'flv', + 'hd3' : 'flv', + '3gp' : 'flv', + '3gphd' : 'mp4' } return ext_dict[fm] + def get_format_name(self, fm): + _dict = { + '3gp' : 'h6', + '3gphd' : 'h5', + 'flv' : 'h4', + 'mp4' : 'h3', + 'hd2' : 'h2', + 'hd3' : 'h1' + } + return _dict[fm] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') @@ -191,7 +202,7 @@ class YoukuIE(InfoExtractor): entries[i]['formats'].append( { 'url': video_urls[i], - 'format_id': fm, + 'format_id': self.get_format_name(fm), 'ext': self.parse_ext_l(fm), 'filesize': int(data1['segs'][fm][i]['size']) } From 670861bd206ab4063baeb6b80d06a054ce4e1d62 Mon Sep 17 00:00:00 2001 From: PeterDing <dfhayst@gmail.com> Date: Sat, 30 May 2015 10:37:54 +0800 Subject: [PATCH 0817/2721] [iqiyi] Do not request for unneeded formats --- youtube_dl/extractor/iqiyi.py | 72 ++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index d96d13225..747f3f902 
100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -4,20 +4,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_chr, - compat_parse_qs, - compat_urllib_parse, - compat_urllib_request, - compat_urlparse, - compat_str, -) +from ..compat import compat_urllib_parse from ..utils import ExtractorError import re import time -import json import uuid import math import random @@ -31,15 +23,15 @@ class IqiyiIE(InfoExtractor): _TEST = { 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', - 'md5': '260f0f59686e65e886995d0ba791ab83', + 'md5': '2cb594dc2781e6c941a110d8f358118b', 'info_dict': { 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', 'title': '美国德州空中惊现奇异云团 酷似UFO', - 'ext': 'f4v' + 'ext': 'f4v', } } - def construct_video_urls(self, data, video_id, _uuid): + def construct_video_urls(self, data, video_id, _uuid, bid): def do_xor(x, y): a = y % 3 if a == 1: @@ -66,10 +58,21 @@ class IqiyiIE(InfoExtractor): return hashlib.md5( (t+mg+x).encode('utf8')).hexdigest() + # get accept format + # getting all format will spend minutes for a big video. 
+ if bid == 'best': + bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] \ + if 0 < int(i['bid']) <= 10] + bid = str(max(bids)) + video_urls_dict = {} for i in data['vp']['tkl'][0]['vs']: if 0 < int(i['bid']) <= 10: format_id = self.get_format(i['bid']) + else: + continue + + video_urls = [] video_urls_info = i['fs'] if not i['fs'][0]['l'].startswith('/'): @@ -77,7 +80,12 @@ class IqiyiIE(InfoExtractor): if t.endswith('mp4'): video_urls_info = i['flvs'] - video_urls = [] + if int(i['bid']) != int(bid): # ignore missing match format + video_urls.extend( + [('http://example.com/v.flv', ii['b']) for ii in video_urls_info]) + video_urls_dict[format_id] = video_urls + continue + for ii in video_urls_info: vl = ii['l'] if not vl.startswith('/'): @@ -108,15 +116,27 @@ class IqiyiIE(InfoExtractor): return video_urls_dict def get_format(self, bid): - bid_dict = { - '1': 'standard', - '2': 'high', - '3': 'super', - '4': 'suprt-high', - '5': 'fullhd', - '10': '4k' + _dict = { + '1' : 'h6', + '2' : 'h5', + '3' : 'h4', + '4' : 'h3', + '5' : 'h2', + '10' : 'h1' } - return bid_dict[str(bid)] + return _dict.get(str(bid), None) + + def get_bid(self, format_id): + _dict = { + 'h6' : '1', + 'h5' : '2', + 'h4' : '3', + 'h3' : '4', + 'h2' : '5', + 'h1' : '10', + 'best' : 'best' + } + return _dict.get(format_id, None) def get_raw_data(self, tvid, video_id, enc_key, _uuid): tm = str(int(time.time())) @@ -173,8 +193,14 @@ class IqiyiIE(InfoExtractor): title = data['vi']['vn'] + format = self._downloader.params.get('format', None) + bid = self.get_bid(format) if format else 'best' + if not bid: + raise ExtractorError('Can\'t get format.') + # generate video_urls_dict - video_urls_dict = self.construct_video_urls(data, video_id, _uuid) + video_urls_dict = self.construct_video_urls( + data, video_id, _uuid, bid) # construct info entries = [] @@ -188,10 +214,12 @@ class IqiyiIE(InfoExtractor): 'url': video_url_info[0], 'filesize': video_url_info[-1], 'format_id': format_id, + 
'preference': int(self.get_bid(format_id)) } ) for i in range(len(entries)): + self._sort_formats(entries[i]['formats']) entries[i].update( { 'id': '_part%d' % (i+1), From fafec39d4177f4873bb2393749a46873c4ffda4a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 May 2015 13:23:09 +0800 Subject: [PATCH 0818/2721] [spiegeltv] Changed RTMP server (fixes #5788 and fixes #5843) Thanks to @brickleroux for finding out the problem --- youtube_dl/extractor/spiegeltv.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index 98cf92d89..359722ad6 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -51,9 +51,9 @@ class SpiegeltvIE(InfoExtractor): is_wide = media_json['is_wide'] server_json = self._download_json( - 'http://www.spiegel.tv/streaming_servers/', video_id, - note='Downloading server information') - server = server_json[0]['endpoint'] + 'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json', + video_id, note='Downloading server information') + server = server_json['streamingserver'][0]['endpoint'] thumbnails = [] for image in media_json['images']: @@ -76,5 +76,6 @@ class SpiegeltvIE(InfoExtractor): 'ext': 'm4v', 'description': description, 'duration': duration, - 'thumbnails': thumbnails + 'thumbnails': thumbnails, + 'rtmp_live': True, } From 6ebdfe43e439239df051f6071a23c51705c150cf Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Sat, 30 May 2015 09:30:14 +0200 Subject: [PATCH 0819/2721] [tube8] fix extractor (fixes #5846) --- youtube_dl/extractor/tube8.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index d73ad3762..6ca8840b0 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -47,7 +47,7 @@ class Tube8IE(InfoExtractor): webpage = 
self._download_webpage(req, display_id) flashvars = json.loads(self._html_search_regex( - r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars')) + r'flashvars\s*=\s*({.+?})', webpage, 'flashvars')) video_url = flashvars['video_url'] if flashvars.get('encrypted') is True: @@ -58,19 +58,19 @@ class Tube8IE(InfoExtractor): thumbnail = flashvars.get('image_url') title = self._html_search_regex( - r'videotitle\s*=\s*"([^"]+)', webpage, 'title') + r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') description = self._html_search_regex( - r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False) + r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False) uploader = self._html_search_regex( - r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>', + r'<span class="username">\s*(.+?)\s*<', webpage, 'uploader', fatal=False) like_count = int_or_none(self._html_search_regex( - r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False)) + r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False)) dislike_count = int_or_none(self._html_search_regex( - r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False)) + r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False)) view_count = self._html_search_regex( - r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False) + r'<strong>Views: </strong>([\d,\.]+)\s*</li>', webpage, 'view count', fatal=False) if view_count: view_count = str_to_int(view_count) comment_count = self._html_search_regex( From 0385d642232ba4e8b455d0c4eb95c7985f22f276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 14:12:58 +0600 Subject: [PATCH 0820/2721] [crunchyroll] Extract subtitles extraction routine --- youtube_dl/extractor/crunchyroll.py | 30 +++++++++++++---------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py 
b/youtube_dl/extractor/crunchyroll.py index 1c77df47e..4ac537a6d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -76,8 +76,8 @@ class CrunchyrollIE(InfoExtractor): self._login() def _decrypt_subtitles(self, data, iv, id): - data = bytes_to_intlist(data) - iv = bytes_to_intlist(iv) + data = bytes_to_intlist(base64.b64decode(data)) + iv = bytes_to_intlist(base64.b64decode(iv)) id = int(id) def obfuscate_key_aux(count, modulo, start): @@ -179,6 +179,16 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return output + def _extract_subtitles(self, subtitle): + sub_root = xml.etree.ElementTree.fromstring(subtitle) + return [{ + 'ext': 'srt', + 'data': self._convert_subtitles_to_srt(sub_root), + }, { + 'ext': 'ass', + 'data': self._convert_subtitles_to_ass(sub_root), + }] + def _get_subtitles(self, video_id, webpage): subtitles = {} for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): @@ -190,25 +200,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) if not id or not iv or not data: continue - id = int(id) - iv = base64.b64decode(iv) - data = base64.b64decode(data) - subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) if not lang_code: continue - sub_root = xml.etree.ElementTree.fromstring(subtitle) - subtitles[lang_code] = [ - { - 'ext': 'srt', - 'data': self._convert_subtitles_to_srt(sub_root), - }, - { - 'ext': 'ass', - 'data': self._convert_subtitles_to_ass(sub_root), - }, - ] + subtitles[lang_code] = self._extract_subtitles(subtitle) return subtitles def _real_extract(self, url): From b2cf6543b21bbe0954c45b35b1402eaca5187c0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 
14:30:04 +0600 Subject: [PATCH 0821/2721] [soompi] Improve and simplify --- youtube_dl/extractor/soompi.py | 146 ++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 65 deletions(-) diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py index 4726872dc..5da66ca9e 100644 --- a/youtube_dl/extractor/soompi.py +++ b/youtube_dl/extractor/soompi.py @@ -2,17 +2,31 @@ from __future__ import unicode_literals import re -import json -import base64 -import xml.etree.ElementTree -# Soompi uses the same subtitle encryption as crunchyroll from .crunchyroll import CrunchyrollIE +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + remove_start, + xpath_text, +) -class SoompiIE(CrunchyrollIE): + +class SoompiBaseIE(InfoExtractor): + def _get_episodes(self, webpage, episode_filter=None): + episodes = self._parse_json( + self._search_regex( + r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'), + None) + return list(filter(episode_filter, episodes)) + + +class SoompiIE(SoompiBaseIE, CrunchyrollIE): IE_NAME = 'soompi' - _VALID_URL = r'^https?://tv\.soompi\.com/en/watch/(?P<id>[0-9]+)' + _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://tv.soompi.com/en/watch/29235', 'info_dict': { @@ -26,84 +40,86 @@ class SoompiIE(CrunchyrollIE): }, }] - def _get_episodes(self, webpage, episode_filter=None): - episodes = json.loads( - self._search_regex(r'\s+VIDEOS\s+= (\[.+?\]);', webpage, "episodes meta")) - return [ep for ep in episodes if episode_filter is None or episode_filter(ep)] - - def _get_subtitles(self, video_id, show_format_xml): - subtitles = {} - subtitle_info_nodes = show_format_xml.findall('./{default}preload/subtitles/subtitle') - subtitle_nodes = show_format_xml.findall('./{default}preload/subtitle') + def _get_episode(self, webpage, video_id): + return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0] + def 
_get_subtitles(self, config, video_id): sub_langs = {} - for i in subtitle_info_nodes: - sub_langs[i.attrib["id"]] = i.attrib["title"] + for subtitle in config.findall('./{default}preload/subtitles/subtitle'): + sub_langs[subtitle.attrib['id']] = subtitle.attrib['title'] - for s in subtitle_nodes: - lang_code = sub_langs.get(s.attrib["id"], None) - if lang_code is None: + subtitles = {} + for s in config.findall('./{default}preload/subtitle'): + lang_code = sub_langs.get(s.attrib['id']) + if not lang_code: + continue + sub_id = s.get('id') + data = xpath_text(s, './data', 'data') + iv = xpath_text(s, './iv', 'iv') + if not id or not iv or not data: continue - - sub_id = int(s.attrib["id"]) - iv = base64.b64decode(s.find("iv").text) - data = base64.b64decode(s.find("data").text) subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8') - sub_root = xml.etree.ElementTree.fromstring(subtitle) - - subtitles[lang_code] = [{ - 'ext': 'srt', 'data': self._convert_subtitles_to_srt(sub_root) - }, { - 'ext': 'ass', 'data': self._convert_subtitles_to_ass(sub_root) - }] + subtitles[lang_code] = self._extract_subtitles(subtitle) return subtitles def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, note="Downloading episode page", - errnote="Video may not be available for your location") - vid_formats = re.findall(r"\?quality=q([0-9]+)", webpage) - - show_meta = json.loads( - self._search_regex(r'\s+var show = (\{.+?\});', webpage, "show meta")) - episodes = self._get_episodes( - webpage, episode_filter=lambda x: x['id'] == video_id) - - title = episodes[0]["name"] - description = episodes[0]["description"] - duration = int(episodes[0]["duration"]) - slug = show_meta["slug"] + try: + webpage = self._download_webpage( + url, video_id, 'Downloading episode page') + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: + webpage = ee.cause.read() + block_message = 
self._html_search_regex( + r'(?s)<div class="block-message">(.+?)</div>', webpage, + 'block message', default=None) + if block_message: + raise ExtractorError(block_message, expected=True) + raise formats = [] - show_format_xml = None - for vf in vid_formats: - show_format_url = "http://tv.soompi.com/en/show/%s/%s-config.xml?mode=hls&quality=q%s" \ - % (slug, video_id, vf) - show_format_xml = self._download_xml( - show_format_url, video_id, note="Downloading q%s show xml" % vf) - avail_formats = self._extract_m3u8_formats( - show_format_xml.find('./{default}preload/stream_info/file').text, - video_id, ext="mp4", m3u8_id=vf, preference=int(vf)) - formats.extend(avail_formats) + config = None + for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage): + config = self._download_xml( + 'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id), + video_id, 'Downloading %s XML' % format_id) + m3u8_url = xpath_text( + config, './{default}preload/stream_info/file', + '%s m3u8 URL' % format_id) + if not m3u8_url: + continue + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', m3u8_id=format_id)) self._sort_formats(formats) - subtitles = self.extract_subtitles(video_id, show_format_xml) + episode = self._get_episode(webpage, video_id) + + title = episode['name'] + description = episode.get('description') + duration = int_or_none(episode.get('duration')) + + thumbnails = [{ + 'id': thumbnail_id, + 'url': thumbnail_url, + } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()] + + subtitles = self.extract_subtitles(config, video_id) return { 'id': video_id, 'title': title, 'description': description, + 'thumbnails': thumbnails, 'duration': duration, 'formats': formats, 'subtitles': subtitles } -class SoompiShowIE(SoompiIE): +class SoompiShowIE(SoompiBaseIE): IE_NAME = 'soompi:show' - _VALID_URL = r'^https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)' + _VALID_URL = 
r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)' _TESTS = [{ 'url': 'http://tv.soompi.com/en/shows/liar-game', 'info_dict': { @@ -117,14 +133,14 @@ class SoompiShowIE(SoompiIE): def _real_extract(self, url): show_id = self._match_id(url) - webpage = self._download_webpage(url, show_id, note="Downloading show page") - title = self._og_search_title(webpage).replace("SoompiTV | ", "") + webpage = self._download_webpage( + url, show_id, 'Downloading show page') + + title = remove_start(self._og_search_title(webpage), 'SoompiTV | ') description = self._og_search_description(webpage) - episodes = self._get_episodes(webpage) - entries = [] - for ep in episodes: - entries.append(self.url_result( - 'http://tv.soompi.com/en/watch/%s' % ep['id'], 'Soompi', ep['id'])) + entries = [ + self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi') + for episode in self._get_episodes(webpage)] return self.playlist_result(entries, show_id, title, description) From 1a5b77dc21384c462e0be86a1638cafd15a6e236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 14:36:45 +0600 Subject: [PATCH 0822/2721] [crunchyroll] Fix python 3.2 --- youtube_dl/extractor/crunchyroll.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 4ac537a6d..41f0c736d 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -76,8 +76,8 @@ class CrunchyrollIE(InfoExtractor): self._login() def _decrypt_subtitles(self, data, iv, id): - data = bytes_to_intlist(base64.b64decode(data)) - iv = bytes_to_intlist(base64.b64decode(iv)) + data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) + iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8'))) id = int(id) def obfuscate_key_aux(count, modulo, start): From 5c2191a6053cb5b1210cef68406e8a52e86fd9fd Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 15:14:10 +0600 Subject: [PATCH 0823/2721] [vgtv] Skip wasLive hds (Closes #5835) --- youtube_dl/extractor/vgtv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index e6ee1e471..654298431 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -107,7 +107,8 @@ class VGTVIE(InfoExtractor): hls_url, video_id, 'mp4', m3u8_id='hls')) hds_url = streams.get('hds') - if hds_url: + # wasLive hds are always 404 + if hds_url and data.get('streamType') != 'wasLive': formats.extend(self._extract_f4m_formats( hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds')) From 4d454c5e4b7ecfae97ff109e05453f43d7cea0a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 15:15:42 +0600 Subject: [PATCH 0824/2721] [vgtv] Check for inactive videos --- youtube_dl/extractor/vgtv.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 654298431..472feb7f0 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + ExtractorError, + float_or_none, +) class VGTVIE(InfoExtractor): @@ -97,6 +100,10 @@ class VGTVIE(InfoExtractor): % (host, video_id, HOST_WEBSITES[host]), video_id, 'Downloading media JSON') + if data.get('status') == 'inactive': + raise ExtractorError( + 'Video %s is no longer available' % video_id, expected=True) + streams = data['streamUrls'] formats = [] From 181c7053e377700c1615bdff2b0fb19235762c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 16:04:44 +0600 Subject: [PATCH 0825/2721] [YoutubeDL] Make sure all 
formats have unique format_id --- youtube_dl/YoutubeDL.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d1953c18f..21d247f23 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1047,6 +1047,8 @@ class YoutubeDL(object): if not formats: raise ExtractorError('No video formats found!') + formats_dict = {} + # We check that all the formats have the format and format_id fields for i, format in enumerate(formats): if 'url' not in format: @@ -1054,6 +1056,18 @@ class YoutubeDL(object): if format.get('format_id') is None: format['format_id'] = compat_str(i) + format_id = format['format_id'] + if format_id not in formats_dict: + formats_dict[format_id] = [] + formats_dict[format_id].append(format) + + # Make sure all formats have unique format_id + for format_id, ambiguous_formats in formats_dict.items(): + if len(ambiguous_formats) > 1: + for i, format in enumerate(ambiguous_formats): + format['format_id'] = '%s-%d' % (format_id, i) + + for i, format in enumerate(formats): if format.get('format') is None: format['format'] = '{id} - {res}{note}'.format( id=format['format_id'], From b4dd98358f0a68650f6154e8de4e12b8881248aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 16:12:07 +0600 Subject: [PATCH 0826/2721] [vgtv] Properly handle lives --- youtube_dl/extractor/vgtv.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 472feb7f0..f38a72fde 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -62,16 +62,16 @@ class VGTVIE(InfoExtractor): }, { # streamType: live - 'url': 'http://www.vgtv.no/#!/live/100015/direkte-her-kan-du-se-laksen-live-fra-suldalslaagen', + 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', 'info_dict': { - 'id': '100015', + 'id': '113063', 'ext': 'flv', - 
'title': 'DIREKTE: Her kan du se laksen live fra Suldalslågen!', - 'description': 'md5:9a60cc23fa349f761628924e56eeec2d', + 'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:b3743425765355855f88e096acc93231', 'thumbnail': 're:^https?://.*\.jpg', 'duration': 0, - 'timestamp': 1407423348, - 'upload_date': '20140807', + 'timestamp': 1432975582, + 'upload_date': '20150530', 'view_count': int, }, 'params': { @@ -105,6 +105,7 @@ class VGTVIE(InfoExtractor): 'Video %s is no longer available' % video_id, expected=True) streams = data['streamUrls'] + stream_type = data.get('streamType') formats = [] @@ -115,7 +116,7 @@ class VGTVIE(InfoExtractor): hds_url = streams.get('hds') # wasLive hds are always 404 - if hds_url and data.get('streamType') != 'wasLive': + if hds_url and stream_type != 'wasLive': formats.extend(self._extract_f4m_formats( hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds')) @@ -143,13 +144,14 @@ class VGTVIE(InfoExtractor): return { 'id': video_id, - 'title': data['title'], + 'title': self._live_title(data['title']), 'description': data['description'], 'thumbnail': data['images']['main'] + '?t[]=900x506q80', 'timestamp': data['published'], 'duration': float_or_none(data['duration'], 1000), 'view_count': data['displays'], 'formats': formats, + 'is_live': True if stream_type == 'live' else False, } From e6e63e91a70a9c2e4ab92b8afad6fac3b8bede18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 16:18:11 +0600 Subject: [PATCH 0827/2721] [tf1] Extend _VALID_URL (Closes #5848) --- youtube_dl/extractor/tf1.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 656410528..3a68eaa80 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class TF1IE(InfoExtractor): """TF1 uses the 
wat.tv player.""" - _VALID_URL = r'http://(?:videos\.tf1|www\.tfou|www\.tf1)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' + _VALID_URL = r'http://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/.*?-(?P<id>\d+)(?:-\d+)?\.html' _TESTS = [{ 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'info_dict': { @@ -35,6 +35,9 @@ class TF1IE(InfoExtractor): }, { 'url': 'http://www.tf1.fr/tf1/koh-lanta/videos/replay-koh-lanta-22-mai-2015.html', 'only_matching': True, + }, { + 'url': 'http://lci.tf1.fr/sept-a-huit/videos/sept-a-huit-du-24-mai-2015-8611550.html', + 'only_matching': True, }] def _real_extract(self, url): From 5196b988971716b9e9c5884d33c757a41aa4548a Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 30 May 2015 14:16:18 +0300 Subject: [PATCH 0828/2721] [tubitv] Add new extractor (Closes #5524) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tubitv.py | 79 ++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/tubitv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f73bf646b..e7e0a55f2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -569,6 +569,7 @@ from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE from .trutube import TruTubeIE from .tube8 import Tube8IE +from .tubitv import TubiTvIE from .tudou import TudouIE from .tumblr import TumblrIE from .tunein import TuneInIE diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py new file mode 100644 index 000000000..03e971e5e --- /dev/null +++ b/youtube_dl/extractor/tubitv.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import codecs +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse, + compat_urllib_request +) +from ..utils import ( + ExtractorError, + int_or_none, +) + + +class 
TubiTvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/video\?id=(?P<id>[0-9]+)' + _LOGIN_URL = 'http://tubitv.com/login' + _NETRC_MACHINE = 'tubitv' + _TEST = { + 'url': 'http://tubitv.com/video?id=54411&title=The_Kitchen_Musical_-_EP01', + 'info_dict': { + 'id': '54411', + 'ext': 'mp4', + 'title': 'The Kitchen Musical - EP01', + 'thumbnail': 're:^https?://.*\.png$', + 'description': 'md5:37532716166069b353e8866e71fefae7', + 'duration': 2407, + }, + 'params': { + 'skip_download': 'HLS download', + }, + } + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + self.report_login() + form_data = { + 'username': username, + 'password': password, + } + payload = compat_urllib_parse.urlencode(form_data).encode('utf-8') + request = compat_urllib_request.Request(self._LOGIN_URL, payload) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + login_page = self._download_webpage( + request, None, False, 'Wrong login info') + if not re.search(r'id="tubi-logout"', login_page): + raise ExtractorError( + 'Login failed (invalid username/password)', expected=True) + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration')) + + apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu') + m3u8_url = codecs.decode(apu, 'rot_13')[::-1] + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + } From 1ae7ff771b17d16540aa446aef4f10971465a249 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis 
<njonaitis@gmail.com> Date: Sat, 30 May 2015 14:33:27 +0300 Subject: [PATCH 0829/2721] [tubitv] Add error message for videos that require login (#5524) --- youtube_dl/extractor/tubitv.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 03e971e5e..2c4b21807 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -59,6 +59,11 @@ class TubiTvIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage): + raise ExtractorError( + 'This video requires login, use --username and --password ' + 'options to provide account credentials.', expected=True) + title = self._og_search_title(webpage) description = self._og_search_description(webpage) thumbnail = self._og_search_thumbnail(webpage) From 386bdfa698a7f06c43df91913677db3732e29900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 18:29:16 +0600 Subject: [PATCH 0830/2721] [youtube:user] Workaround 35 pages limitation (Closes #5778) --- youtube_dl/extractor/youtube.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0301682b8..fcdbfe0bc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1399,6 +1399,26 @@ class YoutubeChannelIE(InfoExtractor): channel_id = self._match_id(url) url = self._TEMPLATE_URL % channel_id + + # Channel by page listing is restricted to 35 pages of 30 items, i.e. 
1050 videos total (see #5778) + # Workaround by extracting as a playlist if managed to obtain channel playlist URL + # otherwise fallback on channel by page extraction + channel_page = self._download_webpage( + url + '?view=57', channel_id, + 'Downloading channel page', fatal=False) + channel_playlist_id = self._search_regex( + [r'<meta itemprop="channelId" content="([^"]+)">', + r'data-channel-external-id="([^"]+)"'], + channel_page, 'channel id', default=None) + if channel_playlist_id and channel_playlist_id.startswith('UC'): + playlist_id = 'UU' + channel_playlist_id[2:] + channel_playlist = unescapeHTML(self._search_regex( + r'href="/?(watch\?v=[0-9A-Za-z_-]{11}&list=%s)"' % playlist_id, + channel_page, 'channel playlist URL', default=None)) + if channel_playlist: + return self.url_result( + compat_urlparse.urljoin(url, '/%s' % channel_playlist), 'YoutubePlaylist') + channel_page = self._download_webpage(url, channel_id, 'Downloading page #1') autogenerated = re.search(r'''(?x) class="[^"]*?(?: From 9ff811c5cddbf3481fdcd44e97cf3683a925b33f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 30 May 2015 23:35:55 +0800 Subject: [PATCH 0831/2721] [porn91] PEP8 --- youtube_dl/extractor/porn91.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index b62eec92d..cdf308f3d 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -5,7 +5,6 @@ import re from ..compat import compat_urllib_parse from .common import InfoExtractor -from ..utils import ExtractorError class Porn91IE(InfoExtractor): @@ -13,13 +12,13 @@ class Porn91IE(InfoExtractor): _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)' _TEST = { - 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', - 'md5': '6df8f6d028bc8b14f5dbd73af742fb20', - 'info_dict': { - 'id': '7e42283b4f5ab36da134', - 'title': 
'18岁大一漂亮学妹,水嫩性感,再爽一次!', - 'ext': 'mp4' - } + 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', + 'md5': '6df8f6d028bc8b14f5dbd73af742fb20', + 'info_dict': { + 'id': '7e42283b4f5ab36da134', + 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', + 'ext': 'mp4' + } } def _real_extract(self, url): From 1c2223875664f99325b73fe7765677db9b87e105 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 May 2015 00:03:19 +0800 Subject: [PATCH 0832/2721] [porn91] Simplify --- youtube_dl/extractor/porn91.py | 38 ++++++++++++++-------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index cdf308f3d..377ca2c77 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -1,8 +1,6 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from ..compat import compat_urllib_parse from .common import InfoExtractor @@ -22,38 +20,34 @@ class Porn91IE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id self._set_cookie('91porn.com', 'language', 'cn_CN') webpage = self._download_webpage(url, video_id, "get HTML content") title = self._search_regex( - r'<div id="viewvideo-title">(?P<title>.+?)</div>', - webpage, 'title', flags=re.DOTALL) + r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title') title = title.replace('\n', '') # get real url - n1 = self._search_regex( - r'so.addVariable\(\'file\',\'(?P<n1>\d+)\'', webpage, 'n1') - n2 = self._search_regex( - r'so.addVariable\(\'seccode\',\'(?P<n2>.+?)\'', webpage, 'n2') - n3 = self._search_regex( - r'so.addVariable\(\'max_vid\',\'(?P<n3>\d+)\'', webpage, 'n3') + file_id = self._search_regex( + r'so.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id') + sec_code = self._search_regex( + 
r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code') + max_vid = self._search_regex( + r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid') url_params = compat_urllib_parse.urlencode({ - 'VID': n1, + 'VID': file_id, 'mp4': '1', - 'seccode': n2, - 'max_vid': n3, + 'seccode': sec_code, + 'max_vid': max_vid, }) - t_url = 'http://91porn.com/getfile.php?' + url_params - info_cn = self._download_webpage(t_url, video_id, "get real video_url") - video_url = self._search_regex(r'file=(?P<url>http.+?)&', info_cn, 'url') + info_cn = self._download_webpage( + 'http://91porn.com/getfile.php?' + url_params, video_id, + "get real video url") + video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url') - # construct info - info = { + return { 'id': video_id, 'title': title, 'url': video_url, } - - return info From a80601f8d9789e27c0a916e63d7192c3f398d5d5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 May 2015 00:20:37 +0800 Subject: [PATCH 0833/2721] [porn91] Extract more info --- youtube_dl/extractor/porn91.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 377ca2c77..ea1efc71b 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -3,6 +3,10 @@ from __future__ import unicode_literals from ..compat import compat_urllib_parse from .common import InfoExtractor +from ..utils import ( + parse_duration, + int_or_none, +) class Porn91IE(InfoExtractor): @@ -15,7 +19,8 @@ class Porn91IE(InfoExtractor): 'info_dict': { 'id': '7e42283b4f5ab36da134', 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', - 'ext': 'mp4' + 'ext': 'mp4', + 'duration': 431, } } @@ -46,8 +51,16 @@ class Porn91IE(InfoExtractor): "get real video url") video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url') + duration = parse_duration(self._search_regex( + r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False)) + + comment_count 
= int_or_none(self._search_regex( + r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False)) + return { 'id': video_id, 'title': title, 'url': video_url, + 'duration': duration, + 'comment_count': comment_count, } From d05a1dbe7013d6314ec477b50d864726e509a872 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 May 2015 00:26:12 +0800 Subject: [PATCH 0834/2721] [porn91] Catch daily limit error --- youtube_dl/extractor/porn91.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index ea1efc71b..c119c7e94 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( parse_duration, int_or_none, + ExtractorError, ) @@ -29,6 +30,10 @@ class Porn91IE(InfoExtractor): url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id self._set_cookie('91porn.com', 'language', 'cn_CN') webpage = self._download_webpage(url, video_id, "get HTML content") + + if '作为游客,你每天只可观看10个视频' in webpage: + raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) + title = self._search_regex( r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title') title = title.replace('\n', '') From a2d971309b75c79f3f688a0c381707d828cb1026 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 31 May 2015 00:31:18 +0800 Subject: [PATCH 0835/2721] [porn91] Use single quotes --- youtube_dl/extractor/porn91.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index c119c7e94..72d1b2718 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -29,7 +29,7 @@ class Porn91IE(InfoExtractor): video_id = self._match_id(url) url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id self._set_cookie('91porn.com', 'language', 'cn_CN') - webpage = 
self._download_webpage(url, video_id, "get HTML content") + webpage = self._download_webpage(url, video_id, 'get HTML content') if '作为游客,你每天只可观看10个视频' in webpage: raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) @@ -53,7 +53,7 @@ class Porn91IE(InfoExtractor): }) info_cn = self._download_webpage( 'http://91porn.com/getfile.php?' + url_params, video_id, - "get real video url") + 'get real video url') video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url') duration = parse_duration(self._search_regex( From 931bc3c3a719fe33101c05b9fdc4e6ad8eb08bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 22:52:02 +0600 Subject: [PATCH 0836/2721] [YoutubeDL] Do not loose request method information --- youtube_dl/YoutubeDL.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 21d247f23..5fc8754c6 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -49,6 +49,7 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, + HEADRequest, locked_file, make_HTTPS_handler, MaxDownloadsReached, @@ -1720,7 +1721,8 @@ class YoutubeDL(object): if req_is_string: req = url_escaped else: - req = compat_urllib_request.Request( + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + req = req_type( url_escaped, data=req.data, headers=req.headers, origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) From 339516072be6865bf7e9316be81704ae69296c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 30 May 2015 23:16:14 +0600 Subject: [PATCH 0837/2721] [extractor/generic] Unescape video_id and title extracted from URL --- youtube_dl/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 
9a7b0d25d..c9c92d686 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -894,7 +894,7 @@ class GenericIE(InfoExtractor): force_videoid = smuggled_data['force_videoid'] video_id = force_videoid else: - video_id = os.path.splitext(url.rstrip('/').split('/')[-1])[0] + video_id = os.path.splitext(compat_urllib_parse.unquote(url.rstrip('/').split('/')[-1]))[0] self.to_screen('%s: Requesting header' % video_id) @@ -927,7 +927,7 @@ class GenericIE(InfoExtractor): head_response.headers.get('Last-Modified')) return { 'id': video_id, - 'title': os.path.splitext(url_basename(url))[0], + 'title': os.path.splitext(compat_urllib_parse.unquote(url_basename(url)))[0], 'direct': True, 'formats': [{ 'format_id': m.group('format_id'), @@ -953,7 +953,7 @@ class GenericIE(InfoExtractor): head_response.headers.get('Last-Modified')) return { 'id': video_id, - 'title': os.path.splitext(url_basename(url))[0], + 'title': os.path.splitext(compat_urllib_parse.unquote(url_basename(url)))[0], 'direct': True, 'url': url, 'upload_date': upload_date, From 58bde34a236ff98f25fc109a94b3d393f0bbc9ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 00:44:54 +0600 Subject: [PATCH 0838/2721] [extractor/generic] Force Accept-Encoding to any for extraction pass --- youtube_dl/extractor/generic.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c9c92d686..ec1d9abbe 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,7 @@ from ..compat import ( compat_urllib_parse, compat_urlparse, compat_xml_parse_error, + compat_urllib_request, ) from ..utils import ( determine_ext, @@ -916,7 +917,9 @@ class GenericIE(InfoExtractor): full_response = None if head_response is False: - full_response = self._request_webpage(url, video_id) + request = compat_urllib_request.Request(url) + 
request.add_header('Accept-Encoding', '*') + full_response = self._request_webpage(request, video_id) head_response = full_response # Check for direct link to a video @@ -941,7 +944,17 @@ class GenericIE(InfoExtractor): self._downloader.report_warning('Falling back on generic information extractor.') if not full_response: - full_response = self._request_webpage(url, video_id) + request = compat_urllib_request.Request(url) + # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) + # making it impossible to download only chunk of the file (yet we need only 512kB to + # test whether it's HTML or not). According to youtube-dl default Accept-Encoding + # that will always result in downloading the whole file that is not desirable. + # Therefore for extraction pass we have to override Accept-Encoding to any in order + # to accept raw bytes and being able to download only a chunk. + # It may probably better to solve this by checking Content-Type for application/octet-stream + # after HEAD request finishes, but not sure if we can rely on this. + request.add_header('Accept-Encoding', '*') + full_response = self._request_webpage(request, video_id) # Maybe it's a direct link to a video? # Be careful not to download the whole thing! 
From 1ddb9456c4a63a207ec40bd74cdf0b36d8c68409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 01:23:58 +0600 Subject: [PATCH 0839/2721] [extractor/generic] Use compat_urllib_parse_unquote for unquoting video_id and title from URL --- youtube_dl/extractor/generic.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ec1d9abbe..d9116ce10 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -9,9 +9,10 @@ from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( compat_urllib_parse, + compat_urllib_parse_unquote, + compat_urllib_request, compat_urlparse, compat_xml_parse_error, - compat_urllib_request, ) from ..utils import ( determine_ext, @@ -895,7 +896,7 @@ class GenericIE(InfoExtractor): force_videoid = smuggled_data['force_videoid'] video_id = force_videoid else: - video_id = os.path.splitext(compat_urllib_parse.unquote(url.rstrip('/').split('/')[-1]))[0] + video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]) self.to_screen('%s: Requesting header' % video_id) @@ -930,7 +931,7 @@ class GenericIE(InfoExtractor): head_response.headers.get('Last-Modified')) return { 'id': video_id, - 'title': os.path.splitext(compat_urllib_parse.unquote(url_basename(url)))[0], + 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), 'direct': True, 'formats': [{ 'format_id': m.group('format_id'), @@ -966,7 +967,7 @@ class GenericIE(InfoExtractor): head_response.headers.get('Last-Modified')) return { 'id': video_id, - 'title': os.path.splitext(compat_urllib_parse.unquote(url_basename(url)))[0], + 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), 'direct': True, 'url': url, 'upload_date': upload_date, From a074e922967fa571d4f1abb1773c711747060f00 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 02:13:24 +0600 Subject: [PATCH 0840/2721] [extractor/generic] Add test for large compressed media --- youtube_dl/extractor/generic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d9116ce10..737141f95 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -138,6 +138,20 @@ class GenericIE(InfoExtractor): 'upload_date': '20100513', } }, + # Direct link to a media delivered compressed (requires Accept-Encoding == *) + { + 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', + 'md5': '128c42e68b13950268b648275386fc74', + 'info_dict': { + 'id': 'FictionJunction-Parallel_Hearts', + 'ext': 'flac', + 'title': 'FictionJunction-Parallel_Hearts', + 'upload_date': '20140522', + }, + 'expected_warnings': [ + 'URL could be a direct video link, returning it as such.' + ] + }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', From c5fa81fe81ce05cd81c20ff4ea6dac3dccdcbf9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 02:22:29 +0600 Subject: [PATCH 0841/2721] [extractor/generic] Put all direct link tests near to each other for better navigation --- youtube_dl/extractor/generic.py | 182 ++++++++++++++++---------------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 737141f95..8f2e53063 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -48,6 +48,97 @@ class GenericIE(InfoExtractor): _VALID_URL = r'.*' IE_NAME = 'generic' _TESTS = [ + # Direct link to a video + { + 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', + 'md5': '67d406c2bcb6af27fa886f31aa934bbe', + 'info_dict': { + 'id': 'trailer', + 'ext': 
'mp4', + 'title': 'trailer', + 'upload_date': '20100513', + } + }, + # Direct link to a media delivered compressed (requires Accept-Encoding == *) + { + 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', + 'md5': '128c42e68b13950268b648275386fc74', + 'info_dict': { + 'id': 'FictionJunction-Parallel_Hearts', + 'ext': 'flac', + 'title': 'FictionJunction-Parallel_Hearts', + 'upload_date': '20140522', + }, + 'expected_warnings': [ + 'URL could be a direct video link, returning it as such.' + ] + }, + # Direct download with broken HEAD + { + 'url': 'http://ai-radio.org:8000/radio.opus', + 'info_dict': { + 'id': 'radio', + 'ext': 'opus', + 'title': 'radio', + }, + 'params': { + 'skip_download': True, # infinite live stream + }, + 'expected_warnings': [ + r'501.*Not Implemented' + ], + }, + # Direct link with incorrect MIME type + { + 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', + 'md5': '4ccbebe5f36706d85221f204d7eb5913', + 'info_dict': { + 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', + 'id': '5_Lennart_Poettering_-_Systemd', + 'ext': 'webm', + 'title': '5_Lennart_Poettering_-_Systemd', + 'upload_date': '20141120', + }, + 'expected_warnings': [ + 'URL could be a direct video link, returning it as such.' 
+ ] + }, + # RSS feed + { + 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'info_dict': { + 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'title': 'Zero Punctuation', + 'description': 're:.*groundbreaking video review series.*' + }, + 'playlist_mincount': 11, + }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } + }, + # google redirect + { + 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', + 'info_dict': { + 'id': 'cmQHVoWB5FY', + 'ext': 'mp4', + 'upload_date': '20130224', + 'uploader_id': 'TheVerge', + 'description': 're:^Chris Ziegler takes a look at the\.*', + 'uploader': 'The Verge', + 'title': 'First Firefox OS phones side-by-side', + }, + 'params': { + 'skip_download': False, + } + }, { 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', 'md5': '85b90ccc9d73b4acd9138d3af4c27f89', @@ -127,31 +218,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, # m3u8 download }, }, - # Direct link to a video - { - 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', - 'md5': '67d406c2bcb6af27fa886f31aa934bbe', - 'info_dict': { - 'id': 'trailer', - 'ext': 'mp4', - 'title': 'trailer', - 'upload_date': '20100513', - } - }, - # Direct link to a media delivered compressed (requires Accept-Encoding == *) - { - 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', - 'md5': '128c42e68b13950268b648275386fc74', - 'info_dict': { - 'id': 'FictionJunction-Parallel_Hearts', - 'ext': 'flac', - 'title': 'FictionJunction-Parallel_Hearts', - 'upload_date': '20140522', - }, - 'expected_warnings': [ - 'URL 
could be a direct video link, returning it as such.' - ] - }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', @@ -176,22 +242,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, - # google redirect - { - 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', - 'info_dict': { - 'id': 'cmQHVoWB5FY', - 'ext': 'mp4', - 'upload_date': '20130224', - 'uploader_id': 'TheVerge', - 'description': 're:^Chris Ziegler takes a look at the\.*', - 'uploader': 'The Verge', - 'title': 'First Firefox OS phones side-by-side', - }, - 'params': { - 'skip_download': False, - } - }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', @@ -423,16 +473,6 @@ class GenericIE(InfoExtractor): 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com', } }, - # RSS feed - { - 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', - 'info_dict': { - 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', - 'title': 'Zero Punctuation', - 'description': 're:.*groundbreaking video review series.*' - }, - 'playlist_mincount': 11, - }, # Multiple brightcove videos # https://github.com/rg3/youtube-dl/issues/2283 { @@ -486,21 +526,6 @@ class GenericIE(InfoExtractor): 'uploader': 'thoughtworks.wistia.com', }, }, - # Direct download with broken HEAD - { - 'url': 'http://ai-radio.org:8000/radio.opus', - 'info_dict': { - 'id': 'radio', - 'ext': 'opus', - 'title': 'radio', - }, - 'params': { - 'skip_download': True, # infinite live stream - }, - 'expected_warnings': [ - r'501.*Not Implemented' - ], - }, # Soundcloud embed { 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', @@ -532,21 +557,6 @@ class 
GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, - # Direct link with incorrect MIME type - { - 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', - 'md5': '4ccbebe5f36706d85221f204d7eb5913', - 'info_dict': { - 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', - 'id': '5_Lennart_Poettering_-_Systemd', - 'ext': 'webm', - 'title': '5_Lennart_Poettering_-_Systemd', - 'upload_date': '20141120', - }, - 'expected_warnings': [ - 'URL could be a direct video link, returning it as such.' - ] - }, # Cinchcast embed { 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/', @@ -705,16 +715,6 @@ class GenericIE(InfoExtractor): 'age_limit': 0, }, }, - # RSS feed with enclosure - { - 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', - 'info_dict': { - 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', - 'ext': 'm4v', - 'upload_date': '20150228', - 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', - } - }, # Crooks and Liars embed { 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', From c5138a7ce49db19b64adc11d81384595b966a7a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 02:36:20 +0600 Subject: [PATCH 0842/2721] [extractor/generic] Clarify test comment --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8f2e53063..96ca398de 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -59,7 +59,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20100513', } }, - # Direct link to a media delivered compressed (requires Accept-Encoding == *) + # Direct link to media delivered compressed (until Accept-Encoding is *) { 'url': 
'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', 'md5': '128c42e68b13950268b648275386fc74', From d2a9de78dfc629aaaaf8a2a30432d5f02c949e9a Mon Sep 17 00:00:00 2001 From: Ivan Kozik <ivan@ludios.org> Date: Sat, 30 May 2015 20:50:22 +0000 Subject: [PATCH 0843/2721] [youtube] Construct a playlist URL in case the page is missing one This fixes jumping from user/channel -> playlist for some users like https://www.youtube.com/user/BitcoinFoundation This also removes the superfluous log message "add --no-playlist to just download video VIDEOID" when downloading a user/channel. --- youtube_dl/extractor/youtube.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index fcdbfe0bc..aacb999ce 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1412,12 +1412,8 @@ class YoutubeChannelIE(InfoExtractor): channel_page, 'channel id', default=None) if channel_playlist_id and channel_playlist_id.startswith('UC'): playlist_id = 'UU' + channel_playlist_id[2:] - channel_playlist = unescapeHTML(self._search_regex( - r'href="/?(watch\?v=[0-9A-Za-z_-]{11}&list=%s)"' % playlist_id, - channel_page, 'channel playlist URL', default=None)) - if channel_playlist: - return self.url_result( - compat_urlparse.urljoin(url, '/%s' % channel_playlist), 'YoutubePlaylist') + return self.url_result( + compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist') channel_page = self._download_webpage(url, channel_id, 'Downloading page #1') autogenerated = re.search(r'''(?x) From eb47569f8a6017190d73429b3ef54c1ffaf201dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 03:00:13 +0600 Subject: [PATCH 0844/2721] [tvigle] Add support for m3u8 --- youtube_dl/extractor/tvigle.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tvigle.py 
b/youtube_dl/extractor/tvigle.py index 102362b29..4e95bd30f 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -6,6 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( float_or_none, + int_or_none, parse_age_limit, ) @@ -24,17 +25,17 @@ class TvigleIE(InfoExtractor): 'display_id': 'sokrat', 'ext': 'flv', 'title': 'Сократ', - 'description': 'md5:a05bd01be310074d5833efc6743be95e', + 'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17', 'duration': 6586, - 'age_limit': 0, + 'age_limit': 12, }, }, { 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', - 'md5': 'd9012d7c7c598fe7a11d7fb46dc1f574', + 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b', 'info_dict': { 'id': '5142516', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Ведущий телепрограммы «60 минут» (США) о Владимире Высоцком', 'description': 'md5:027f7dc872948f14c96d19b4178428a4', 'duration': 186.080, @@ -54,7 +55,7 @@ class TvigleIE(InfoExtractor): if not video_id: webpage = self._download_webpage(url, display_id) video_id = self._html_search_regex( - r'<li class="video-preview current_playing" id="(\d+)">', + r'class="video-preview current_playing" id="(\d+)">', webpage, 'video id') video_data = self._download_json( @@ -70,13 +71,19 @@ class TvigleIE(InfoExtractor): formats = [] for vcodec, fmts in item['videos'].items(): - for quality, video_url in fmts.items(): + for format_id, video_url in fmts.items(): + if format_id == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=vcodec)) + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) formats.append({ 'url': video_url, - 'format_id': '%s-%s' % (vcodec, quality), + 'format_id': '%s-%s' % (vcodec, format_id), 'vcodec': vcodec, - 'height': int(quality[:-1]), - 'filesize': item['video_files_size'][vcodec][quality], + 'height': int_or_none(height), + 'filesize': 
item['video_files_size'][vcodec][format_id], }) self._sort_formats(formats) From 7584e38ce4e98e0e9abca146a513d215701308e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 03:01:41 +0600 Subject: [PATCH 0845/2721] [tvigle] Modernize --- youtube_dl/extractor/tvigle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index 4e95bd30f..a85693888 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -64,8 +64,8 @@ class TvigleIE(InfoExtractor): item = video_data['playlist']['items'][0] title = item['title'] - description = item['description'] - thumbnail = item['thumbnail'] + description = item.get('description') + thumbnail = item.get('thumbnail') duration = float_or_none(item.get('durationMilliseconds'), 1000) age_limit = parse_age_limit(item.get('ageRestrictions')) @@ -83,7 +83,7 @@ class TvigleIE(InfoExtractor): 'format_id': '%s-%s' % (vcodec, format_id), 'vcodec': vcodec, 'height': int_or_none(height), - 'filesize': item['video_files_size'][vcodec][format_id], + 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)), }) self._sort_formats(formats) From df15ef8dab6df79eb076b3d06b3948917763ac3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 04:05:09 +0600 Subject: [PATCH 0846/2721] [YoutubeDL] Tweak select_format for video only media --- youtube_dl/YoutubeDL.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5fc8754c6..aa6ec9d9a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -924,8 +924,9 @@ class YoutubeDL(object): if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] if audiovideo_formats: return audiovideo_formats[format_idx] - # for audio only urls, select the best/worst audio format - elif 
all(f.get('acodec') != 'none' for f in available_formats): + # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format + elif (all(f.get('acodec') != 'none' for f in available_formats) or + all(f.get('vcodec') != 'none' for f in available_formats)): return available_formats[format_idx] elif format_spec == 'bestaudio': audio_formats = [ From 96b9690985e9b9f4e50fde10bbc92e1a72df64e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 31 May 2015 04:05:26 +0600 Subject: [PATCH 0847/2721] [imgur] Improve extraction --- youtube_dl/extractor/imgur.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index fe5d95e2c..d692ea79a 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, js_to_json, @@ -12,7 +13,7 @@ from ..utils import ( class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' 
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -34,7 +35,8 @@ class ImgurIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + compat_urlparse.urljoin(url, video_id), video_id) width = int_or_none(self._search_regex( r'<param name="width" value="([0-9]+)"', From 47fd8c2f761c2073744cb041f9eccb7ed10f2470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 1 Jun 2015 00:04:36 +0600 Subject: [PATCH 0848/2721] [patreon] Fix embeds extraction (Closes #5862) --- youtube_dl/extractor/patreon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py index f179ea200..6cdc2638b 100644 --- a/youtube_dl/extractor/patreon.py +++ b/youtube_dl/extractor/patreon.py @@ -87,7 +87,7 @@ class PatreonIE(InfoExtractor): r'<div class="attach"><a target="_blank" href="([^"]+)">', webpage, 'attachment URL', default=None) embed = self._html_search_regex( - r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"', + r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"', webpage, 'embedded URL', default=None) if attach_fn is not None: From 4053ee9104cd7669f749a267dc2c2a1725ca188b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 1 Jun 2015 14:43:20 +0800 Subject: [PATCH 0849/2721] Credit @PeterDing for 91porn extractor (#5830) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3410e1fb9..bf2a25cb8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -126,3 +126,4 @@ Matthias Küch Julian Richen Ping O. 
Mister Hat +Peter Ding From 866b296d0f156831cceccc967c34382a90b77422 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Mon, 1 Jun 2015 16:11:19 +0300 Subject: [PATCH 0850/2721] [aftonbladet] Fix extraction and update _VALID_URL (Fixes #5863) --- youtube_dl/extractor/aftonbladet.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index a117502bc..e0518cf26 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -6,11 +6,11 @@ from ..utils import int_or_none class AftonbladetIE(InfoExtractor): - _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])' + _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)' _TEST = { - 'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', + 'url': 'http://tv.aftonbladet.se/abtv/articles/36015', 'info_dict': { - 'id': 'article36015', + 'id': '36015', 'ext': 'mp4', 'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna', 'description': 'Jupiters måne mest aktiv av alla himlakroppar', @@ -25,8 +25,9 @@ class AftonbladetIE(InfoExtractor): # find internal video meta data meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' - internal_meta_id = self._html_search_regex( - r'data-aptomaId="([\w\d]+)"', webpage, 'internal_meta_id') + player_config = self._parse_json(self._html_search_regex( + r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) + internal_meta_id = player_config['videoId'] internal_meta_url = meta_url % internal_meta_id internal_meta_json = self._download_json( internal_meta_url, video_id, 'Downloading video meta data') From 923e79e2e4d9cc0c24496614aab520737cdc89ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 2 Jun 2015 00:53:04 +0600 Subject: [PATCH 0851/2721] [nova] Add extractor --- 
youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nova.py | 135 +++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 youtube_dl/extractor/nova.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4dc07efe0..67eb96057 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -352,6 +352,7 @@ from .ninegag import NineGagIE from .noco import NocoIE from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE +from .nova import NovaIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowtv import NowTVIE diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py new file mode 100644 index 000000000..e93a7ffa8 --- /dev/null +++ b/youtube_dl/extractor/nova.py @@ -0,0 +1,135 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class NovaIE(InfoExtractor): + IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' + _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+)(?:\.html|/?)' + _TESTS = [{ + 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html', + 'info_dict': { + 'id': '1608920', + 'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou', + 'ext': 'flv', + 'title': 'Duel: Michal Hrdlička a Petr Suchoň', + 'description': 'md5:d0cc509858eee1b1374111c588c6f5d5', + 'thumbnail': 're:^https?://.*\.(?:jpg)', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html', + 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', + 'info_dict': { + 'id': '1757139', + 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci', + 'ext': 'mp4', + 
'title': 'Podzemní nemocnice v pražské Krči', + 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', + 'thumbnail': 're:^https?://.*\.(?:jpg)', + } + }, { + 'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove/', + 'info_dict': { + 'id': '1756825', + 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', + 'ext': 'flv', + 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově', + 'description': 'md5:d804ba6b30bc7da2705b1fea961bddfe', + 'thumbnail': 're:^https?://.*\.(?:jpg)', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', + 'only_matching': True, + }, { + 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', + 'only_matching': True, + }, { + 'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html', + 'only_matching': True, + }, { + 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + site = mobj.group('site') + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + [r"(?:media|video_id)\s*:\s*'(\d+)'", + r'media=(\d+)', + r'id="article_video_(\d+)"', + r'id="player_(\d+)"'], + webpage, 'video id') + + config_url = self._search_regex( + r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"', + webpage, 'config url', default=None) + + if not config_url: + DEFAULT_SITE_ID = '23000' + SITES = { + 'tvnoviny': DEFAULT_SITE_ID, + 'novaplus': DEFAULT_SITE_ID, + 'vymena': DEFAULT_SITE_ID, + 'krasna': DEFAULT_SITE_ID, + 'fanda': '30', + 'tn': '30', + 'doma': '30', + } + + site_id = self._search_regex( + 
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID) + + config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig' + % (site_id, video_id)) + + config = self._download_json( + config_url, display_id, + 'Downloading config JSON', + transform_source=lambda s: re.sub(r'var\s+[\da-zA-Z_]+\s*=\s*({.+?});', r'\1', s)) + + mediafile = config['mediafile'] + video_url = mediafile['src'] + + m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url) + if m: + formats = [{ + 'url': m.group('url'), + 'app': m.group('app'), + 'play_path': m.group('playpath'), + 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf', + 'ext': 'flv', + }] + else: + formats = [{ + 'url': video_url, + }] + self._sort_formats(formats) + + title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = config.get('poster') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } From 60158217ef8da5f44ef316e50c8a5e2ac2e202c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 2 Jun 2015 00:57:08 +0600 Subject: [PATCH 0852/2721] [nova] Add tv test --- youtube_dl/extractor/nova.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index e93a7ffa8..4e999b237 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -60,6 +60,9 @@ class NovaIE(InfoExtractor): }, { 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html', 'only_matching': True, + }, { + 'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html', + 'only_matching': True, }] def _real_extract(self, url): From 9f4b9118ccaef5cd7c414a78c5622968e8c3343f Mon 
Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 12:47:52 +0300 Subject: [PATCH 0853/2721] [nova] Fix display_id extraction bug Make id group non-greedy so that .html is not included in it. --- youtube_dl/extractor/nova.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 4e999b237..1dd18511e 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class NovaIE(InfoExtractor): IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' - _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+)(?:\.html|/?)' + _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/?)$' _TESTS = [{ 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html', 'info_dict': { From 9464a194dbf48989c486fa2de9e1aebc59e28ed4 Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 12:52:39 +0300 Subject: [PATCH 0854/2721] [nova] Fix extension extraction bug Replace the hardcoded flv with determine_ext. Let rtmpdump parse the url. 
--- youtube_dl/extractor/nova.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 1dd18511e..fd5f9cb0e 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import determine_ext class NovaIE(InfoExtractor): @@ -39,7 +40,7 @@ class NovaIE(InfoExtractor): 'info_dict': { 'id': '1756825', 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově', 'description': 'md5:d804ba6b30bc7da2705b1fea961bddfe', 'thumbnail': 're:^https?://.*\.(?:jpg)', @@ -108,21 +109,8 @@ class NovaIE(InfoExtractor): mediafile = config['mediafile'] video_url = mediafile['src'] - - m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url) - if m: - formats = [{ - 'url': m.group('url'), - 'app': m.group('app'), - 'play_path': m.group('playpath'), - 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf', - 'ext': 'flv', - }] - else: - formats = [{ - 'url': video_url, - }] - self._sort_formats(formats) + ext = determine_ext(video_url) + video_url = video_url.replace('&{}:'.format(ext), '') title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) description = self._og_search_description(webpage) @@ -134,5 +122,6 @@ class NovaIE(InfoExtractor): 'title': title, 'description': description, 'thumbnail': thumbnail, - 'formats': formats, + 'url': video_url, + 'ext': ext, } From fcb04bcaca1b83cd3f13f494d7d775e35e0b6182 Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 12:55:41 +0300 Subject: [PATCH 0855/2721] [nova] Extract upload_date in some cases --- youtube_dl/extractor/nova.py | 39 ++++++++++++++++++++++++++++++++++++ 1 file changed, 39 
insertions(+) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index fd5f9cb0e..30c64aaf8 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -49,6 +49,33 @@ class NovaIE(InfoExtractor): # rtmp download 'skip_download': True, } + }, { + 'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/', + 'info_dict': { + 'id': '1756858', + 'ext': 'mp4', + 'title': 'Televizní noviny - 30. 5. 2015', + 'thumbnail': 're:^https?://.*\.(?:jpg)', + 'upload_date': '20150530', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', + 'info_dict': { + 'id': '1753621', + 'ext': 'mp4', + 'title': 'Zaklínač 3: Divoký hon', + 'description': 're:.*Pokud se stejně jako my nemůžete.*', + 'thumbnail': 're:https?://.*\.jpg(\?.*)?', + 'upload_date': '20150521', + }, + 'params': { + # rtmp download + 'skip_download': True, + } }, { 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', 'only_matching': True, @@ -116,11 +143,23 @@ class NovaIE(InfoExtractor): description = self._og_search_description(webpage) thumbnail = config.get('poster') + mobj = None + if site == 'novaplus': + mobj = re.search(r'(?P<day>\d{1,2})-(?P<month>\d{1,2})-(?P<year>\d{4})$', display_id) + if site == 'fanda': + mobj = re.search( + r'<span class="date_time">(?P<day>\d{1,2})\.(?P<month>\d{1,2})\.(?P<year>\d{4})\b', webpage) + if mobj: + upload_date = '{}{:02d}{:02d}'.format(mobj.group('year'), int(mobj.group('month')), int(mobj.group('day'))) + else: + upload_date = None + return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': description, + 'upload_date': upload_date, 'thumbnail': thumbnail, 'url': video_url, 'ext': ext, From 34c0f95db273ac5e7a7f8a6d23a3f90ceadf4695 Mon Sep 17 00:00:00 2001 From: Slava 
Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 12:56:36 +0300 Subject: [PATCH 0856/2721] [nova] Remove html tags from description --- youtube_dl/extractor/nova.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 30c64aaf8..140312f9c 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import determine_ext +from ..utils import clean_html, determine_ext class NovaIE(InfoExtractor): @@ -42,7 +42,7 @@ class NovaIE(InfoExtractor): 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', 'ext': 'mp4', 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově', - 'description': 'md5:d804ba6b30bc7da2705b1fea961bddfe', + 'description': 'md5:dc24e50be5908df83348e50d1431295e', 'thumbnail': 're:^https?://.*\.(?:jpg)', }, 'params': { @@ -140,7 +140,7 @@ class NovaIE(InfoExtractor): video_url = video_url.replace('&{}:'.format(ext), '') title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) - description = self._og_search_description(webpage) + description = clean_html(self._og_search_description(webpage, default=None)) thumbnail = config.get('poster') mobj = None From a00234f1c517d077a237da576be638fef980d79e Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 12:57:03 +0300 Subject: [PATCH 0857/2721] [nova] Minor style improvement --- youtube_dl/extractor/nova.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 140312f9c..10957e5fa 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -132,7 +132,7 @@ class NovaIE(InfoExtractor): config = self._download_json( config_url, display_id, 'Downloading config JSON', - transform_source=lambda s: 
re.sub(r'var\s+[\da-zA-Z_]+\s*=\s*({.+?});', r'\1', s)) + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) mediafile = config['mediafile'] video_url = mediafile['src'] From bc03e58565d99677a643e0a058d25c7ee9b265d6 Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 13:16:58 +0300 Subject: [PATCH 0858/2721] [iprima] Update --- youtube_dl/extractor/iprima.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 8529bedfc..f3536893a 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -15,7 +15,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://play\.iprima\.cz/[^?#]+/(?P<id>[^?#]+)' + _VALID_URL = r'https?://play\.iprima\.cz/([^/]+/)*(?P<id>[^?#]+)' _TESTS = [{ 'url': 'http://play.iprima.cz/particka/particka-92', @@ -23,7 +23,7 @@ class IPrimaIE(InfoExtractor): 'id': '39152', 'ext': 'flv', 'title': 'Partička (92)', - 'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6', + 'description': 'md5:db00b9bc10ffd26fb148fa6a3a67c40b', 'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg', }, 'params': { @@ -35,13 +35,16 @@ class IPrimaIE(InfoExtractor): 'id': '9718337', 'ext': 'flv', 'title': 'Tchibo Partička - Jarní móda', - 'description': 'md5:589f8f59f414220621ff8882eb3ce7be', + 'description': 'md5:db00b9bc10ffd26fb148fa6a3a67c40b', 'thumbnail': 're:^http:.*\.jpg$', }, 'params': { 'skip_download': True, # requires rtmpdump }, 'skip': 'Do not have permission to access this page', + }, { + 'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015', + 'only_matching': True, }] def _real_extract(self, url): @@ -102,7 +105,7 @@ class IPrimaIE(InfoExtractor): return { 'id': real_id, - 'title': self._og_search_title(webpage), + 'title': self._og_search_title(webpage).replace(' | Prima PLAY', ''), 'thumbnail': 
self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From b5597738d4de35fd6f2be7bf1cb6a32c754d873f Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 17:28:14 +0300 Subject: [PATCH 0859/2721] [iprima] Comply with review --- youtube_dl/extractor/iprima.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index f3536893a..502507551 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -11,11 +11,12 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + remove_end, ) class IPrimaIE(InfoExtractor): - _VALID_URL = r'https?://play\.iprima\.cz/([^/]+/)*(?P<id>[^?#]+)' + _VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P<id>[^?#]+)' _TESTS = [{ 'url': 'http://play.iprima.cz/particka/particka-92', @@ -105,7 +106,7 @@ class IPrimaIE(InfoExtractor): return { 'id': real_id, - 'title': self._og_search_title(webpage).replace(' | Prima PLAY', ''), + 'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'), 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'description': self._og_search_description(webpage), From d23da75b32e02963f988bad962b3f5259e4a6d31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 2 Jun 2015 21:10:18 +0600 Subject: [PATCH 0860/2721] [iprima] Fix description extraction `og:description` does not contain actual description anymore. 
--- youtube_dl/extractor/iprima.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 502507551..821c8ec10 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -24,7 +24,7 @@ class IPrimaIE(InfoExtractor): 'id': '39152', 'ext': 'flv', 'title': 'Partička (92)', - 'description': 'md5:db00b9bc10ffd26fb148fa6a3a67c40b', + 'description': 'md5:74e9617e51bca67c3ecfb2c6f9766f45', 'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg', }, 'params': { @@ -36,13 +36,11 @@ class IPrimaIE(InfoExtractor): 'id': '9718337', 'ext': 'flv', 'title': 'Tchibo Partička - Jarní móda', - 'description': 'md5:db00b9bc10ffd26fb148fa6a3a67c40b', 'thumbnail': 're:^http:.*\.jpg$', }, 'params': { 'skip_download': True, # requires rtmpdump }, - 'skip': 'Do not have permission to access this page', }, { 'url': 'http://play.iprima.cz/zpravy-ftv-prima-2752015', 'only_matching': True, @@ -109,5 +107,7 @@ class IPrimaIE(InfoExtractor): 'title': remove_end(self._og_search_title(webpage), ' | Prima PLAY'), 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, - 'description': self._og_search_description(webpage), + 'description': self._search_regex( + r'<p[^>]+itemprop="description"[^>]*>([^<]+)', + webpage, 'description', default=None), } From 4b5fe1349f5568f3b9b939520db0a1ddc598b4b3 Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 18:15:05 +0300 Subject: [PATCH 0861/2721] [nova] Comply with review --- youtube_dl/extractor/nova.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 10957e5fa..4a2d76506 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals import re from .common 
import InfoExtractor -from ..utils import clean_html, determine_ext +from ..utils import ( + clean_html, + determine_ext, + unified_strdate, +) class NovaIE(InfoExtractor): @@ -143,14 +147,12 @@ class NovaIE(InfoExtractor): description = clean_html(self._og_search_description(webpage, default=None)) thumbnail = config.get('poster') - mobj = None if site == 'novaplus': - mobj = re.search(r'(?P<day>\d{1,2})-(?P<month>\d{1,2})-(?P<year>\d{4})$', display_id) - if site == 'fanda': - mobj = re.search( - r'<span class="date_time">(?P<day>\d{1,2})\.(?P<month>\d{1,2})\.(?P<year>\d{4})\b', webpage) - if mobj: - upload_date = '{}{:02d}{:02d}'.format(mobj.group('year'), int(mobj.group('month')), int(mobj.group('day'))) + upload_date = unified_strdate(self._search_regex( + r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None)) + elif site == 'fanda': + upload_date = unified_strdate(self._search_regex( + r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None)) else: upload_date = None From 08b7968e2873b45dafe465ec04541db8fcd4967d Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 17:49:15 +0300 Subject: [PATCH 0862/2721] [nova] Fix display_id extraction bug --- youtube_dl/extractor/nova.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 4a2d76506..8360a65d9 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -13,9 +13,9 @@ from ..utils import ( class NovaIE(InfoExtractor): IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' - _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/?)$' + _VALID_URL = 'http://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' _TESTS = [{ - 
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html', + 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus', 'info_dict': { 'id': '1608920', 'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou', @@ -29,7 +29,7 @@ class NovaIE(InfoExtractor): 'skip_download': True, } }, { - 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html', + 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', 'info_dict': { 'id': '1757139', @@ -40,7 +40,7 @@ class NovaIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.(?:jpg)', } }, { - 'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove/', + 'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove', 'info_dict': { 'id': '1756825', 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', From b0cda32f726443d464a68a34b22a2e02ef8b29b0 Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 18:17:33 +0300 Subject: [PATCH 0863/2721] [nova] Fix Python 2.6 compatability issue --- youtube_dl/extractor/nova.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 8360a65d9..7e3498eea 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -141,7 +141,7 @@ class NovaIE(InfoExtractor): mediafile = config['mediafile'] video_url = mediafile['src'] ext = determine_ext(video_url) - video_url = video_url.replace('&{}:'.format(ext), '') + video_url = video_url.replace('&%s:' % ext, '') title = mediafile.get('meta', {}).get('title') 
or self._og_search_title(webpage) description = clean_html(self._og_search_description(webpage, default=None)) From fa971259e69a8031c384754b6238cfff71bea773 Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Tue, 2 Jun 2015 19:09:47 +0300 Subject: [PATCH 0864/2721] [nova] Add a comment about html in description --- youtube_dl/extractor/nova.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 7e3498eea..85253b6ed 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -46,7 +46,7 @@ class NovaIE(InfoExtractor): 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', 'ext': 'mp4', 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově', - 'description': 'md5:dc24e50be5908df83348e50d1431295e', + 'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags 'thumbnail': 're:^https?://.*\.(?:jpg)', }, 'params': { From 23dd1fc74c36329ed40855301ac499a0ad2a0009 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 3 Jun 2015 10:21:03 +0800 Subject: [PATCH 0865/2721] [vidme] Always use the non-embedded page For example, https://vid.me/Wmur contains more information than https://vid.me/e/Wmur --- youtube_dl/extractor/vidme.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index bd953fb4c..e0b55078b 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -10,7 +10,7 @@ from ..utils import ( class VidmeIE(InfoExtractor): _VALID_URL = r'https?://vid\.me/(?:e/)?(?P<id>[\da-zA-Z]+)' - _TEST = { + _TESTS = [{ 'url': 'https://vid.me/QNB', 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82', 'info_dict': { @@ -23,9 +23,14 @@ class VidmeIE(InfoExtractor): 'upload_date': '20140725', 'thumbnail': 're:^https?://.*\.jpg', }, - } + }, { + # From 
http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching + 'url': 'https://vid.me/e/Wmur', + 'only_matching': True, + }] def _real_extract(self, url): + url = url.replace('vid.me/e/', 'vid.me/') video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) From 8f9478412424b87e4fb77be53d239c13932b078a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 3 Jun 2015 10:26:39 +0800 Subject: [PATCH 0866/2721] [tumblr] Detect vid.me embeds (fixes #5883) --- youtube_dl/extractor/tumblr.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 828c808a6..e6218808f 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -28,6 +28,17 @@ class TumblrIE(InfoExtractor): 'description': 'md5:dba62ac8639482759c8eb10ce474586a', 'thumbnail': 're:http://.*\.jpg', } + }, { + 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching', + 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab', + 'info_dict': { + 'id': 'Wmur', + 'ext': 'mp4', + 'title': 'naked smoking & stretching', + 'upload_date': '20150506', + 'timestamp': 1430931613, + }, + 'add_ie': ['Vidme'], }] def _real_extract(self, url): @@ -38,6 +49,12 @@ class TumblrIE(InfoExtractor): url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) webpage = self._download_webpage(url, video_id) + vid_me_embed_url = self._search_regex( + r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', + webpage, 'vid.me embed', default=None) + if vid_me_embed_url is not None: + return self.url_result(vid_me_embed_url, 'Vidme') + iframe_url = self._search_regex( r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', webpage, 'iframe url') From 687cb3ad35ac49f1053c1ea52e3b6db18b3aa1cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 3 Jun 2015 20:47:11 +0600 Subject: [PATCH 0867/2721] [24video] Fix uploader extraction --- 
youtube_dl/extractor/twentyfourvideo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 67e8bfea0..c1ee1decc 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -15,7 +15,7 @@ class TwentyFourVideoIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.24video.net/video/view/1044982', - 'md5': '48dd7646775690a80447a8dca6a2df76', + 'md5': 'd041af8b5b4246ea466226a0d6693345', 'info_dict': { 'id': '1044982', 'ext': 'mp4', @@ -54,7 +54,7 @@ class TwentyFourVideoIE(InfoExtractor): webpage, 'upload date')) uploader = self._html_search_regex( - r'Загрузил\s*<a href="/jsecUser/movies/[^"]+" class="link">([^<]+)</a>', + r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>', webpage, 'uploader', fatal=False) view_count = int_or_none(self._html_search_regex( From 15b74b94beb8720a4f6d7ee076c123dd8ae05309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 3 Jun 2015 20:52:47 +0600 Subject: [PATCH 0868/2721] [tvigle] Capture error message --- youtube_dl/extractor/tvigle.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index a85693888..aa07c8251 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, float_or_none, int_or_none, parse_age_limit, @@ -63,6 +64,13 @@ class TvigleIE(InfoExtractor): item = video_data['playlist']['items'][0] + videos = item.get('videos') + + error_message = item.get('errorMessage') + if not videos and error_message: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + title = item['title'] description = item.get('description') thumbnail = item.get('thumbnail') From 
3153a2c98d7201b1ae8104c346db58e19f322cc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 3 Jun 2015 20:53:54 +0600 Subject: [PATCH 0869/2721] [tvigle] Skip tests --- youtube_dl/extractor/tvigle.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py index aa07c8251..dc3a8334a 100644 --- a/youtube_dl/extractor/tvigle.py +++ b/youtube_dl/extractor/tvigle.py @@ -30,6 +30,7 @@ class TvigleIE(InfoExtractor): 'duration': 6586, 'age_limit': 12, }, + 'skip': 'georestricted', }, { 'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/', @@ -42,6 +43,7 @@ class TvigleIE(InfoExtractor): 'duration': 186.080, 'age_limit': 0, }, + 'skip': 'georestricted', }, { 'url': 'https://cloud.tvigle.ru/video/5267604/', 'only_matching': True, From 6800d3372f35e08dcc4d34d06601815bf0cb0a3d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 3 Jun 2015 23:10:18 +0800 Subject: [PATCH 0870/2721] [YoutubeDL] Support DASH manifest downloading --- youtube_dl/downloader/dash.py | 50 +++++++++++++++++++++++++++++++++ youtube_dl/downloader/http.py | 4 +++ youtube_dl/extractor/youtube.py | 6 ++++ 3 files changed, 60 insertions(+) create mode 100644 youtube_dl/downloader/dash.py diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py new file mode 100644 index 000000000..18eca2c04 --- /dev/null +++ b/youtube_dl/downloader/dash.py @@ -0,0 +1,50 @@ +from __future__ import unicode_literals +from .common import FileDownloader +from ..compat import compat_urllib_request + +import re + + +class DashSegmentsFD(FileDownloader): + """ + Download segments in a DASH manifest + """ + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + base_url = info_dict['url'] + segment_urls = info_dict['segment_urls'] + + self.byte_counter = 0 + + 
def append_url_to_file(outf, target_url, target_name): + self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) + req = compat_urllib_request.Request(target_url) + data = self.ydl.urlopen(req).read() + outf.write(data) + self.byte_counter += len(data) + + def combine_url(base_url, target_url): + if re.match(r'^https?://', target_url): + return target_url + return '%s/%s' % (base_url, target_url) + + with open(tmpfilename, 'wb') as outf: + append_url_to_file( + outf, combine_url(base_url, info_dict['initialization_url']), + 'initialization segment') + for i, segment_url in enumerate(segment_urls): + append_url_to_file( + outf, combine_url(base_url, segment_url), + 'segment %d / %d' % (i + 1, len(segment_urls))) + + self.try_rename(tmpfilename, filename) + + self._hook_progress({ + 'downloaded_bytes': self.byte_counter, + 'total_bytes': self.byte_counter, + 'filename': filename, + 'status': 'finished', + }) + + return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index b7f144af9..ceacb8522 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -6,6 +6,7 @@ import socket import time from .common import FileDownloader +from .dash import DashSegmentsFD from ..compat import ( compat_urllib_request, compat_urllib_error, @@ -19,6 +20,9 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): + if info_dict.get('initialization_url') and list(filter(None, info_dict.get('segment_urls', []))): + return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict) + url = info_dict['url'] tmpfilename = self.temp_name(filename) stream = None diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index aacb999ce..5d1297e0d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -802,6 +802,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # TODO implement WebVTT downloading pass elif 
mime_type.startswith('audio/') or mime_type.startswith('video/'): + segment_list = r.find('{urn:mpeg:DASH:schema:MPD:2011}SegmentList') format_id = r.attrib['id'] video_url = url_el.text filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) @@ -815,6 +816,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': filesize, 'fps': int_or_none(r.attrib.get('frameRate')), } + if segment_list: + f.update({ + 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')] + }) try: existing_format = next( fo for fo in formats From e4ac7bb1e598b0317742737ea06c162fa7f22cd4 Mon Sep 17 00:00:00 2001 From: Slava Shklyaev <shk.slava@gmail.com> Date: Wed, 3 Jun 2015 19:18:41 +0300 Subject: [PATCH 0871/2721] [nova] Revert "Fix extension extraction bug" This reverts commit 9464a194dbf48989c486fa2de9e1aebc59e28ed4. --- youtube_dl/extractor/nova.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 85253b6ed..3f9c776ef 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from ..utils import ( clean_html, - determine_ext, unified_strdate, ) @@ -44,7 +43,7 @@ class NovaIE(InfoExtractor): 'info_dict': { 'id': '1756825', 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Policie Modrava - 15. 
díl - Blondýnka na hřbitově', 'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags 'thumbnail': 're:^https?://.*\.(?:jpg)', @@ -57,7 +56,7 @@ class NovaIE(InfoExtractor): 'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/', 'info_dict': { 'id': '1756858', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Televizní noviny - 30. 5. 2015', 'thumbnail': 're:^https?://.*\.(?:jpg)', 'upload_date': '20150530', @@ -140,8 +139,21 @@ class NovaIE(InfoExtractor): mediafile = config['mediafile'] video_url = mediafile['src'] - ext = determine_ext(video_url) - video_url = video_url.replace('&%s:' % ext, '') + + m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url) + if m: + formats = [{ + 'url': m.group('url'), + 'app': m.group('app'), + 'play_path': m.group('playpath'), + 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf', + 'ext': 'flv', + }] + else: + formats = [{ + 'url': video_url, + }] + self._sort_formats(formats) title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage) description = clean_html(self._og_search_description(webpage, default=None)) @@ -163,6 +175,5 @@ class NovaIE(InfoExtractor): 'description': description, 'upload_date': upload_date, 'thumbnail': thumbnail, - 'url': video_url, - 'ext': ext, + 'formats': formats, } From 4c8fea92f350d5a3f33d505980ac750b05a9cd34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 3 Jun 2015 23:50:38 +0200 Subject: [PATCH 0872/2721] [test/aes] Fix on python 3.3 and higher Since 878563c847fa5248eedbd44187536dec04643eaf the aes functions only accepts the base64 data as a unicode string. 
--- test/test_aes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_aes.py b/test/test_aes.py index 4dc7de7b5..315a3f5ae 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -39,7 +39,7 @@ class TestAES(unittest.TestCase): encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' - ) + ).decode('utf-8') decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) @@ -47,7 +47,7 @@ class TestAES(unittest.TestCase): encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' - ) + ).decode('utf-8') decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) From eedda32e6bc4620d704eabab7a2c8f4b1f1a9169 Mon Sep 17 00:00:00 2001 From: ping <lipng.ong@gmail.com> Date: Thu, 4 Jun 2015 11:27:18 +0800 Subject: [PATCH 0873/2721] [qqmusic] Fix toplist --- youtube_dl/extractor/qqmusic.py | 58 +++++++++++++-------------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index b540033e2..48f28ffe9 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -181,60 +181,48 @@ class QQMusicToplistIE(QQPlaylistBaseIE): _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' _TESTS = [{ - 'url': 'http://y.qq.com/#type=toplist&p=global_12', + 'url': 'http://y.qq.com/#type=toplist&p=global_123', 'info_dict': { - 'id': 'global_12', - 'title': 'itunes榜', + 'id': 'global_123', + 'title': '美国iTunes榜', }, 'playlist_count': 10, }, { - 'url': 'http://y.qq.com/#type=toplist&p=top_6', + 'url': 'http://y.qq.com/#type=toplist&p=top_3', 'info_dict': { - 'id': 'top_6', + 'id': 'top_3', 'title': 'QQ音乐巅峰榜·欧美', + 'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统' + 
'计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据' + '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:' + '登录用户完整播放一首歌曲,记为一次有效播放;同一用户收听同一首歌曲,每天记录为1次有效播放' }, 'playlist_count': 100, }, { - 'url': 'http://y.qq.com/#type=toplist&p=global_5', + 'url': 'http://y.qq.com/#type=toplist&p=global_106', 'info_dict': { - 'id': 'global_5', - 'title': '韩国mnet排行榜', + 'id': 'global_106', + 'title': '韩国Mnet榜', }, 'playlist_count': 50, }] - @staticmethod - def strip_qq_jsonp(code): - return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)) - def _real_extract(self, url): list_id = self._match_id(url) list_type, num_id = list_id.split("_") - list_page = self._download_webpage( - "http://y.qq.com/y/static/toplist/index/%s.html" % list_id, + toplist_json = self._download_json( + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json' + % (list_type, num_id), list_id, 'Download toplist page') - entries = [] - if list_type == 'top': - jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id - else: - jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id + entries = [ + self.url_result( + 'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid'] + ) for song in toplist_json['songlist'] + ] - toplist_json = self._download_json( - jsonp_url, list_id, note='Retrieve toplist json', - errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) - - for song in toplist_json['l']: - s = song['s'] - song_mid = s.split("|")[20] - entries.append(self.url_result( - 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', - song_mid)) - - list_name = self._html_search_regex( - r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name', - default=None) - - return self.playlist_result(entries, list_id, list_name) + list_name = toplist_json['topinfo']['ListName'] + list_description = toplist_json['topinfo']['info'] + return self.playlist_result(entries, 
list_id, list_name, list_description) From ed15e9ba02382fc7db22e6176068d2220c00a32e Mon Sep 17 00:00:00 2001 From: ping <lipng.ong@gmail.com> Date: Thu, 4 Jun 2015 17:32:06 +0800 Subject: [PATCH 0874/2721] [qqmusic] Remove unused import --- youtube_dl/extractor/qqmusic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 48f28ffe9..9943fcddb 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,7 +9,6 @@ from .common import InfoExtractor from ..utils import ( strip_jsonp, unescapeHTML, - js_to_json, ) from ..compat import compat_urllib_request From 55e5841f14131ab61359535fdcc44e1564d555b8 Mon Sep 17 00:00:00 2001 From: ping <lipng.ong@gmail.com> Date: Thu, 4 Jun 2015 17:41:29 +0800 Subject: [PATCH 0875/2721] [qqmusic] Extract additional formats (mp3-128, mp3-320) --- youtube_dl/extractor/qqmusic.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index b540033e2..4b4ef4993 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -19,10 +19,10 @@ class QQMusicIE(InfoExtractor): _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', - 'md5': 'bed90b6db2a7a7a7e11bc585f471f63a', + 'md5': '9ce1c1c8445f561506d2e3cfb0255705', 'info_dict': { 'id': '004295Et37taLD', - 'ext': 'm4a', + 'ext': 'mp3', 'title': '可惜没如果', 'upload_date': '20141227', 'creator': '林俊杰', @@ -30,6 +30,12 @@ class QQMusicIE(InfoExtractor): } }] + _FORMATS = { + 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40}, + 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30}, + 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10} + } + # Reference: m_r_GetRUin() in top_player.js # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js @staticmethod @@ -69,11 +75,19 @@ 
class QQMusicIE(InfoExtractor): 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid, mid, note='Retrieve vkey', errnote='Unable to get vkey', transform_source=strip_jsonp)['key'] - song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid) + + formats = [] + for k, sf in self._FORMATS.items(): + formats.append({ + 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0' + % (sf['prefix'], mid, sf['ext'], vkey, guid), + 'format': k, 'format_id': k, 'preference': sf['preference'] + }) + self._sort_formats(formats) return { 'id': mid, - 'url': song_url, + 'formats': formats, 'title': song_name, 'upload_date': publish_time, 'creator': singer, From b9258c61789388b49792ebdceb5d804217a36da5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 4 Jun 2015 22:05:33 +0800 Subject: [PATCH 0876/2721] [YoutubeDL] Change how DashSegmentsFD is selected --- youtube_dl/downloader/__init__.py | 2 ++ youtube_dl/downloader/http.py | 4 ---- youtube_dl/extractor/youtube.py | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index f110830c4..1b618ab54 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from .hls import NativeHlsFD from .http import HttpFD from .rtsp import RtspFD from .rtmp import RtmpFD +from .dash import DashSegmentsFD from ..utils import ( determine_protocol, @@ -20,6 +21,7 @@ PROTOCOL_MAP = { 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, + 'dash_segments': DashSegmentsFD, } diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index ceacb8522..b7f144af9 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -6,7 +6,6 @@ import socket import time from .common import FileDownloader -from .dash import DashSegmentsFD from ..compat import ( compat_urllib_request, 
compat_urllib_error, @@ -20,9 +19,6 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): - if info_dict.get('initialization_url') and list(filter(None, info_dict.get('segment_urls', []))): - return DashSegmentsFD(self.ydl, self.params).real_download(filename, info_dict) - url = info_dict['url'] tmpfilename = self.temp_name(filename) stream = None diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5d1297e0d..692d4d8db 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -819,7 +819,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if segment_list: f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], - 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')] + 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], + 'protocol': 'dash_segments', }) try: existing_format = next( From 453a1617aac6e8000ed947cad7d88817c5740ede Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 4 Jun 2015 22:12:05 +0800 Subject: [PATCH 0877/2721] [downloader/dash] Reorder imports --- youtube_dl/downloader/dash.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 18eca2c04..5f14658ba 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals -from .common import FileDownloader -from ..compat import compat_urllib_request import re +from .common import FileDownloader +from ..compat import compat_urllib_request + class DashSegmentsFD(FileDownloader): """ From 423d2be5f8c5e70d202ddfa63f3e5365e6afe823 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 4 Jun 2015 
22:27:29 +0800 Subject: [PATCH 0878/2721] [downloader/dash] Rename the protocol 'http_dash_segments' looks more like a protocol name than 'dash_segments' --- youtube_dl/downloader/__init__.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 1b618ab54..dccc59212 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -21,7 +21,7 @@ PROTOCOL_MAP = { 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, - 'dash_segments': DashSegmentsFD, + 'http_dash_segments': DashSegmentsFD, } diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 692d4d8db..6d288e848 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -820,7 +820,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], - 'protocol': 'dash_segments', + 'protocol': 'http_dash_segments', }) try: existing_format = next( From 56c837ccb75b639d362397095f33300229c4bd1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jun 2015 20:34:48 +0600 Subject: [PATCH 0879/2721] [tnaflix] Fix typo --- youtube_dl/extractor/tnaflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 59af9aba0..bc51bae37 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -33,7 +33,7 @@ class TNAFlixIE(InfoExtractor): }, { 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', - 'matching_only': True, + 'only_matching': True, } ] From e52c0bd0eb853d4d242872e1d9ff5426a35dd30c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jun 2015 20:37:05 +0600 Subject: [PATCH 0880/2721] [tnaflix] Modernize --- youtube_dl/extractor/tnaflix.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index bc51bae37..3e335d653 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -51,9 +51,8 @@ class TNAFlixIE(InfoExtractor): age_limit = self._rta_search(webpage) - duration = self._html_search_meta('duration', webpage, 'duration', default=None) - if duration: - duration = parse_duration(duration[1:]) + duration = parse_duration(self._html_search_meta( + 'duration', webpage, 'duration', default=None)) cfg_url = self._proto_relative_url(self._html_search_regex( self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:') From 3ce9bc712acd88df8499dd0982277c8f64b0d15a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jun 2015 20:39:03 +0600 Subject: [PATCH 0881/2721] [empflix] Fix typo --- youtube_dl/extractor/empflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py index 9a5a8f4bb..4827022e0 100644 --- a/youtube_dl/extractor/empflix.py +++ b/youtube_dl/extractor/empflix.py @@ -26,6 +26,6 @@ class EMPFlixIE(TNAFlixIE): }, { 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', - 'matching_only': True, + 'only_matching': True, } ] From 3d6388e34ea41d937f39e561b7731f1389971a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 4 Jun 2015 20:42:37 +0600 Subject: [PATCH 0882/2721] [tnaflix] Fix relative URLs (empflix) --- youtube_dl/extractor/tnaflix.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index 3e335d653..c282865b2 100644 --- 
a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -61,14 +61,15 @@ class TNAFlixIE(InfoExtractor): cfg_url, display_id, note='Downloading metadata', transform_source=fix_xml_ampersands) - thumbnail = cfg_xml.find('./startThumb').text + thumbnail = self._proto_relative_url( + cfg_xml.find('./startThumb').text, 'http:') formats = [] for item in cfg_xml.findall('./quality/item'): video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text) format_id = item.find('res').text fmt = { - 'url': video_url, + 'url': self._proto_relative_url(video_url, 'http:'), 'format_id': format_id, } m = re.search(r'^(\d+)', format_id) From 9d4f213f90d6024c7748f4defdc7b45f2351b0da Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 5 Jun 2015 00:52:18 +0800 Subject: [PATCH 0883/2721] [qqmusic:toplist] List name and description are optional --- youtube_dl/extractor/qqmusic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 9943fcddb..f773332a8 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -222,6 +222,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): ) for song in toplist_json['songlist'] ] - list_name = toplist_json['topinfo']['ListName'] - list_description = toplist_json['topinfo']['info'] + topinfo = toplist_json.get('topinfo', {}) + list_name = topinfo.get('ListName') + list_description = topinfo.get('info') return self.playlist_result(entries, list_id, list_name, list_description) From f5c78d118ba2d7e5e4a1ccd40c97fc1bf85a8dcf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 4 Jun 2015 21:49:02 +0200 Subject: [PATCH 0884/2721] release 2015.06.04 --- docs/supportedsites.md | 5 +++++ youtube_dl/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a421ae62b..d147b53fe 100644 --- 
a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -10,6 +10,7 @@ - **56.com** - **5min** - **8tracks** + - **91porn** - **9gag** - **abc.net.au** - **Abc7News** @@ -319,6 +320,7 @@ - **Noco** - **Normalboots** - **NosVideo** + - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **novamov**: NovaMov - **Nowness** - **NowTV** @@ -431,6 +433,8 @@ - **smotri:user**: Smotri.com user videos - **Snotr** - **Sohu** + - **soompi** + - **soompi:show** - **soundcloud** - **soundcloud:playlist** - **soundcloud:set** @@ -505,6 +509,7 @@ - **Trilulilu** - **TruTube** - **Tube8** + - **TubiTv** - **Tudou** - **Tumblr** - **TuneIn** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 653710131..84224b7a7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.29' +__version__ = '2015.06.04' From 0e805e782bd05951ca3f420cf2a050e2ac3ae846 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 4 Jun 2015 21:54:33 +0200 Subject: [PATCH 0885/2721] release 2015.06.04.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 84224b7a7..9cf84ff71 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.06.04' +__version__ = '2015.06.04.1' From 8b8cde21406b53f5aeb6586dab03a9d78d62e631 Mon Sep 17 00:00:00 2001 From: ping <lipng.ong@gmail.com> Date: Fri, 5 Jun 2015 06:04:26 +0800 Subject: [PATCH 0886/2721] [qqmusic] Set abr for mp3 formats --- youtube_dl/extractor/qqmusic.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 4b4ef4993..dc300e189 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -31,8 +31,8 @@ class 
QQMusicIE(InfoExtractor): }] _FORMATS = { - 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40}, - 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30}, + 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320}, + 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128}, 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10} } @@ -77,11 +77,12 @@ class QQMusicIE(InfoExtractor): transform_source=strip_jsonp)['key'] formats = [] - for k, sf in self._FORMATS.items(): + for k, f in self._FORMATS.items(): formats.append({ 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0' - % (sf['prefix'], mid, sf['ext'], vkey, guid), - 'format': k, 'format_id': k, 'preference': sf['preference'] + % (f['prefix'], mid, f['ext'], vkey, guid), + 'format': k, 'format_id': k, 'preference': f['preference'], + 'abr': f.get('abr') }) self._sort_formats(formats) From d31573fa37c8db7133492baf0a6be3ece643f8ff Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 5 Jun 2015 22:55:29 +0800 Subject: [PATCH 0887/2721] [teamcoco] Handle incomplete m3u8 URLs (fixes #5798) There are 2 TODOs. I don't know how to handle these cases correctly. --- youtube_dl/extractor/teamcoco.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index b2a4b1fc0..d1b7264b4 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -51,6 +51,17 @@ class TeamcocoIE(InfoExtractor): 'params': { 'skip_download': True, # m3u8 downloads } + }, { + 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9', + 'info_dict': { + 'id': '89341', + 'ext': 'mp4', + 'title': 'Full Episode - Mon. 
6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', + 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett', + }, + 'params': { + 'skip_download': True, # m3u8 downloads + } } ] _VIDEO_ID_REGEXES = ( @@ -110,9 +121,23 @@ class TeamcocoIE(InfoExtractor): get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) for filed in data['files']: if determine_ext(filed['url']) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - filed['url'], video_id, ext='mp4')) + # compat_urllib_parse.urljoin does not work here + if filed['url'].startswith('/'): + m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url'] + else: + m3u8_url = filed['url'] + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4') + for m3u8_format in m3u8_formats: + if m3u8_format not in formats: + formats.append(m3u8_format) + elif determine_ext(filed['url']) == 'f4m': + # TODO Correct f4m extraction + continue else: + if filed['url'].startswith('/mp4:protected/'): + # TODO Correct extraction for these files + continue m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) if m_format is not None: format_id = m_format.group(1) From f00a650705a5e5b4f2b540ea8133a1752e63dd81 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 5 Jun 2015 23:16:34 +0800 Subject: [PATCH 0888/2721] [qqmusic] Rearrange codes --- youtube_dl/extractor/qqmusic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index e24ddaefe..c903bee58 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -80,7 +80,9 @@ class QQMusicIE(InfoExtractor): formats.append({ 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0' % (f['prefix'], mid, f['ext'], vkey, guid), - 'format': k, 'format_id': k, 'preference': f['preference'], + 'format': k, + 'format_id': k, + 'preference': f['preference'], 
'abr': f.get('abr') }) self._sort_formats(formats) From e8ac61e840b5c02e406b910f5f0eed3d8b331969 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 5 Jun 2015 23:19:25 +0800 Subject: [PATCH 0889/2721] [qqmusic] Use meaningful variable names --- youtube_dl/extractor/qqmusic.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index c903bee58..bafa81c21 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -76,14 +76,14 @@ class QQMusicIE(InfoExtractor): transform_source=strip_jsonp)['key'] formats = [] - for k, f in self._FORMATS.items(): + for format_id, details in self._FORMATS.items(): formats.append({ 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0' - % (f['prefix'], mid, f['ext'], vkey, guid), - 'format': k, - 'format_id': k, - 'preference': f['preference'], - 'abr': f.get('abr') + % (details['prefix'], mid, details['ext'], vkey, guid), + 'format': format_id, + 'format_id': format_id, + 'preference': details['preference'], + 'abr': details.get('abr'), }) self._sort_formats(formats) From dfe7dd9bdb45ec765c9b335c149e9913cf7e413f Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 6 Jun 2015 02:54:57 +0300 Subject: [PATCH 0890/2721] [izlesene] Unquote video URLs and simplify --- youtube_dl/extractor/izlesene.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index 99a1361f8..753cb98ea 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote from ..utils import ( determine_ext, float_or_none, @@ -67,9 +68,9 @@ class IzleseneIE(InfoExtractor): uploader = self._html_search_regex( 
r"adduserUsername\s*=\s*'([^']+)';", - webpage, 'uploader', fatal=False, default='') + webpage, 'uploader', fatal=False) timestamp = parse_iso8601(self._html_search_meta( - 'uploadDate', webpage, 'upload date', fatal=False)) + 'uploadDate', webpage, 'upload date')) duration = float_or_none(self._html_search_regex( r'"videoduration"\s*:\s*"([^"]+)"', @@ -86,8 +87,7 @@ class IzleseneIE(InfoExtractor): # Might be empty for some videos. streams = self._html_search_regex( - r'"qualitylevel"\s*:\s*"([^"]+)"', - webpage, 'streams', fatal=False, default='') + r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='') formats = [] if streams: @@ -95,15 +95,15 @@ class IzleseneIE(InfoExtractor): quality, url = re.search(r'\[(\w+)\](.+)', stream).groups() formats.append({ 'format_id': '%sp' % quality if quality else 'sd', - 'url': url, + 'url': compat_urllib_parse_unquote(url), 'ext': ext, }) else: stream_url = self._search_regex( - r'"streamurl"\s?:\s?"([^"]+)"', webpage, 'stream URL') + r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL') formats.append({ 'format_id': 'sd', - 'url': stream_url, + 'url': compat_urllib_parse_unquote(stream_url), 'ext': ext, }) From c33c547d66b20064f83932cdaa04823b17a96b70 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 6 Jun 2015 02:57:12 +0300 Subject: [PATCH 0891/2721] [izlesene] Avoid timestamp differences in tests due to DST --- youtube_dl/extractor/izlesene.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dl/extractor/izlesene.py index 753cb98ea..bc226fa67 100644 --- a/youtube_dl/extractor/izlesene.py +++ b/youtube_dl/extractor/izlesene.py @@ -31,7 +31,7 @@ class IzleseneIE(InfoExtractor): 'description': 'md5:253753e2655dde93f59f74b572454f6d', 'thumbnail': 're:^http://.*\.jpg', 'uploader_id': 'pelikzzle', - 'timestamp': 1404302298, + 'timestamp': int, 'upload_date': '20140702', 'duration': 95.395, 'age_limit': 0, @@ -47,7 +47,7 @@ 
class IzleseneIE(InfoExtractor): 'description': 'Tarkan Dortmund 2006 Konseri', 'thumbnail': 're:^http://.*\.jpg', 'uploader_id': 'parlayankiz', - 'timestamp': 1163322193, + 'timestamp': int, 'upload_date': '20061112', 'duration': 253.666, 'age_limit': 0, From 54eb81a087516e9d040bc1ad274c0a64b51dd1d1 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 6 Jun 2015 03:11:43 +0300 Subject: [PATCH 0892/2721] [pornovoisines] Improve average_rating extraction and update test case --- youtube_dl/extractor/pornovoisines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index 9688ed948..eba4dfbb3 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -34,7 +34,7 @@ class PornoVoisinesIE(InfoExtractor): 'duration': 120, 'view_count': int, 'average_rating': float, - 'categories': ['Débutante', 'Scénario', 'Sodomie'], + 'categories': ['Débutantes', 'Scénario', 'Sodomie'], 'age_limit': 18, } } @@ -71,7 +71,7 @@ class PornoVoisinesIE(InfoExtractor): view_count = int_or_none(self._search_regex( r'(\d+) vues', webpage, 'view count', fatal=False)) average_rating = self._search_regex( - r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False) + r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False) if average_rating: average_rating = float_or_none(average_rating.replace(',', '.')) From 3d8e9573a470594df6fa471dc33c4c4b938b668a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 6 Jun 2015 06:25:37 +0600 Subject: [PATCH 0893/2721] [youtube:channel] Improve channel id extraction (#5904) --- youtube_dl/extractor/youtube.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index aacb999ce..419f7b019 100644 --- a/youtube_dl/extractor/youtube.py +++ 
b/youtube_dl/extractor/youtube.py @@ -1406,10 +1406,12 @@ class YoutubeChannelIE(InfoExtractor): channel_page = self._download_webpage( url + '?view=57', channel_id, 'Downloading channel page', fatal=False) - channel_playlist_id = self._search_regex( - [r'<meta itemprop="channelId" content="([^"]+)">', - r'data-channel-external-id="([^"]+)"'], - channel_page, 'channel id', default=None) + channel_playlist_id = self._html_search_meta( + 'channelId', channel_page, 'channel id', default=None) + if not channel_playlist_id: + channel_playlist_id = self._search_regex( + r'data-channel-external-id="([^"]+)"', + channel_page, 'channel id', default=None) if channel_playlist_id and channel_playlist_id.startswith('UC'): playlist_id = 'UU' + channel_playlist_id[2:] return self.url_result( From 223544552fcfec0c5c6a83326520c614e4489cbb Mon Sep 17 00:00:00 2001 From: Hannu Lintala <hannu.lintala@gmail.com> Date: Sat, 9 May 2015 03:53:43 +0300 Subject: [PATCH 0894/2721] [Ruutu] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ruutu.py | 88 ++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 youtube_dl/extractor/ruutu.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8ec0c1032..860023d14 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -448,6 +448,7 @@ from .rutube import ( RutubePersonIE, ) from .rutv import RUTVIE +from .ruutu import RuutuIE from .sandia import SandiaIE from .safari import ( SafariIE, diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py new file mode 100644 index 000000000..e346434f9 --- /dev/null +++ b/youtube_dl/extractor/ruutu.py @@ -0,0 +1,88 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +import re + + +class RuutuIE(InfoExtractor): + _VALID_URL = 
r'http://(www\.)?ruutu\.fi/ohjelmat/(?:[^/]+/)?(?P<id>.*)$' + _TESTS = [ + { + 'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi', + 'md5': 'ab2093f39be1ca8581963451b3c0234f', + 'info_dict': { + 'id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi', + 'ext': 'mp4', + 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', + 'description': 'Toinen toistaan huikeampia ohjelmaideoita ja täysin päätöntä sekoilua? No sitä juuri nimenomaan. Metro Helsingin Iltapäivän vieraaksi saapui Tuomas Kauhanen ja he Petra Kalliomaan kanssa keskustelivat hieman ennen lähetyksen alkua, mutta kamerat olivatkin jo päällä.', + }, + 'params': { + 'format': 'http-1000', + } + }, + { + 'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa', + 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', + 'info_dict': { + 'id': 'superpesis-katso-koko-kausi-ruudussa', + 'ext': 'mp4', + 'title': 'Superpesis: katso koko kausi Ruudussa', + 'description': 'Huippujännittävän Superpesiksen suoria ottelulähetyksiä seurataan Ruudussa kauden alusta viimeiseen finaaliin asti. 
Katso lisätiedot osoitteesta ruutu.fi/superpesis.', + }, + 'params': { + 'format': 'http-1000', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + media_id = self._html_search_regex(r'data-media-id="(\d+)"', webpage, 'media_id') + media_json = self._parse_json(self._search_regex(r'jQuery.extend\([^,]+, (.*)\);', webpage, 'media_data'), video_id) + xml_url = media_json['ruutuplayer']['xmlUrl'].replace('{ID}', media_id) + media_xml = self._download_xml(xml_url, media_id) + + formats = [] + parsed_urls = [] + for fmt in media_xml.findall('.//Clip//'): + url = fmt.text + if not fmt.tag.endswith('File') or url in parsed_urls or \ + 'NOT_USED' in url: + continue + + if url.endswith('m3u8'): + formats.extend(self._extract_m3u8_formats(url, media_id, m3u8_id='hls')) + parsed_urls.append(url) + elif url.endswith('f4m'): + formats.extend(self._extract_f4m_formats(url, media_id, f4m_id='hds')) + parsed_urls.append(url) + else: + proto = compat_urllib_parse_urlparse(url).scheme + width_str, height_str = fmt.get('resolution').split('x') + tbr = int(fmt.get('bitrate', 0)) + formats.append({ + 'format_id': '%s-%d' % (proto, tbr), + 'url': url, + 'width': int(width_str), + 'height': int(height_str), + 'tbr': tbr, + 'ext': url.rsplit('.', 1)[-1], + 'live': True, + 'protocol': proto, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': int(media_xml.find('.//Runtime').text), + 'age_limit': int(media_xml.find('.//AgeLimit').text), + } From a9e58ecd3fdfa93cdc8a7f9fc852dbbd0814d6a4 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 6 Jun 2015 13:56:46 +0300 Subject: [PATCH 0895/2721] [turbo] Improve description extraction `og:description` is 
empty for some videos. --- youtube_dl/extractor/turbo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/turbo.py b/youtube_dl/extractor/turbo.py index 29703a8a9..7ae63a499 100644 --- a/youtube_dl/extractor/turbo.py +++ b/youtube_dl/extractor/turbo.py @@ -23,7 +23,7 @@ class TurboIE(InfoExtractor): 'ext': 'mp4', 'duration': 3715, 'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ', - 'description': 'Retrouvez dans cette rubrique toutes les vidéos de l\'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ', + 'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...', 'thumbnail': 're:^https?://.*\.jpg$', } } @@ -42,7 +42,7 @@ class TurboIE(InfoExtractor): title = xpath_text(item, './title', 'title') duration = int_or_none(xpath_text(item, './durate', 'duration')) thumbnail = xpath_text(item, './visuel_clip', 'thumbnail') - description = self._og_search_description(webpage) + description = self._html_search_meta('description', webpage) formats = [] get_quality = qualities(['3g', 'sd', 'hq']) From 05aa9c82d90644af406519e5e25fefb0884d504e Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis <njonaitis@gmail.com> Date: Sat, 6 Jun 2015 13:58:20 +0300 Subject: [PATCH 0896/2721] [sunporno] Fix view_count extraction --- youtube_dl/extractor/sunporno.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index 854d01bee..e527aa971 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -44,7 +44,7 @@ class SunPornoIE(InfoExtractor): webpage, 'duration', fatal=False)) view_count = int_or_none(self._html_search_regex( - r'class="views">\s*(\d+)\s*<', + r'class="views">(?:<noscript>)?\s*(\d+)\s*<', webpage, 'view count', fatal=False)) comment_count = 
int_or_none(self._html_search_regex( r'(\d+)</b> Comments?', From 4da31bd56629054497634d041035e4bd6fcfacbb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 6 Jun 2015 22:22:26 +0800 Subject: [PATCH 0897/2721] [youtube] Fix a FutureWarning from xml.etree.ElementTree --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6d288e848..2424ac2c0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -816,7 +816,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': filesize, 'fps': int_or_none(r.attrib.get('frameRate')), } - if segment_list: + if len(segment_list): f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], From f1da861018924e6f442ffedd9a5682055c79aea6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 00:37:29 +0800 Subject: [PATCH 0898/2721] [iqiyi] PEP8 --- youtube_dl/extractor/iqiyi.py | 56 +++++++++++++++++------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 747f3f902..597441baf 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -16,19 +16,20 @@ import random import zlib import hashlib + class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' _TEST = { - 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', - 'md5': '2cb594dc2781e6c941a110d8f358118b', - 'info_dict': { - 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', - 'title': '美国德州空中惊现奇异云团 酷似UFO', - 'ext': 'f4v', - } + 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', + 'md5': '2cb594dc2781e6c941a110d8f358118b', + 'info_dict': { + 'id': 
'9c1fb1b99d192b21c559e5a1a2cb3c73', + 'title': '美国德州空中惊现奇异云团 酷似UFO', + 'ext': 'f4v', + } } def construct_video_urls(self, data, video_id, _uuid, bid): @@ -46,7 +47,7 @@ class IqiyiIE(InfoExtractor): c = len(b) s = '' for i in range(c - 1, -1, -1): - a = do_xor(int(b[c-i-1], 16), i) + a = do_xor(int(b[c - i - 1], 16), i) s += chr(a) return s[::-1] @@ -54,15 +55,14 @@ class IqiyiIE(InfoExtractor): mg = ')(*&^flash@#$%a' tm = self._download_json( 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t'] - t = str(int(math.floor(int(tm)/(600.0)))) - return hashlib.md5( - (t+mg+x).encode('utf8')).hexdigest() + t = str(int(math.floor(int(tm) / (600.0)))) + return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() # get accept format # getting all format will spend minutes for a big video. if bid == 'best': - bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] \ - if 0 < int(i['bid']) <= 10] + bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] + if 0 < int(i['bid']) <= 10] bid = str(max(bids)) video_urls_dict = {} @@ -117,24 +117,24 @@ class IqiyiIE(InfoExtractor): def get_format(self, bid): _dict = { - '1' : 'h6', - '2' : 'h5', - '3' : 'h4', - '4' : 'h3', - '5' : 'h2', - '10' : 'h1' + '1': 'h6', + '2': 'h5', + '3': 'h4', + '4': 'h3', + '5': 'h2', + '10': 'h1' } return _dict.get(str(bid), None) def get_bid(self, format_id): _dict = { - 'h6' : '1', - 'h5' : '2', - 'h4' : '3', - 'h3' : '4', - 'h2' : '5', - 'h1' : '10', - 'best' : 'best' + 'h6': '1', + 'h5': '2', + 'h4': '3', + 'h3': '4', + 'h2': '5', + 'h1': '10', + 'best': 'best' } return _dict.get(format_id, None) @@ -207,7 +207,7 @@ class IqiyiIE(InfoExtractor): for format_id in video_urls_dict: video_urls = video_urls_dict[format_id] for i, video_url_info in enumerate(video_urls): - if len(entries) < i+1: + if len(entries) < i + 1: entries.append({'formats': []}) entries[i]['formats'].append( { @@ -222,7 +222,7 @@ class IqiyiIE(InfoExtractor): self._sort_formats(entries[i]['formats']) 
entries[i].update( { - 'id': '_part%d' % (i+1), + 'id': '_part%d' % (i + 1), 'title': title, } ) From 7012620e2b9355d25ddfc855fc5990af938f04d8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 00:44:54 +0800 Subject: [PATCH 0899/2721] [iqiyi] Remove format selection codes --- youtube_dl/extractor/iqiyi.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 597441baf..5645fb6ee 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -32,7 +32,7 @@ class IqiyiIE(InfoExtractor): } } - def construct_video_urls(self, data, video_id, _uuid, bid): + def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): a = y % 3 if a == 1: @@ -58,13 +58,6 @@ class IqiyiIE(InfoExtractor): t = str(int(math.floor(int(tm) / (600.0)))) return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() - # get accept format - # getting all format will spend minutes for a big video. 
- if bid == 'best': - bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] - if 0 < int(i['bid']) <= 10] - bid = str(max(bids)) - video_urls_dict = {} for i in data['vp']['tkl'][0]['vs']: if 0 < int(i['bid']) <= 10: @@ -80,12 +73,6 @@ class IqiyiIE(InfoExtractor): if t.endswith('mp4'): video_urls_info = i['flvs'] - if int(i['bid']) != int(bid): # ignore missing match format - video_urls.extend( - [('http://example.com/v.flv', ii['b']) for ii in video_urls_info]) - video_urls_dict[format_id] = video_urls - continue - for ii in video_urls_info: vl = ii['l'] if not vl.startswith('/'): @@ -193,14 +180,9 @@ class IqiyiIE(InfoExtractor): title = data['vi']['vn'] - format = self._downloader.params.get('format', None) - bid = self.get_bid(format) if format else 'best' - if not bid: - raise ExtractorError('Can\'t get format.') - # generate video_urls_dict video_urls_dict = self.construct_video_urls( - data, video_id, _uuid, bid) + data, video_id, _uuid) # construct info entries = [] From 29e7e0781b1b8e276c28a079bc5b18e1b0db2d5e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 00:56:08 +0800 Subject: [PATCH 0900/2721] [iqiyi] Simplify and improve regex patterns See the comments in #5849 --- youtube_dl/extractor/iqiyi.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 5645fb6ee..18a7587a2 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -161,12 +161,11 @@ class IqiyiIE(InfoExtractor): webpage = self._download_webpage( url, 'temp_id', note='download video page') tvid = self._search_regex( - r'tvId ?= ?(\'|\")(?P<tvid>\d+)', webpage, 'tvid', flags=re.I, group='tvid') + r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') video_id = self._search_regex( - r'videoId ?= ?(\'|\")(?P<video_id>[a-z\d]+)', - webpage, 'video_id', flags=re.I, group='video_id') + r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 
'video_id') swf_url = self._search_regex( - r'(?P<swf>http://.+?MainPlayer.+?\.swf)', webpage, 'swf') + r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL') _uuid = uuid.uuid4().hex enc_key = self.get_enc_key(swf_url, video_id) From aacda28b28c1804866d634c5c5086b3d53cb2b2f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 01:32:03 +0800 Subject: [PATCH 0901/2721] [iqiyi] Give error message for assertion failures --- youtube_dl/extractor/iqiyi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 18a7587a2..dc35c3380 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -171,7 +171,10 @@ class IqiyiIE(InfoExtractor): enc_key = self.get_enc_key(swf_url, video_id) raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) - assert raw_data['code'] == 'A000000' + + if raw_data['code'] != 'A000000': + raise ExtractorError('Unable to load data. 
Error code: ' + raw_data['code']) + if not raw_data['data']['vp']['tkl']: raise ExtractorError('No support iQiqy VIP video') From 958d0b659b80d4493d045d4da82074ed68ed6c4e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 01:35:09 +0800 Subject: [PATCH 0902/2721] [iqiyi] Reorder imports --- youtube_dl/extractor/iqiyi.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index dc35c3380..36029361a 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -1,20 +1,17 @@ # coding: utf-8 - from __future__ import unicode_literals -from .common import InfoExtractor - -from ..compat import compat_urllib_parse - -from ..utils import ExtractorError - +import hashlib +import math +import random import re import time import uuid -import math -import random import zlib -import hashlib + +from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ExtractorError class IqiyiIE(InfoExtractor): From ffba4edb067238b593b98c71f4293e9b60ba95ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 01:52:51 +0800 Subject: [PATCH 0903/2721] [iqiyi] Improve some variable names and add download notes --- youtube_dl/extractor/iqiyi.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 36029361a..c17e1fde4 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -48,35 +48,37 @@ class IqiyiIE(InfoExtractor): s += chr(a) return s[::-1] - def get_path_key(x): + def get_path_key(x, format_id, segment_index): mg = ')(*&^flash@#$%a' tm = self._download_json( - 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t'] + 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id, + note='Download path key of 
segment %d for format %s' % (segment_index + 1, format_id) + )['t'] t = str(int(math.floor(int(tm) / (600.0)))) return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() video_urls_dict = {} - for i in data['vp']['tkl'][0]['vs']: - if 0 < int(i['bid']) <= 10: - format_id = self.get_format(i['bid']) + for format_item in data['vp']['tkl'][0]['vs']: + if 0 < int(format_item['bid']) <= 10: + format_id = self.get_format(format_item['bid']) else: continue video_urls = [] - video_urls_info = i['fs'] - if not i['fs'][0]['l'].startswith('/'): - t = get_encode_code(i['fs'][0]['l']) + video_urls_info = format_item['fs'] + if not format_item['fs'][0]['l'].startswith('/'): + t = get_encode_code(format_item['fs'][0]['l']) if t.endswith('mp4'): - video_urls_info = i['flvs'] + video_urls_info = format_item['flvs'] - for ii in video_urls_info: - vl = ii['l'] + for segment_index, segment in enumerate(video_urls_info): + vl = segment['l'] if not vl.startswith('/'): vl = get_encode_code(vl) key = get_path_key( - vl.split('/')[-1].split('.')[0]) - filesize = ii['b'] + vl.split('/')[-1].split('.')[0], format_id, segment_index) + filesize = segment['b'] base_url = data['vp']['du'].split('/') base_url.insert(-1, key) base_url = '/'.join(base_url) @@ -91,7 +93,9 @@ class IqiyiIE(InfoExtractor): } api_video_url = base_url + vl + '?' 
+ \ compat_urllib_parse.urlencode(param) - js = self._download_json(api_video_url, video_id) + js = self._download_json( + api_video_url, video_id, + note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) video_url = js['l'] video_urls.append( (video_url, filesize)) From c4ee87022bd18863fc3f22f80064453e272d956f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 01:57:05 +0800 Subject: [PATCH 0904/2721] [iqiyi] Change id for multipart videos --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index c17e1fde4..840cc9a4d 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -207,7 +207,7 @@ class IqiyiIE(InfoExtractor): self._sort_formats(entries[i]['formats']) entries[i].update( { - 'id': '_part%d' % (i + 1), + 'id': '%s_part%d' % (video_id, i + 1), 'title': title, } ) From 99481135907b5fa3558d4f176fd02acbdafccdb6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 02:09:33 +0800 Subject: [PATCH 0905/2721] [iqiyi] Add a multipart test case --- youtube_dl/extractor/iqiyi.py | 67 +++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 840cc9a4d..d73687d88 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -19,7 +19,7 @@ class IqiyiIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' - _TEST = { + _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', 'md5': '2cb594dc2781e6c941a110d8f358118b', 'info_dict': { @@ -27,7 +27,70 @@ class IqiyiIE(InfoExtractor): 'title': '美国德州空中惊现奇异云团 酷似UFO', 'ext': 'f4v', } - } + }, { + 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb', + 'title': '名侦探柯南第752集', + }, + 'playlist': 
[{ + 'md5': '7e49376fecaffa115d951634917fe105', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '41b75ba13bb7ac0e411131f92bc4f6ca', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '0cee1dd0a3d46a83e71e2badeae2aab0', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '4f8ad72373b0c491b582e7c196b0b1f9', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': 'd89ad028bcfad282918e8098e811711d', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '9cb1e5c95da25dff0660c32ae50903b7', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '155116e0ff1867bbc9b98df294faabc9', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'md5': '53f5db77622ae14fa493ed2a278a082b', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }], + }] def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): From 865ab62f43eb94a9f4f757a464df147e983cb439 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 02:13:22 +0800 Subject: [PATCH 0906/2721] [iqiyi] Make _VALID_URL more accurate v_* urls are individual videos, while a_* urls are playlists, which are not supported yet. 
--- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index d73687d88..f0d423331 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -17,7 +17,7 @@ from ..utils import ExtractorError class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' - _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' + _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html' _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', From 08bb8ef2011d795948d8e89478bf3afe4b99405f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 02:25:00 +0800 Subject: [PATCH 0907/2721] [iqiyi] Unify get_format() and get_bid() --- youtube_dl/extractor/iqiyi.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index f0d423331..122f33692 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -92,6 +92,15 @@ class IqiyiIE(InfoExtractor): }], }] + _FORMATS_MAP = [ + ('1', 'h6'), + ('2', 'h5'), + ('3', 'h4'), + ('4', 'h3'), + ('5', 'h2'), + ('10', 'h1'), + ] + def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): a = y % 3 @@ -167,27 +176,12 @@ class IqiyiIE(InfoExtractor): return video_urls_dict def get_format(self, bid): - _dict = { - '1': 'h6', - '2': 'h5', - '3': 'h4', - '4': 'h3', - '5': 'h2', - '10': 'h1' - } - return _dict.get(str(bid), None) + matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] + return matched_format_ids[0] if len(matched_format_ids) else None def get_bid(self, format_id): - _dict = { - 'h6': '1', - 'h5': '2', - 'h4': '3', - 'h3': '4', - 'h2': '5', - 'h1': '10', - 'best': 'best' - } - return _dict.get(format_id, None) + matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] + return 
matched_bids[0] if len(matched_bids) else None def get_raw_data(self, tvid, video_id, enc_key, _uuid): tm = str(int(time.time())) From 9c5f685ef14a8b44d17b897ba8ae2da051011c35 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 02:39:03 +0800 Subject: [PATCH 0908/2721] [iqiyi] Improve regex pattern again --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 122f33692..15481b84b 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -223,7 +223,7 @@ class IqiyiIE(InfoExtractor): video_id = self._search_regex( r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') swf_url = self._search_regex( - r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL') + r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') _uuid = uuid.uuid4().hex enc_key = self.get_enc_key(swf_url, video_id) From b5a3c7f10927c9d55f6fdad5f5c002e02338642e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 02:47:36 +0800 Subject: [PATCH 0909/2721] [iqiyi] Cache encryption keys --- youtube_dl/extractor/iqiyi.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 15481b84b..9106dd074 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import hashlib import math +import os.path import random import re import time @@ -11,7 +12,10 @@ import zlib from .common import InfoExtractor from ..compat import compat_urllib_parse -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + url_basename, +) class IqiyiIE(InfoExtractor): @@ -207,12 +211,20 @@ class IqiyiIE(InfoExtractor): return raw_data def get_enc_key(self, swf_url, video_id): + filename, _ = 
os.path.splitext(url_basename(swf_url)) + enc_key_json = self._downloader.cache.load('iqiyi-enc-key', filename) + if enc_key_json is not None: + return enc_key_json[0] + req = self._request_webpage( swf_url, video_id, note='download swf content') cn = req.read() cn = zlib.decompress(cn[8:]) pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv') enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8') + + self._downloader.cache.store('iqiyi-enc-key', filename, [enc_key]) + return enc_key def _real_extract(self, url): From d00735a0c5aabd38b37bfea76a93ae8c47a8d419 Mon Sep 17 00:00:00 2001 From: Hannu Lintala <hannu.lintala@gmail.com> Date: Sat, 6 Jun 2015 23:01:23 +0300 Subject: [PATCH 0910/2721] [ruutu] Don't use fallback for DASH and other non-HTTP urls --- youtube_dl/extractor/ruutu.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index e346434f9..59e0b12fd 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -62,6 +62,8 @@ class RuutuIE(InfoExtractor): formats.extend(self._extract_f4m_formats(url, media_id, f4m_id='hds')) parsed_urls.append(url) else: + if not fmt.tag.startswith('HTTP'): + continue proto = compat_urllib_parse_urlparse(url).scheme width_str, height_str = fmt.get('resolution').split('x') tbr = int(fmt.get('bitrate', 0)) From de390ea0771a0e35c0c2970bc00f5fa2dd9d3eac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 7 Jun 2015 00:19:45 +0200 Subject: [PATCH 0911/2721] update: Use https for getting the version info (fixes #5909) --- youtube_dl/update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/update.py b/youtube_dl/update.py index de3169eef..fc7ac8305 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -50,7 +50,7 @@ def rsa_verify(message, signature, key): def update_self(to_screen, verbose): """Update the program 
file with the latest version from the repository""" - UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" + UPDATE_URL = "https://rg3.github.io/youtube-dl/update/" VERSION_URL = UPDATE_URL + 'LATEST_VERSION' JSON_URL = UPDATE_URL + 'versions.json' UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) From 9414338a48ca815fd666aad496ebabd6d0c76e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 7 Jun 2015 05:37:29 +0600 Subject: [PATCH 0912/2721] [ruutu] Improve, make more robust and fix python 2.6 support --- youtube_dl/extractor/ruutu.py | 129 +++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 50 deletions(-) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 59e0b12fd..4e22628d0 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -3,88 +3,117 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse -import re +from ..utils import ( + determine_ext, + int_or_none, + xpath_text, +) class RuutuIE(InfoExtractor): - _VALID_URL = r'http://(www\.)?ruutu\.fi/ohjelmat/(?:[^/]+/)?(?P<id>.*)$' + _VALID_URL = r'http://(?:www\.)?ruutu\.fi/ohjelmat/(?:[^/?#]+/)*(?P<id>[^/?#]+)' _TESTS = [ { 'url': 'http://www.ruutu.fi/ohjelmat/oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi', 'md5': 'ab2093f39be1ca8581963451b3c0234f', 'info_dict': { - 'id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi', + 'id': '2058907', + 'display_id': 'oletko-aina-halunnut-tietaa-mita-tapahtuu-vain-hetki-ennen-lahetysta-nyt-se-selvisi', 'ext': 'mp4', 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki 
ennen lähetystä? - Nyt se selvisi!', - 'description': 'Toinen toistaan huikeampia ohjelmaideoita ja täysin päätöntä sekoilua? No sitä juuri nimenomaan. Metro Helsingin Iltapäivän vieraaksi saapui Tuomas Kauhanen ja he Petra Kalliomaan kanssa keskustelivat hieman ennen lähetyksen alkua, mutta kamerat olivatkin jo päällä.', + 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 114, + 'age_limit': 0, }, - 'params': { - 'format': 'http-1000', - } }, { 'url': 'http://www.ruutu.fi/ohjelmat/superpesis/superpesis-katso-koko-kausi-ruudussa', 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', 'info_dict': { - 'id': 'superpesis-katso-koko-kausi-ruudussa', + 'id': '2057306', + 'display_id': 'superpesis-katso-koko-kausi-ruudussa', 'ext': 'mp4', 'title': 'Superpesis: katso koko kausi Ruudussa', - 'description': 'Huippujännittävän Superpesiksen suoria ottelulähetyksiä seurataan Ruudussa kauden alusta viimeiseen finaaliin asti. Katso lisätiedot osoitteesta ruutu.fi/superpesis.', + 'description': 'md5:44c44a99fdbe5b380ab74ebd75f0af77', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 40, + 'age_limit': 0, }, - 'params': { - 'format': 'http-1000', - } }, ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) + display_id = self._match_id(url) - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - media_id = self._html_search_regex(r'data-media-id="(\d+)"', webpage, 'media_id') - media_json = self._parse_json(self._search_regex(r'jQuery.extend\([^,]+, (.*)\);', webpage, 'media_data'), video_id) - xml_url = media_json['ruutuplayer']['xmlUrl'].replace('{ID}', media_id) - media_xml = self._download_xml(xml_url, media_id) + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'data-media-id="(\d+)"', webpage, 'media id') + + video_xml_url = None + + media_data = self._search_regex( + r'jQuery\.extend\([^,]+,\s*(.+?)\);', webpage, + 'media data', 
default=None) + if media_data: + media_json = self._parse_json(media_data, display_id, fatal=False) + if media_json: + xml_url = media_json.get('ruutuplayer', {}).get('xmlUrl') + if xml_url: + video_xml_url = xml_url.replace('{ID}', video_id) + + if not video_xml_url: + video_xml_url = 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id + + video_xml = self._download_xml(video_xml_url, video_id) formats = [] - parsed_urls = [] - for fmt in media_xml.findall('.//Clip//'): - url = fmt.text - if not fmt.tag.endswith('File') or url in parsed_urls or \ - 'NOT_USED' in url: - continue + processed_urls = [] - if url.endswith('m3u8'): - formats.extend(self._extract_m3u8_formats(url, media_id, m3u8_id='hls')) - parsed_urls.append(url) - elif url.endswith('f4m'): - formats.extend(self._extract_f4m_formats(url, media_id, f4m_id='hds')) - parsed_urls.append(url) - else: - if not fmt.tag.startswith('HTTP'): - continue - proto = compat_urllib_parse_urlparse(url).scheme - width_str, height_str = fmt.get('resolution').split('x') - tbr = int(fmt.get('bitrate', 0)) - formats.append({ - 'format_id': '%s-%d' % (proto, tbr), - 'url': url, - 'width': int(width_str), - 'height': int(height_str), - 'tbr': tbr, - 'ext': url.rsplit('.', 1)[-1], - 'live': True, - 'protocol': proto, - }) + def extract_formats(node): + for child in node: + if child.tag.endswith('Files'): + extract_formats(child) + elif child.tag.endswith('File'): + video_url = child.text + if not video_url or video_url in processed_urls or 'NOT_USED' in video_url: + return + processed_urls.append(video_url) + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls')) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds')) + else: + proto = compat_urllib_parse_urlparse(video_url).scheme + if not child.tag.startswith('HTTP') and proto != 'rtmp': + continue + preference = -1 if proto == 'rtmp' 
else 1 + label = child.get('label') + tbr = int_or_none(child.get('bitrate')) + width, height = [int_or_none(x) for x in child.get('resolution', '').split('x')] + formats.append({ + 'format_id': '%s-%s' % (proto, label if label else tbr), + 'url': video_url, + 'width': width, + 'height': height, + 'tbr': tbr, + 'preference': preference, + }) + + extract_formats(video_xml.find('./Clip')) self._sort_formats(formats) return { 'id': video_id, + 'display_id': display_id, 'title': self._og_search_title(webpage), - 'formats': formats, 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), - 'duration': int(media_xml.find('.//Runtime').text), - 'age_limit': int(media_xml.find('.//AgeLimit').text), + 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')), + 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), + 'formats': formats, } From 9836cfb8d682c91036ce417fa31200673b52115b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 7 Jun 2015 08:12:21 +0600 Subject: [PATCH 0913/2721] [options] Clarify `--list-extractors` (Closes #5916) --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 5a2315bd9..689fa7595 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -145,7 +145,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--list-extractors', action='store_true', dest='list_extractors', default=False, - help='List all supported extractors and the URLs they would handle') + help='List all supported extractors') general.add_option( '--extractor-descriptions', action='store_true', dest='list_extractor_descriptions', default=False, From b26733ba7f376f8c9285ac7928534286622bbc7c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 15:29:17 +0800 Subject: [PATCH 0914/2721] [brightcove] Allow single quotes 
in Brightcove URLs (fixes #5901) --- youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/generic.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 4f60d5366..c1d4320e1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -172,7 +172,7 @@ class BrightcoveIE(InfoExtractor): """Return a list of all Brightcove URLs from the webpage """ url_m = re.search( - r'<meta\s+property="og:video"\s+content="(https?://(?:secure|c)\.brightcove.com/[^"]+)"', + r'<meta\s+property=[\'"]og:video[\'"]\s+content=[\'"](https?://(?:secure|c)\.brightcove.com/[^\'"]+)[\'"]', webpage) if url_m: url = unescapeHTML(url_m.group(1)) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 96ca398de..759691365 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -789,6 +789,18 @@ class GenericIE(InfoExtractor): # rtmpe downloads 'skip_download': True, } + }, + # Brightcove URL in single quotes + { + 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/', + 'md5': '4ae374f1f8b91c889c4b9203c8c752af', + 'info_dict': { + 'id': '4255764656001', + 'ext': 'mp4', + 'title': 'SN Presents: Russell Martin, World Citizen', + 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. 
Written and narrated by Stephen Brunt.', + 'uploader': 'Rogers Sportsnet', + }, } ] From 621ed9f5f4d9d82659272ebe01e740e9196fad61 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 16:33:22 +0800 Subject: [PATCH 0915/2721] [common] Add note and errnote field for _extract_m3u8_formats --- youtube_dl/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cecf917ff..49e4dc710 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -846,7 +846,7 @@ class InfoExtractor(object): def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, entry_protocol='m3u8', preference=None, - m3u8_id=None): + m3u8_id=None, note=None, errnote=None): formats = [{ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), @@ -865,8 +865,8 @@ class InfoExtractor(object): m3u8_doc = self._download_webpage( m3u8_url, video_id, - note='Downloading m3u8 information', - errnote='Failed to download m3u8 information') + note=note or 'Downloading m3u8 information', + errnote=errnote or 'Failed to download m3u8 information') last_info = None last_media = None kv_rex = re.compile( From 65ba8b23f471b96e6f937f2754c729e22bf2cf0a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 16:34:19 +0800 Subject: [PATCH 0916/2721] [discovery] Rewrite DiscoveryIE (fixes #5898) Discovery.com now uses a completely different approach for serving videos. At least in both test cases brightcove are involved. However, AMF support is necessary for these brightcove videos. As a result, I try to extract videos from the info page ('?flat=1'). The downloaded file can be different from the one in browsers. 
--- youtube_dl/extractor/discovery.py | 52 ++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index d3e667528..d6723ecf2 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -2,19 +2,19 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + parse_duration, parse_iso8601, - int_or_none, ) +from ..compat import compat_str class DiscoveryIE(InfoExtractor): _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?' - _TEST = { + _TESTS = [{ 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', - 'md5': '3c69d77d9b0d82bfd5e5932a60f26504', 'info_dict': { - 'id': 'mission-impossible-outtakes', - 'ext': 'flv', + 'id': '20769', + 'ext': 'mp4', 'title': 'Mission Impossible Outtakes', 'description': ('Watch Jamie Hyneman and Adam Savage practice being' ' each other -- to the point of confusing Jamie\'s dog -- and ' @@ -24,22 +24,36 @@ class DiscoveryIE(InfoExtractor): 'timestamp': 1303099200, 'upload_date': '20110418', }, - } + 'params': { + 'skip_download': True, # requires ffmpeg + } + }, { + 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mythbusters-the-simpsons', + 'info_dict': { + 'id': 'mythbusters-the-simpsons', + 'title': 'MythBusters: The Simpsons', + }, + 'playlist_count': 9, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + info = self._download_json(url + '?flat=1', video_id) - info = self._parse_json(self._search_regex( - r'(?s)<script type="application/ld\+json">(.*?)</script>', - webpage, 'video info'), video_id) + video_title = info.get('playlist_title') or info.get('video_title') - return { - 'id': video_id, - 'title': info['name'], - 'url': info['contentURL'], - 'description': 
info.get('description'), - 'thumbnail': info.get('thumbnailUrl'), - 'timestamp': parse_iso8601(info.get('uploadDate')), - 'duration': int_or_none(info.get('duration')), - } + entries = [{ + 'id': compat_str(video_info['id']), + 'formats': self._extract_m3u8_formats( + video_info['src'], video_id, ext='mp4', + note='Download m3u8 information for video %d' % (idx + 1)), + 'title': video_info['title'], + 'description': video_info.get('description'), + 'duration': parse_duration(video_info.get('video_length')), + 'webpage_url': video_info.get('href'), + 'thumbnail': video_info.get('thumbnailURL'), + 'alt_title': video_info.get('secondary_title'), + 'timestamp': parse_iso8601(video_info.get('publishedDate')), + } for idx, video_info in enumerate(info['playlist'])] + + return self.playlist_result(entries, video_id, video_title) From 68477c3dab97733eb7a2feb8fcc90f648c29c2b4 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 7 Jun 2015 16:38:39 +0800 Subject: [PATCH 0917/2721] [tlc] Fix test failure due to DiscoveryIE changes --- youtube_dl/extractor/tlc.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 9f9e388c5..13263614c 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -12,17 +12,22 @@ class TlcIE(DiscoveryIE): IE_NAME = 'tlc.com' _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' 
- _TEST = { + # DiscoveryIE has _TESTS + _TESTS = [{ 'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm', - 'md5': 'c4038f4a9b44d0b5d74caaa64ed2a01a', 'info_dict': { - 'id': '853232', + 'id': '104493', 'ext': 'mp4', - 'title': 'Cake Boss: Too Big to Fly', + 'title': 'Too Big to Fly', 'description': 'Buddy has taken on a high flying task.', 'duration': 119, + 'timestamp': 1393365060, + 'upload_date': '20140225', }, - } + 'params': { + 'skip_download': True, # requires ffmpef + }, + }] class TlcDeIE(InfoExtractor): From edb99d4c18475ba27fae4f7d0ec6e3db9b574885 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 8 Jun 2015 01:17:21 +0800 Subject: [PATCH 0918/2721] [instagram] Handling null values (fixes #5919) I didn't add the test case here because it takes too much time. (7 minutes on my machine) --- youtube_dl/extractor/instagram.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index b10755788..b92367a9d 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -100,7 +100,9 @@ class InstagramUserIE(InfoExtractor): thumbnails_el = it.get('images', {}) thumbnail = thumbnails_el.get('thumbnail', {}).get('url') - title = it.get('caption', {}).get('text', it['id']) + # In some cases caption is null, which corresponds to None + # in python. As a result, it.get('caption', {}) gives None + title = (it.get('caption') or {}).get('text', it['id']) entries.append({ 'id': it['id'], From e1ec93304dfcf385380feb95a3777c796cc49420 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 8 Jun 2015 01:46:33 +0800 Subject: [PATCH 0919/2721] [instagram:user] Truncate title to 80 characters (#5919) This is a workaround. Currently YoutubeDL.process_info() truncates info_dict['title'] to 200 characters, but the implementation can't handle wide characters. 
--- youtube_dl/extractor/instagram.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index b92367a9d..3d78f78c4 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + limit_length, +) class InstagramIE(InfoExtractor): @@ -106,7 +109,7 @@ class InstagramUserIE(InfoExtractor): entries.append({ 'id': it['id'], - 'title': title, + 'title': limit_length(title, 80), 'formats': formats, 'thumbnail': thumbnail, 'webpage_url': it.get('link'), From 788be3313df7ad020dc0a98bd5ed43a60120fb3b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 8 Jun 2015 13:32:04 +0800 Subject: [PATCH 0920/2721] [cnet] Fix theplatform vid extraction (fixes #5924) --- youtube_dl/extractor/cnet.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py index 3145b3051..5dd69bff7 100644 --- a/youtube_dl/extractor/cnet.py +++ b/youtube_dl/extractor/cnet.py @@ -11,7 +11,7 @@ from ..utils import ( class CNETIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cnet\.com/videos/(?P<id>[^/]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', 'info_dict': { 'id': '56f4ea68-bd21-4852-b08c-4de5b8354c60', @@ -25,7 +25,20 @@ class CNETIE(InfoExtractor): 'params': { 'skip_download': 'requires rtmpdump', } - } + }, { + 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/', + 'info_dict': { + 'id': '56527b93-d25d-44e3-b738-f989ce2e49ba', + 'ext': 'flv', + 'description': 'Khail and Ashley wonder what other civic woes can be solved by self-tweeting objects, investigate a new kind of VR 
camera and watch an origami robot self-assemble, walk, climb, dig and dissolve. #TDPothole', + 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40', + 'uploader': 'Ashley Esqueda', + 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)', + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -42,7 +55,7 @@ class CNETIE(InfoExtractor): raise ExtractorError('Cannot find video data') mpx_account = data['config']['players']['default']['mpx_account'] - vid = vdata['files']['rtmp'] + vid = vdata['files'].get('rtmp', vdata['files']['hds']) tp_link = 'http://link.theplatform.com/s/%s/%s' % (mpx_account, vid) video_id = vdata['id'] From 01e21b89eefc32bcc4a92c3a82658cee139b6b2c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Mon, 8 Jun 2015 17:39:55 +0800 Subject: [PATCH 0921/2721] [noco] Skip invalid timestamps (closes #5826) --- youtube_dl/extractor/noco.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index 664dc81d4..5bbd2dcf6 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -166,6 +166,10 @@ class NocoIE(InfoExtractor): self._sort_formats(formats) timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ') + + if timestamp is not None and timestamp < 0: + timestamp = None + uploader = show.get('partner_name') uploader_id = show.get('partner_key') duration = float_or_none(show.get('duration_ms'), 1000) From a55e36f48d1f0dc5454b144c7373361f284b9236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 8 Jun 2015 21:05:17 +0600 Subject: [PATCH 0922/2721] [YoutubeDL] Handle out-of-range timestamps (#5826) --- youtube_dl/YoutubeDL.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index aa6ec9d9a..b1f792d4e 
100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1016,13 +1016,13 @@ class YoutubeDL(object): info_dict['display_id'] = info_dict['id'] if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: - # Working around negative timestamps in Windows - # (see http://bugs.python.org/issue1646728) - if info_dict['timestamp'] < 0 and os.name == 'nt': - info_dict['timestamp'] = 0 - upload_date = datetime.datetime.utcfromtimestamp( - info_dict['timestamp']) - info_dict['upload_date'] = upload_date.strftime('%Y%m%d') + # Working around out-of-range timestamp values (e.g. negative ones on Windows, + # see http://bugs.python.org/issue1646728) + try: + upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp']) + info_dict['upload_date'] = upload_date.strftime('%Y%m%d') + except (ValueError, OverflowError, OSError): + pass if self.params.get('listsubtitles', False): if 'automatic_captions' in info_dict: From 627b96482567c1525dddfcceae2c16ff53c18b6a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 9 Jun 2015 11:41:17 +0800 Subject: [PATCH 0923/2721] [kickstarted] Extract thumbnails in embedded videos (#5929) --- youtube_dl/extractor/kickstarter.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index 7d4b57056..1d391e69f 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -28,6 +28,14 @@ class KickStarterIE(InfoExtractor): 'uploader': 'Pebble Technology', 'title': 'Pebble iOS Notifications', } + }, { + 'url': 'https://www.kickstarter.com/projects/1420158244/power-drive-2000/widget/video.html', + 'info_dict': { + 'id': '1420158244', + 'ext': 'mp4', + 'title': 'Power Drive 2000', + }, + 'expected_warnings': ['OpenGraph description'], }] def _real_extract(self, url): @@ -48,10 +56,15 @@ class KickStarterIE(InfoExtractor): 'title': title, } + 
thumbnail = self._og_search_thumbnail(webpage, default=None) + if thumbnail is None: + thumbnail = self._html_search_regex( + r'<img[^>]+class="[^"]+\s*poster\s*[^"]+"[^>]+src="([^"]+)"', + webpage, 'thumbnail image', fatal=False) return { 'id': video_id, 'url': video_url, 'title': title, 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': thumbnail, } From e1b9322b091122b6f6832c70e3a845a84ee1764e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 9 Jun 2015 14:48:18 +0800 Subject: [PATCH 0924/2721] [youtube] Restricter DASH signature pattern A problematic DASH url is: https://manifest.googlevideo.com/api/manifest/dash/mm/35/key/yt5/ip/140.112.247.145/ms/pm/mv/s/mt/1433794435/id/o-AD2Od_dsOlAUYPu03ZsVWKSbGEbCJJrMp9vnXGhnyRhd/mn/sn-aigllm7r/sparams/as%2Chfr%2Cid%2Cip%2Cipbits%2Citag%2Cmm%2Cmn%2Cms%2Cmv%2Cnh%2Cpl%2Cplayback_host%2Crequiressl%2Csource%2Cexpire/fexp/9406009%2C9406821%2C9407575%2C9408142%2C9408420%2C9408710%2C9409121%2C9409208%2C9412514%2C9412780%2C9413208%2C9413426%2C9413476%2C9413503%2C9415304%2C9415753/upn/viDQrs8SnmE/as/fmp4_audio_clear%2Cwebm_audio_clear%2Cfmp4_sd_hd_clear%2Cwebm_sd_hd_clear%2Cwebm2_sd_hd_clear/playback_host/r4---sn-aigllm7r.googlevideo.com/ipbits/0/requiressl/yes/pl/20/itag/0/source/youtube/expire/1433824806/nh/EAQ/signature/81ABE6391E351BA495F5B041B00FF1257A353318.1A6E48ABB74E8F4AE73CA2CB1F963FC34E33DEE7/sver/3/hfr/1 --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 419f7b019..083da777d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -785,7 +785,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): s = mobj.group(1) dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) return '/signature/%s' % dec_s - dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, 
dash_manifest_url) + dash_manifest_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, dash_manifest_url) dash_doc = self._download_xml( dash_manifest_url, video_id, note='Downloading DASH manifest', From d9cf48e81e38f4bf151a8648c48d6e5233325b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 Jun 2015 20:36:08 +0600 Subject: [PATCH 0925/2721] [spiegeltv] Extract all formats and prefer hls (Closes #5843) --- youtube_dl/extractor/spiegeltv.py | 45 ++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index 359722ad6..08a5c4314 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -2,7 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import float_or_none +from ..compat import compat_urllib_parse_urlparse +from ..utils import ( + determine_ext, + float_or_none, +) class SpiegeltvIE(InfoExtractor): @@ -17,7 +21,7 @@ class SpiegeltvIE(InfoExtractor): 'thumbnail': 're:http://.*\.jpg$', }, 'params': { - # rtmp download + # m3u8 download 'skip_download': True, } }, { @@ -53,7 +57,35 @@ class SpiegeltvIE(InfoExtractor): server_json = self._download_json( 'http://spiegeltv-prod-static.s3.amazonaws.com/projectConfigs/projectConfig.json', video_id, note='Downloading server information') - server = server_json['streamingserver'][0]['endpoint'] + + format = '16x9' if is_wide else '4x3' + + formats = [] + for streamingserver in server_json['streamingserver']: + endpoint = streamingserver.get('endpoint') + if not endpoint: + continue + play_path = 'mp4:%s_spiegeltv_0500_%s.m4v' % (uuid, format) + if endpoint.startswith('rtmp'): + formats.append({ + 'url': endpoint, + 'format_id': 'rtmp', + 'app': compat_urllib_parse_urlparse(endpoint).path[1:], + 'play_path': play_path, + 'player_path': 'http://prod-static.spiegel.tv/frontend-076.swf', + 'ext': 
'flv', + 'rtmp_live': True, + }) + elif determine_ext(endpoint) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + endpoint.replace('[video]', play_path), + video_id, 'm4v', + preference=1, # Prefer hls since it allows to workaround georestriction + m3u8_id='hls')) + else: + formats.append({ + 'url': endpoint, + }) thumbnails = [] for image in media_json['images']: @@ -65,17 +97,12 @@ class SpiegeltvIE(InfoExtractor): description = media_json['subtitle'] duration = float_or_none(media_json.get('duration_in_ms'), scale=1000) - format = '16x9' if is_wide else '4x3' - - url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v' return { 'id': video_id, 'title': title, - 'url': url, - 'ext': 'm4v', 'description': description, 'duration': duration, 'thumbnails': thumbnails, - 'rtmp_live': True, + 'formats': formats, } From 9bf99891d08d166ac1d81b652dc487bb940fa685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 Jun 2015 21:23:53 +0600 Subject: [PATCH 0926/2721] [cbs] Add support for colbertlateshow (Closes #5888) --- youtube_dl/extractor/cbs.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 1ceb9d8d9..89614a3c9 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class CBSIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*' + _VALID_URL = r'https?://(?:(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/(?P<id>[^/]+)/.*' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', @@ -34,12 +34,18 @@ class CBSIE(InfoExtractor): 'skip_download': True, }, '_skip': 'Blocked outside the US', + }, { + 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', 
+ 'only_matching': True, + }, { + 'url': 'http://colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) real_id = self._search_regex( - r"video\.settings\.pid\s*=\s*'([^']+)';", + [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"], webpage, 'real video ID') return self.url_result('theplatform:%s' % real_id) From 9d581f3d5224140ca35ebd06d614b929e9252cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 Jun 2015 21:39:45 +0600 Subject: [PATCH 0927/2721] [cbs] Extract display_id --- youtube_dl/extractor/cbs.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 89614a3c9..75fffb156 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,12 +4,13 @@ from .common import InfoExtractor class CBSIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/(?P<id>[^/]+)/.*' + _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', 'info_dict': { 'id': '4JUVEwq3wUT7', + 'display_id': 'connect-chat-feat-garth-brooks', 'ext': 'flv', 'title': 'Connect Chat feat. Garth Brooks', 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. 
Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', @@ -24,6 +25,7 @@ class CBSIE(InfoExtractor): 'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', 'info_dict': { 'id': 'WWF_5KqY3PK1', + 'display_id': 'st-vincent', 'ext': 'flv', 'title': 'Live on Letterman - St. Vincent', 'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', @@ -38,14 +40,19 @@ class CBSIE(InfoExtractor): 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', 'only_matching': True, }, { - 'url': 'http://colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', + 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) real_id = self._search_regex( [r"video\.settings\.pid\s*=\s*'([^']+)';", r"cbsplayer\.pid\s*=\s*'([^']+)';"], webpage, 'real video ID') - return self.url_result('theplatform:%s' % real_id) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': 'theplatform:%s' % real_id, + 'display_id': display_id, + } From 6e054aacca2ac44413ed37ee8b1d63a09c8b4ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 Jun 2015 23:07:22 +0600 Subject: [PATCH 0928/2721] [theplatform] Take care of /select/media URLs (Closes #5746) --- youtube_dl/extractor/theplatform.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 92731ad3d..48c6ff03f 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -26,7 
+26,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language class ThePlatformIE(InfoExtractor): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ - (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? + (?:(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)|(?P<media>(?:[^/]+/)+select/media/))? |theplatform:)(?P<id>[^/\?&]+)''' _TESTS = [{ @@ -56,6 +56,17 @@ class ThePlatformIE(InfoExtractor): # rtmp download 'skip_download': True, } + }, { + 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD', + 'info_dict': { + 'id': 'yMBg9E8KFxZD', + 'ext': 'mp4', + 'description': 'md5:644ad9188d655b742f942bf2e06b002d', + 'title': 'HIGHLIGHTS: USA bag first ever series Cup win', + } + }, { + 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', + 'only_matching': True, }] @staticmethod @@ -85,6 +96,11 @@ class ThePlatformIE(InfoExtractor): if not provider_id: provider_id = 'dJ5BDC' + path = provider_id + if mobj.group('media'): + path += '/media' + path += '/' + video_id + if smuggled_data.get('force_smil_url', False): smil_url = url elif mobj.group('config'): @@ -94,8 +110,7 @@ class ThePlatformIE(InfoExtractor): config = self._download_json(config_url, video_id, 'Downloading config') smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' else: - smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?' 
- 'format=smil&mbr=true'.format(provider_id, video_id)) + smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path sig = smuggled_data.get('sig') if sig: @@ -112,7 +127,7 @@ class ThePlatformIE(InfoExtractor): else: raise ExtractorError(error_msg, expected=True) - info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id) + info_url = 'http://link.theplatform.com/s/%s?format=preview' % path info_json = self._download_webpage(info_url, video_id) info = json.loads(info_json) From bd5bc0cd5af257abf7a1a4c14a9dd39c4f97e622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 9 Jun 2015 23:12:13 +0600 Subject: [PATCH 0929/2721] [theplatform] Check for /select/media URLs first (#5746) --- youtube_dl/extractor/theplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 48c6ff03f..83d833e30 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -26,7 +26,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language class ThePlatformIE(InfoExtractor): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ - (?:(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)|(?P<media>(?:[^/]+/)+select/media/))? + (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? |theplatform:)(?P<id>[^/\?&]+)''' _TESTS = [{ From 70219b0f4371fe54cc72d025ce06fc4691ba12fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 9 Jun 2015 23:49:11 +0200 Subject: [PATCH 0930/2721] [youtube:playlist] Use an iterator for the entries (closes #5935) So that '--playlist-end' downloads only the required pages. 
--- youtube_dl/extractor/youtube.py | 47 ++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 083da777d..3448bec4f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1290,7 +1290,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): def _extract_playlist(self, playlist_id): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) - more_widget_html = content_html = page for match in re.findall(r'<div class="yt-alert-message">([^<]+)</div>', page): match = match.strip() @@ -1310,36 +1309,36 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): self.report_warning('Youtube gives an alert message: ' + match) # Extract the video ids from the playlist pages - ids = [] + def _entries(): + more_widget_html = content_html = page + for page_num in itertools.count(1): + matches = re.finditer(self._VIDEO_RE, content_html) + # We remove the duplicates and the link with index 0 + # (it's not the first video of the playlist) + new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0') + for vid_id in new_ids: + yield self.url_result(vid_id, 'Youtube', video_id=vid_id) - for page_num in itertools.count(1): - matches = re.finditer(self._VIDEO_RE, content_html) - # We remove the duplicates and the link with index 0 - # (it's not the first video of the playlist) - new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0') - ids.extend(new_ids) + mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) + if not mobj: + break - mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) - if not mobj: - break - - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), playlist_id, - 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) - content_html = more['content_html'] - 
if not content_html.strip(): - # Some webpages show a "Load more" button but they don't - # have more videos - break - more_widget_html = more['load_more_widget_html'] + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), playlist_id, + 'Downloading page #%s' % page_num, + transform_source=uppercase_escape) + content_html = more['content_html'] + if not content_html.strip(): + # Some webpages show a "Load more" button but they don't + # have more videos + break + more_widget_html = more['load_more_widget_html'] playlist_title = self._html_search_regex( r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', page, 'title') - url_results = self._ids_to_results(ids) - return self.playlist_result(url_results, playlist_id, playlist_title) + return self.playlist_result(_entries(), playlist_id, playlist_title) def _real_extract(self, url): # Extract playlist id From d84f1d14b526c4a5359117a58f25691a3da4c97e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lio=20A=2E=20Heckert?= <aurelio@colivre.coop.br> Date: Tue, 9 Jun 2015 22:08:16 -0300 Subject: [PATCH 0931/2721] Adds support for XviD output with extra parametrization As the "LG Time Machine" (a (not so) smart TV) has a limitation for video dimensions (as for codecs), I take to implement an extra parameter `--pp-params` where we can send extra parameterization for the video converter (post-processor). Example: ``` $ youtube-dl --recode-video=xvid --pp-params='-s 720x480' -c https://www.youtube.com/watch?v=BE7Qoe2ZiXE ``` That works fine on a 4yo LG Time Machine. Closes #5733 --- README.md | 3 ++- youtube_dl/__init__.py | 5 ++++- youtube_dl/options.py | 6 +++++- youtube_dl/postprocessor/ffmpeg.py | 14 ++++++++++---- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f3d83c89f..726ec9cf2 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,8 @@ which means you can modify it, redistribute it or use it however you like. 
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5) - --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) + --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|xvid) + --pp-params Extra parameters for video post-processor. The params will be splited on spaces. -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default --embed-subs Embed subtitles in the video (only for mkv and mp4 videos) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ace17857c..5b28e4817 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -169,8 +169,10 @@ def _real_main(argv=None): if not opts.audioquality.isdigit(): parser.error('invalid audio quality specified') if opts.recodevideo is not None: - if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']: + if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'xvid']: parser.error('invalid video recode format specified') + if opts.pp_params is not None: + opts.pp_params = opts.pp_params.split() if opts.convertsubtitles is not None: if opts.convertsubtitles not in ['srt', 'vtt', 'ass']: parser.error('invalid subtitle format specified') @@ -227,6 +229,7 @@ def _real_main(argv=None): postprocessors.append({ 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, + 'extra_params': opts.pp_params }) if opts.convertsubtitles: postprocessors.append({ diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 689fa7595..ceb4b5f38 100644 --- a/youtube_dl/options.py +++ 
b/youtube_dl/options.py @@ -686,7 +686,11 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--recode-video', metavar='FORMAT', dest='recodevideo', default=None, - help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') + help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|xvid)') + postproc.add_option( + '--pp-params', + dest='pp_params', default=None, + help='Extra parameters for video post-processor. The params will be splited on spaces.') postproc.add_option( '-k', '--keep-video', action='store_true', dest='keepvideo', default=False, diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index cc65b34e7..a696b12b4 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -287,22 +287,28 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): class FFmpegVideoConvertorPP(FFmpegPostProcessor): - def __init__(self, downloader=None, preferedformat=None): + def __init__(self, downloader=None, preferedformat=None, extra_params=[]): super(FFmpegVideoConvertorPP, self).__init__(downloader) self._preferedformat = preferedformat + self._extra_params = extra_params def run(self, information): path = information['filepath'] prefix, sep, ext = path.rpartition('.') - outpath = prefix + sep + self._preferedformat + ext = self._preferedformat + options = self._extra_params + if self._preferedformat == 'xvid': + ext = 'avi' + options.extend(['-c:v', 'libxvid', '-vtag', 'XVID']) + outpath = prefix + sep + ext if information['ext'] == self._preferedformat: self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) return [], information self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath) - self.run_ffmpeg(path, outpath, []) + self.run_ffmpeg(path, 
outpath, options) information['filepath'] = outpath information['format'] = self._preferedformat - information['ext'] = self._preferedformat + information['ext'] = ext return [path], information From 0c8662d2b6f033ad42f1cc97989d4975629b524b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 10 Jun 2015 13:40:41 +0800 Subject: [PATCH 0932/2721] [youtube] Fix a TypeError caused by 4da31bd56629054497634d041035e4bd6fcfacbb --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2424ac2c0..a1906eef6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -816,7 +816,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': filesize, 'fps': int_or_none(r.attrib.get('frameRate')), } - if len(segment_list): + if segment_list is not None: f.update({ 'initialization_url': segment_list.find('{urn:mpeg:DASH:schema:MPD:2011}Initialization').attrib['sourceURL'], 'segment_urls': [segment.attrib.get('media') for segment in segment_list.findall('{urn:mpeg:DASH:schema:MPD:2011}SegmentURL')], From 93dfcb9357b400b4d7e353d0a9db0e0194135b19 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 10 Jun 2015 13:44:54 +0800 Subject: [PATCH 0933/2721] [downloader/dash] Do not pollute ```self``` --- youtube_dl/downloader/dash.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 5f14658ba..cd84e0b07 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -16,14 +16,14 @@ class DashSegmentsFD(FileDownloader): base_url = info_dict['url'] segment_urls = info_dict['segment_urls'] - self.byte_counter = 0 + byte_counter = 0 def append_url_to_file(outf, target_url, target_name): self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) req = 
compat_urllib_request.Request(target_url) data = self.ydl.urlopen(req).read() outf.write(data) - self.byte_counter += len(data) + return len(data) def combine_url(base_url, target_url): if re.match(r'^https?://', target_url): @@ -35,15 +35,16 @@ class DashSegmentsFD(FileDownloader): outf, combine_url(base_url, info_dict['initialization_url']), 'initialization segment') for i, segment_url in enumerate(segment_urls): - append_url_to_file( + segment_len = append_url_to_file( outf, combine_url(base_url, segment_url), 'segment %d / %d' % (i + 1, len(segment_urls))) + byte_counter += segment_len self.try_rename(tmpfilename, filename) self._hook_progress({ - 'downloaded_bytes': self.byte_counter, - 'total_bytes': self.byte_counter, + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, 'filename': filename, 'status': 'finished', }) From 7ebd5376feb493edd0bc04abd07bba89397b7307 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 10 Jun 2015 14:15:20 +0800 Subject: [PATCH 0934/2721] [nfl] Relax _VALID_URL (fixes #5940) --- youtube_dl/extractor/nfl.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index 2684dd250..dc54634a5 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -19,7 +19,7 @@ class NFLIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?P<host>(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/ (?:.+?/)* - (?P<id>(?:\d[a-z]{2}\d{13}|\w{8}\-(?:\w{4}\-){3}\w{12}))''' + (?P<id>(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))''' _TESTS = [ { 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', @@ -58,6 +58,10 @@ class NFLIE(InfoExtractor): 'upload_date': '20150202', }, }, + { + 'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood', + 'only_matching': True, + } ] @staticmethod From 
5bf3276e8d6ee7d017c8be04414398752cd9cdf3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 10 Jun 2015 14:45:54 +0800 Subject: [PATCH 0935/2721] [downloader/dash] Add testing facility --- youtube_dl/downloader/dash.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index cd84e0b07..a4685d307 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -16,12 +16,21 @@ class DashSegmentsFD(FileDownloader): base_url = info_dict['url'] segment_urls = info_dict['segment_urls'] + is_test = self.params.get('test', False) + remaining_bytes = self._TEST_FILE_SIZE if is_test else None byte_counter = 0 - def append_url_to_file(outf, target_url, target_name): + def append_url_to_file(outf, target_url, target_name, remaining_bytes=None): self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) req = compat_urllib_request.Request(target_url) + if remaining_bytes is not None: + req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) + data = self.ydl.urlopen(req).read() + + if remaining_bytes is not None: + data = data[:remaining_bytes] + outf.write(data) return len(data) @@ -37,8 +46,13 @@ class DashSegmentsFD(FileDownloader): for i, segment_url in enumerate(segment_urls): segment_len = append_url_to_file( outf, combine_url(base_url, segment_url), - 'segment %d / %d' % (i + 1, len(segment_urls))) + 'segment %d / %d' % (i + 1, len(segment_urls)), + remaining_bytes) byte_counter += segment_len + if remaining_bytes is not None: + remaining_bytes -= segment_len + if remaining_bytes <= 0: + break self.try_rename(tmpfilename, filename) From 8a1a26ce4c64d7a2c142718fc56f46d9a1c2c4f2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 10 Jun 2015 14:47:02 +0800 Subject: [PATCH 0936/2721] [youtube] Add a test for the DASH segment downloader --- youtube_dl/extractor/youtube.py | 18 
++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index a1906eef6..939f5e61f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -516,6 +516,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': 'requires avconv', } }, + # DASH manifest with segment_list + { + 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', + 'md5': '8ce563a1d667b599d21064e982ab9e31', + 'info_dict': { + 'id': 'CsmdDsKjzN8', + 'ext': 'mp4', + 'upload_date': '20150510', + 'uploader': 'Airtek', + 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.', + 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ', + 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015', + }, + 'params': { + 'youtube_include_dash_manifest': True, + 'format': '135', # bestvideo + } + } ] def __init__(self, *args, **kwargs): From eb8be1fe76a9fbc285e6c957b3fdd5c05135ae3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 10 Jun 2015 14:12:43 +0200 Subject: [PATCH 0937/2721] [rtbf] Extract all formats (closes #5947) --- youtube_dl/extractor/rtbf.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py index 5a381d9ce..e4215d546 100644 --- a/youtube_dl/extractor/rtbf.py +++ b/youtube_dl/extractor/rtbf.py @@ -21,6 +21,13 @@ class RTBFIE(InfoExtractor): } } + _QUALITIES = [ + ('mobile', 'mobile'), + ('web', 'SD'), + ('url', 'MD'), + ('high', 'HD'), + ] + def _real_extract(self, url): video_id = self._match_id(url) @@ -32,14 +39,21 @@ class RTBFIE(InfoExtractor): r'data-video="([^"]+)"', webpage, 'data video')), video_id) - video_url = data.get('downloadUrl') or data.get('url') - if data.get('provider').lower() == 'youtube': + video_url = data.get('downloadUrl') or data.get('url') return 
self.url_result(video_url, 'Youtube') + formats = [] + for key, format_id in self._QUALITIES: + format_url = data['sources'].get(key) + if format_url: + formats.append({ + 'format_id': format_id, + 'url': format_url, + }) return { 'id': video_id, - 'url': video_url, + 'formats': formats, 'title': data['title'], 'description': data.get('description') or data.get('subtitle'), 'thumbnail': data.get('thumbnail'), From f98470df690d053e45691ede2751ab6a4063082b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 10 Jun 2015 23:01:12 +0600 Subject: [PATCH 0938/2721] [bilibili] Fix FutureWarning --- youtube_dl/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 2103ed73a..bf60450c2 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -105,7 +105,7 @@ class BiliBiliIE(InfoExtractor): 'filesize': int_or_none( lq_durl.find('./size'), get_attr='text'), }] - if hq_durl: + if hq_durl is not None: formats.append({ 'format_id': 'hq', 'quality': 2, From a9d56c684319eaf8b9494bd8d2dc9d0f40485254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Jun 2015 19:03:22 +0600 Subject: [PATCH 0939/2721] [rtlnl] Improve _VALID_URL (#5950) --- youtube_dl/extractor/rtlnl.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index cfce4550a..41d202c28 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -12,10 +12,10 @@ class RtlNlIE(InfoExtractor): IE_NAME = 'rtl.nl' IE_DESC = 'rtl.nl and rtlxl.nl' _VALID_URL = r'''(?x) - https?://(www\.)? + https?://(?:www\.)? 
(?: rtlxl\.nl/\#!/[^/]+/| - rtl\.nl/system/videoplayer/[^?#]+?/video_embed\.html\#uuid= + rtl\.nl/system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html\b.+?\buuid= ) (?P<id>[0-9a-f-]+)''' @@ -43,6 +43,9 @@ class RtlNlIE(InfoExtractor): 'upload_date': '20150215', 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', } + }, { + 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0', + 'only_matching': True, }] def _real_extract(self, url): From 97b570a94cc2387153af525f781e144bb4bb791e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Jun 2015 19:04:12 +0600 Subject: [PATCH 0940/2721] [generic] Improve rtl.nl embeds detection (Closes #5950) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 759691365..75526384f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1073,7 +1073,7 @@ class GenericIE(InfoExtractor): # Look for embedded rtl.nl player matches = re.findall( - r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+video_embed[^"]+)"', + r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', webpage) if matches: return _playlist_from_matches(matches, ie='RtlNl') From ff0f0b9172e432ebbfca88da91278554eb47c307 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 11 Jun 2015 22:18:08 +0600 Subject: [PATCH 0941/2721] [tube8] Fix extraction (Closes #5952) --- youtube_dl/extractor/tube8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tube8.py 
b/youtube_dl/extractor/tube8.py index 6ca8840b0..c9cb69333 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -47,7 +47,7 @@ class Tube8IE(InfoExtractor): webpage = self._download_webpage(req, display_id) flashvars = json.loads(self._html_search_regex( - r'flashvars\s*=\s*({.+?})', webpage, 'flashvars')) + r'flashvars\s*=\s*({.+?});\r?\n', webpage, 'flashvars')) video_url = flashvars['video_url'] if flashvars.get('encrypted') is True: From 99ac0390f559aa6dd09ffd8a15b9b562fda5f363 Mon Sep 17 00:00:00 2001 From: Hannu Lintala <hannu.lintala@gmail.com> Date: Mon, 8 Jun 2015 05:58:41 +0300 Subject: [PATCH 0942/2721] [fivetv] Add extractor (Closes #5794) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/fivetv.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/fivetv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 67eb96057..d10275d03 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -152,6 +152,7 @@ from .fc2 import FC2IE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE +from .fivetv import FiveTVIE from .fktv import ( FKTVIE, FKTVPosteckeIE, diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py new file mode 100644 index 000000000..e47383b39 --- /dev/null +++ b/youtube_dl/extractor/fivetv.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) + + +class FiveTVIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?5-tv\.ru/[^/]*/(?P<id>\d+)' + _TESTS = [ + { + 'url': 'http://5-tv.ru/news/96814/', + 'md5': 'bbff554ad415ecf5416a2f48c22d9283', + 'info_dict': { + 'id': '96814', + 'ext': 'mp4', + 'title': 'Россияне выбрали имя для общенациональной платежной системы', + 'description': 
'md5:a8aa13e2b7ad36789e9f77a74b6de660', + 'thumbnail': 're:^https?://.*\.jpg$', + 'width': 480, + 'height': 360, + 'duration': 180, + }, + }, + { + 'url': 'http://5-tv.ru/video/1021729/', + 'md5': '299c8b72960efc9990acd2c784dc2296', + 'info_dict': { + 'id': '1021729', + 'ext': 'mp4', + 'title': '3D принтер', + 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41', + 'thumbnail': 're:^https?://.*\.jpg$', + 'width': 480, + 'height': 360, + 'duration': 180, + }, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_link = self._search_regex( + r'(<a.*?class="videoplayer">)', webpage, 'video link') + + url = self._search_regex(r'href="([^"]+)"', video_link, 'video url') + width = int_or_none(self._search_regex( + r'width:(\d+)px', video_link, 'width', default=None, fatal=False)) + height = int_or_none(self._search_regex( + r'height:(\d+)px', video_link, 'height', default=None, fatal=False)) + duration = int_or_none(self._og_search_property( + 'video:duration', webpage, 'duration')) + return { + 'id': video_id, + 'url': url, + 'width': width, + 'height': height, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': duration, + } From 87446dc6186c7e4247fd7f9bc1046ef41f5d1a0f Mon Sep 17 00:00:00 2001 From: Hannu Lintala <hannu.lintala@gmail.com> Date: Sun, 7 Jun 2015 17:25:30 +0300 Subject: [PATCH 0943/2721] [tvc] Add extractor (Closes #5795) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tvc.py | 79 ++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/tvc.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 67eb96057..8c4e12904 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -582,6 +582,7 @@ from .tv2 import ( TV2ArticleIE, ) from .tv4 
import TV4IE +from .tvc import TVCIE from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py new file mode 100644 index 000000000..b62ab857c --- /dev/null +++ b/youtube_dl/extractor/tvc.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + str_or_none, +) + + +class TVCIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?tvc\.ru/.*/show/.*id/(?P<id>\d+)' + _TESTS = [ + { + 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/', + 'md5': 'aa6fb3cf384e18a0ad3b30ee2898beba', + 'info_dict': { + 'id': '74622', + 'display_id': '39702', + 'ext': 'mp4', + 'title': 'События. "События". Эфир от 22.05.2015 14:30', + 'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 1122, + }, + }, + { + 'url': 'http://www.tvc.ru/news/show/id/69944', + 'md5': 'b173128ee7b88b5b06c84e5f7880909f', + 'info_dict': { + 'id': '75399', + 'display_id': '69944', + 'ext': 'mp4', + 'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках', + 'description': 'md5:f675c8eaf23aab9df542d31773ed6518', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 278, + }, + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_url = self._og_search_video_url(webpage) + + video_id = self._search_regex( + r'video/iframe/id/(\d+)/', video_url, 'video id') + + video_json_url = 'http://www.tvc.ru/video/json/id/%s' % (video_id) + + video_json = self._download_json(video_json_url, video_id) + + formats = [] + for info in video_json.get('path', {}).get('quality', []): + format_id = self._search_regex( + r'cdnvideo/([^-]+)-[^/]+/', info.get('url'), 'format id', + fatal=False) + formats.append({ + 'format_id': str_or_none(format_id), + 
'url': info.get('url'), + 'width': int_or_none(info.get('width')), + 'height': int_or_none(info.get('height')), + 'tbr': int_or_none(info.get('bitrate')), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': int_or_none(video_json.get('duration')), + 'formats': formats, + } From 9f15bdabc85add582d78a6dd57cfbb56cb33baff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 16:13:36 +0600 Subject: [PATCH 0944/2721] [tvc] Separate embed extractor --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/tvc.py | 125 ++++++++++++++++++------------- 2 files changed, 77 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6dc3cbff4..a8d3a8928 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -584,7 +584,10 @@ from .tv2 import ( TV2ArticleIE, ) from .tv4 import TV4IE -from .tvc import TVCIE +from .tvc import ( + TVCIE, + TVCEmbedIE, +) from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index b62ab857c..0055f9598 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -3,77 +3,98 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + clean_html, int_or_none, - str_or_none, ) -class TVCIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?tvc\.ru/.*/show/.*id/(?P<id>\d+)' - _TESTS = [ - { - 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/', - 'md5': 'aa6fb3cf384e18a0ad3b30ee2898beba', - 'info_dict': { - 'id': '74622', - 'display_id': '39702', - 'ext': 'mp4', - 'title': 'События. "События". 
Эфир от 22.05.2015 14:30', - 'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd', - 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 1122, - }, +class TVCEmbedIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)' + _TEST = { + 'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702', + 'md5': 'bbc5ff531d1e90e856f60fc4b3afd708', + 'info_dict': { + 'id': '74622', + 'ext': 'mp4', + 'title': 'События. "События". Эфир от 22.05.2015 14:30', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 1122, }, - { - 'url': 'http://www.tvc.ru/news/show/id/69944', - 'md5': 'b173128ee7b88b5b06c84e5f7880909f', - 'info_dict': { - 'id': '75399', - 'display_id': '69944', - 'ext': 'mp4', - 'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках', - 'description': 'md5:f675c8eaf23aab9df542d31773ed6518', - 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 278, - }, - }, - ] + } def _real_extract(self, url): - display_id = self._match_id(url) + video_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - video_url = self._og_search_video_url(webpage) - - video_id = self._search_regex( - r'video/iframe/id/(\d+)/', video_url, 'video id') - - video_json_url = 'http://www.tvc.ru/video/json/id/%s' % (video_id) - - video_json = self._download_json(video_json_url, video_id) + video = self._download_json( + 'http://www.tvc.ru/video/json/id/%s' % video_id, video_id) formats = [] - for info in video_json.get('path', {}).get('quality', []): + for info in video.get('path', {}).get('quality', []): + video_url = info.get('url') + if not video_url: + continue format_id = self._search_regex( - r'cdnvideo/([^-]+)-[^/]+/', info.get('url'), 'format id', - fatal=False) + r'cdnvideo/([^/]+?)(?:-[^/]+?)?/', video_url, + 'format id', default=None) formats.append({ - 'format_id': str_or_none(format_id), - 'url': info.get('url'), + 'url': 
video_url, + 'format_id': format_id, 'width': int_or_none(info.get('width')), 'height': int_or_none(info.get('height')), 'tbr': int_or_none(info.get('bitrate')), }) - self._sort_formats(formats) return { 'id': video_id, - 'display_id': display_id, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), - 'duration': int_or_none(video_json.get('duration')), + 'title': video['title'], + 'thumbnail': video.get('picture'), + 'duration': int_or_none(video.get('duration')), 'formats': formats, } + + +class TVCIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)' + _TESTS = [{ + 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/', + 'info_dict': { + 'id': '74622', + 'ext': 'mp4', + 'title': 'События. "События". Эфир от 22.05.2015 14:30', + 'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 1122, + }, + }, { + 'url': 'http://www.tvc.ru/news/show/id/69944', + 'info_dict': { + 'id': '75399', + 'ext': 'mp4', + 'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках', + 'description': 'md5:f2098f71e21f309e89f69b525fd9846e', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 278, + }, + }, { + 'url': 'http://www.tvc.ru/channel/brand/id/47/show/episodes#', + 'info_dict': { + 'id': '2185', + 'ext': 'mp4', + 'title': 'Ещё не поздно. 
Эфир от 03.08.2013', + 'description': 'md5:51fae9f3f8cfe67abce014e428e5b027', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 3316, + }, + }] + + def _real_extract(self, url): + webpage = self._download_webpage(url, self._match_id(url)) + return { + '_type': 'url_transparent', + 'ie_key': 'TVCEmbed', + 'url': self._og_search_video_url(webpage), + 'title': clean_html(self._og_search_title(webpage)), + 'description': clean_html(self._og_search_description(webpage)), + 'thumbnail': self._og_search_thumbnail(webpage), + } From 29902c8ec016a7128557d47a7413e82d4e022f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 16:22:23 +0600 Subject: [PATCH 0945/2721] [tvc:embed] Add embed extraction routine --- youtube_dl/extractor/tvc.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index 0055f9598..756fec732 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( clean_html, @@ -22,6 +24,13 @@ class TVCEmbedIE(InfoExtractor): }, } + @classmethod + def _extract_url(cls, webpage): + mobj = re.search( + r'<iframe[^>]+?src=(["\'])(?P<url>(?:http://)?(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1', webpage) + if mobj: + return mobj.group('url') + def _real_extract(self, url): video_id = self._match_id(url) From 494f20cbdca8e76e3cb452bb0feabcb855d9b4a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 16:22:46 +0600 Subject: [PATCH 0946/2721] [extractor/generic] Add support for tvc embeds --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 75526384f..c797c4b52 100644 --- a/youtube_dl/extractor/generic.py +++ 
b/youtube_dl/extractor/generic.py @@ -34,6 +34,7 @@ from .brightcove import BrightcoveIE from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE +from .tvc import TVCEmbedIE from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE from .condenast import CondeNastIE @@ -1301,6 +1302,11 @@ class GenericIE(InfoExtractor): if rutv_url: return self.url_result(rutv_url, 'RUTV') + # Look for embedded TVC player + rutv_url = TVCEmbedIE._extract_url(webpage) + if rutv_url: + return self.url_result(rutv_url, 'TVCEmbed') + # Look for embedded SportBox player sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) if sportbox_urls: From 954c1d05299ae7c6a51db46c1ac33ddf150266c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 16:24:13 +0600 Subject: [PATCH 0947/2721] [tvc] Refactor extractor names --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/generic.py | 4 ++-- youtube_dl/extractor/tvc.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a8d3a8928..18b1c5e54 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -586,7 +586,7 @@ from .tv2 import ( from .tv4 import TV4IE from .tvc import ( TVCIE, - TVCEmbedIE, + TVCArticleIE, ) from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c797c4b52..507e4a571 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -34,7 +34,7 @@ from .brightcove import BrightcoveIE from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE -from .tvc import TVCEmbedIE +from .tvc import TVCIE from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE from .condenast import CondeNastIE @@ -1303,7 +1303,7 @@ class GenericIE(InfoExtractor): return 
self.url_result(rutv_url, 'RUTV') # Look for embedded TVC player - rutv_url = TVCEmbedIE._extract_url(webpage) + rutv_url = TVCIE._extract_url(webpage) if rutv_url: return self.url_result(rutv_url, 'TVCEmbed') diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index 756fec732..36c2a3196 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -10,7 +10,7 @@ from ..utils import ( ) -class TVCEmbedIE(InfoExtractor): +class TVCIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)' _TEST = { 'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702', @@ -63,7 +63,7 @@ class TVCEmbedIE(InfoExtractor): } -class TVCIE(InfoExtractor): +class TVCArticleIE(InfoExtractor): _VALID_URL = r'http://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)' _TESTS = [{ 'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/', From 5ccddb7ecfb1015038f2616dd7e0da78a4365c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 16:25:26 +0600 Subject: [PATCH 0948/2721] [tvc] Fix ie_key --- youtube_dl/extractor/tvc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index 36c2a3196..6b5d80aee 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -101,7 +101,7 @@ class TVCArticleIE(InfoExtractor): webpage = self._download_webpage(url, self._match_id(url)) return { '_type': 'url_transparent', - 'ie_key': 'TVCEmbed', + 'ie_key': 'TVC', 'url': self._og_search_video_url(webpage), 'title': clean_html(self._og_search_title(webpage)), 'description': clean_html(self._og_search_description(webpage)), From 2da09ff8b0de3c27a16d9096f5d28d03f44fcf70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 16:26:31 +0600 Subject: 
[PATCH 0949/2721] [extractor/generic] Fix tvc ie_key --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 507e4a571..66aceefb8 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1305,7 +1305,7 @@ class GenericIE(InfoExtractor): # Look for embedded TVC player rutv_url = TVCIE._extract_url(webpage) if rutv_url: - return self.url_result(rutv_url, 'TVCEmbed') + return self.url_result(rutv_url, 'TVC') # Look for embedded SportBox player sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) From f37bdbe537134f1ece0819e2aa677b1fec0c1cc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 16:28:45 +0600 Subject: [PATCH 0950/2721] [extractor/generic] Add test for tvc embed --- youtube_dl/extractor/generic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 66aceefb8..6be9e6329 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -292,6 +292,15 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # TVC embed + { + 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/', + 'info_dict': { + 'id': '55304', + 'ext': 'mp4', + 'title': 'Дошкольное воспитание', + }, + }, # SportBox embed { 'url': 'http://www.vestifinance.ru/articles/25753', From 499a077761b1577857952dc3b541c9f61a8bcade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 12 Jun 2015 17:48:42 +0600 Subject: [PATCH 0951/2721] [5tv] Improve --- youtube_dl/extractor/fivetv.py | 115 +++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 47 deletions(-) diff --git 
a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py index e47383b39..13fbc4da2 100644 --- a/youtube_dl/extractor/fivetv.py +++ b/youtube_dl/extractor/fivetv.py @@ -1,67 +1,88 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor -from ..utils import ( - int_or_none, -) +from ..utils import int_or_none class FiveTVIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?5-tv\.ru/[^/]*/(?P<id>\d+)' - _TESTS = [ - { - 'url': 'http://5-tv.ru/news/96814/', - 'md5': 'bbff554ad415ecf5416a2f48c22d9283', - 'info_dict': { - 'id': '96814', - 'ext': 'mp4', - 'title': 'Россияне выбрали имя для общенациональной платежной системы', - 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660', - 'thumbnail': 're:^https?://.*\.jpg$', - 'width': 480, - 'height': 360, - 'duration': 180, - }, + _VALID_URL = r'''(?x) + http:// + (?:www\.)?5-tv\.ru/ + (?: + (?:[^/]+/)+(?P<id>\d+)| + (?P<path>[^/?#]+)(?:[/?#])? + ) + ''' + + _TESTS = [{ + 'url': 'http://5-tv.ru/news/96814/', + 'md5': 'bbff554ad415ecf5416a2f48c22d9283', + 'info_dict': { + 'id': '96814', + 'ext': 'mp4', + 'title': 'Россияне выбрали имя для общенациональной платежной системы', + 'description': 'md5:a8aa13e2b7ad36789e9f77a74b6de660', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 180, }, - { - 'url': 'http://5-tv.ru/video/1021729/', - 'md5': '299c8b72960efc9990acd2c784dc2296', - 'info_dict': { - 'id': '1021729', - 'ext': 'mp4', - 'title': '3D принтер', - 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41', - 'thumbnail': 're:^https?://.*\.jpg$', - 'width': 480, - 'height': 360, - 'duration': 180, - }, + }, { + 'url': 'http://5-tv.ru/video/1021729/', + 'info_dict': { + 'id': '1021729', + 'ext': 'mp4', + 'title': '3D принтер', + 'description': 'md5:d76c736d29ef7ec5c0cf7d7c65ffcb41', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 180, }, - ] + }, { + 'url': 'http://www.5-tv.ru/glavnoe/#itemDetails', + 'info_dict': { + 'id': 'glavnoe', + 'ext': 'mp4', + 
'title': 'Итоги недели с 8 по 14 июня 2015 года', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, { + 'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/', + 'only_matching': True, + }, { + 'url': 'http://5-tv.ru/films/1507502/', + 'only_matching': True, + }, { + 'url': 'http://5-tv.ru/programs/broadcast/508713/', + 'only_matching': True, + }, { + 'url': 'http://5-tv.ru/angel/', + 'only_matching': True, + }, { + 'url': 'http://www.5-tv.ru/schedule/?iframe=true&width=900&height=450', + 'only_matching': True, + }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') or mobj.group('path') webpage = self._download_webpage(url, video_id) - video_link = self._search_regex( - r'(<a.*?class="videoplayer">)', webpage, 'video link') + video_url = self._search_regex( + r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"', + webpage, 'video url') - url = self._search_regex(r'href="([^"]+)"', video_link, 'video url') - width = int_or_none(self._search_regex( - r'width:(\d+)px', video_link, 'width', default=None, fatal=False)) - height = int_or_none(self._search_regex( - r'height:(\d+)px', video_link, 'height', default=None, fatal=False)) + title = self._og_search_title(webpage, default=None) or self._search_regex( + r'<title>([^<]+)', webpage, 'title') duration = int_or_none(self._og_search_property( - 'video:duration', webpage, 'duration')) + 'video:duration', webpage, 'duration', default=None)) + return { 'id': video_id, - 'url': url, - 'width': width, - 'height': height, - 'title': self._og_search_title(webpage), - 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'url': video_url, + 'title': title, + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'duration': duration, } From b859971873915df55668a59a18ccfd259c20800e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Jun 2015 18:15:30 +0600 Subject: [PATCH 0952/2721] [extractor/generic] Rename tvc embed url variable --- youtube_dl/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6be9e6329..357d58cea 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1312,9 +1312,9 @@ class GenericIE(InfoExtractor): return self.url_result(rutv_url, 'RUTV') # Look for embedded TVC player - rutv_url = TVCIE._extract_url(webpage) - if rutv_url: - return self.url_result(rutv_url, 'TVC') + tvc_url = TVCIE._extract_url(webpage) + if tvc_url: + return self.url_result(tvc_url, 'TVC') # Look for embedded SportBox player sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) From 9872d3110c0d3027dac856e005299f3682ef23ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Jun 2015 18:37:09 +0600 Subject: [PATCH 0953/2721] [extractor/generic] Add support for tvigle embeds --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 357d58cea..40d869c53 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1321,6 +1321,12 @@ class GenericIE(InfoExtractor): if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + # Look for embedded Tvigle player + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Tvigle') + # Look for embedded TED player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) From d22dec74ffa2a53a1c04770af37d39f384f3d56c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Jun 2015 19:20:12 +0600 Subject: [PATCH 0954/2721] Add 
`--force-generic-extractor` For some extractors that are hard to workout a good _VALID_URL we use very vague and unrestrictive ones, e.g. just allowing anything after hostname and capturing part of URL as id. If some of these extractors happen to have an video embed of some different hoster or platform and this scenario was not handled in extractor itself we end up with inability to download this embed until extractor is fixed to support embed of this kind. Forcing downloader to use the generic extractor can be a neat temporary solution for this problem. Example: FiveTV extractor with Tvigle embed - http://www.5-tv.ru/rabota/broadcasts/48/ --- youtube_dl/YoutubeDL.py | 6 ++++++ youtube_dl/__init__.py | 1 + youtube_dl/extractor/generic.py | 4 +++- youtube_dl/options.py | 4 ++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b1f792d4e..4b801a917 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -139,6 +139,7 @@ class YoutubeDL(object): outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names ignoreerrors: Do not stop on download errors. + force_generic_extractor: Force downloader to use the generic extractor nooverwrites: Prevent overwriting files. playliststart: Playlist item to start at. playlistend: Playlist item to end at. 
@@ -282,6 +283,7 @@ class YoutubeDL(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr + self._force_generic_extractor_required = params.get('force_generic_extractor', False) self.params = params self.cache = Cache(self) @@ -633,6 +635,10 @@ class YoutubeDL(object): extra_info is a dict containing the extra values to add to each result ''' + if not ie_key and self._force_generic_extractor_required: + self._force_generic_extractor_required = False + ie_key = 'Generic' + if ie_key: ies = [self.get_info_extractor(ie_key)] else: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ace17857c..215b616de 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -293,6 +293,7 @@ def _real_main(argv=None): 'autonumber_size': opts.autonumber_size, 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, + 'force_generic_extractor': opts.force_generic_extractor, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, 'retries': opts_retries, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40d869c53..3d672197c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -977,7 +977,9 @@ class GenericIE(InfoExtractor): 'upload_date': upload_date, } - if not self._downloader.params.get('test', False) and not is_intentional: + if (not self._downloader.params.get('test', False) and + not is_intentional and + not self._downloader.params.get('force_generic_extractor', False)): self._downloader.report_warning('Falling back on generic information extractor.') if not full_response: diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 689fa7595..096ab6137 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -150,6 +150,10 @@ def parseOpts(overrideArguments=None): '--extractor-descriptions', action='store_true', dest='list_extractor_descriptions', 
default=False, help='Output descriptions of all supported extractors') + general.add_option( + '--force-generic-extractor', + action='store_true', dest='force_generic_extractor', default=False, + help='Force extraction to use the generic extractor') general.add_option( '--default-search', dest='default_search', metavar='PREFIX', From 3d535e047162af021b3df6086f9a90d0cb0b6100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Jun 2015 19:31:52 +0600 Subject: [PATCH 0955/2721] [tvc] Fix embed regex --- youtube_dl/extractor/tvc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvc.py b/youtube_dl/extractor/tvc.py index 6b5d80aee..3a4f393fc 100644 --- a/youtube_dl/extractor/tvc.py +++ b/youtube_dl/extractor/tvc.py @@ -27,7 +27,7 @@ class TVCIE(InfoExtractor): @classmethod def _extract_url(cls, webpage): mobj = re.search( - r']+?src=(["\'])(?P(?:http://)?(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1', webpage) + r']+?src=(["\'])(?P(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1', webpage) if mobj: return mobj.group('url') From 185dbc49749ca81fbb0a61a78c6dd35f2c32b15f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Jun 2015 21:13:14 +0600 Subject: [PATCH 0956/2721] [prosiebensat1] Fix rtmp extraction (Closes #5962) --- youtube_dl/extractor/prosiebensat1.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 255d4abc1..6b13eb605 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -206,8 +206,8 @@ class ProSiebenSat1IE(InfoExtractor): def _extract_clip(self, url, webpage): clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') - access_token = 'testclient' - client_name = 'kolibri-1.2.5' + access_token = 'prosieben' + client_name = 'kolibri-1.12.6' client_location = url videos_api_url = 
'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({ @@ -275,13 +275,17 @@ class ProSiebenSat1IE(InfoExtractor): for source in urls_sources: protocol = source['protocol'] if protocol == 'rtmp' or protocol == 'rtmpe': - mobj = re.search(r'^(?Prtmpe?://[^/]+/(?P[^/]+))/(?P.+)$', source['url']) + mobj = re.search(r'^(?Prtmpe?://[^/]+)/(?P.+)$', source['url']) if not mobj: continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] formats.append({ - 'url': mobj.group('url'), - 'app': mobj.group('app'), - 'play_path': mobj.group('playpath'), + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', 'page_url': 'http://www.prosieben.de', 'vbr': fix_bitrate(source['bitrate']), From 8b6c896c4b60fe13b30227071aba2783222132a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Jun 2015 21:18:13 +0600 Subject: [PATCH 0957/2721] [prosiebensat1] Add title regex --- youtube_dl/extractor/prosiebensat1.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 6b13eb605..536a42dc8 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -177,6 +177,7 @@ class ProSiebenSat1IE(InfoExtractor): r'
    \s*

    (.+?)

    ', r'\s*

    (.+?)

    ', r'

    \s*(.+?)

    ', + r'
    \s*

    ([^<]+)

    \s*
    ', ] _DESCRIPTION_REGEXES = [ r'

    \s*(.+?)

    ', From 9f4323252abade4f10b0884682f92cedc78b4d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 12 Jun 2015 21:56:50 +0600 Subject: [PATCH 0958/2721] [YoutubeDL] Fix for multiple URLs --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4b801a917..8dbad7cf8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -283,7 +283,6 @@ class YoutubeDL(object): self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr - self._force_generic_extractor_required = params.get('force_generic_extractor', False) self.params = params self.cache = Cache(self) @@ -1504,6 +1503,7 @@ class YoutubeDL(object): for url in url_list: try: + self._force_generic_extractor_required = self.params.get('force_generic_extractor', False) # It also downloads the videos res = self.extract_info(url) except UnavailableVideoError: From 61aa5ba36eea3b7cf8c3570ab33604dd2c13b855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Jun 2015 02:05:21 +0600 Subject: [PATCH 0959/2721] [YoutubeDL] Remove global state for force_generic_extractor flag in favor of passing argument --- youtube_dl/YoutubeDL.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8dbad7cf8..dd2d8cb3c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -627,15 +627,14 @@ class YoutubeDL(object): info_dict.setdefault(key, value) def extract_info(self, url, download=True, ie_key=None, extra_info={}, - process=True): + process=True, force_generic_extractor=False): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. 
extra_info is a dict containing the extra values to add to each result ''' - if not ie_key and self._force_generic_extractor_required: - self._force_generic_extractor_required = False + if not ie_key and force_generic_extractor: ie_key = 'Generic' if ie_key: @@ -663,7 +662,7 @@ class YoutubeDL(object): } self.add_default_extra_info(ie_result, ie, url) if process: - return self.process_ie_result(ie_result, download, extra_info) + return self.process_ie_result(ie_result, download, extra_info, force_generic_extractor=False) else: return ie_result except ExtractorError as de: # An error we somewhat expected @@ -688,7 +687,7 @@ class YoutubeDL(object): 'extractor_key': ie.ie_key(), }) - def process_ie_result(self, ie_result, download=True, extra_info={}): + def process_ie_result(self, ie_result, download=True, extra_info={}, force_generic_extractor=False): """ Take the result of the ie(may be modified) and resolve all unresolved references (URLs, playlist items). @@ -716,7 +715,8 @@ class YoutubeDL(object): return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'), - extra_info=extra_info) + extra_info=extra_info, + force_generic_extractor=force_generic_extractor) elif result_type == 'url_transparent': # Use the information from the embedding page info = self.extract_info( @@ -1503,9 +1503,9 @@ class YoutubeDL(object): for url in url_list: try: - self._force_generic_extractor_required = self.params.get('force_generic_extractor', False) # It also downloads the videos - res = self.extract_info(url) + res = self.extract_info( + url, force_generic_extractor=self.params.get('force_generic_extractor', False)) except UnavailableVideoError: self.report_error('unable to download video') except MaxDownloadsReached: From 0072afca8e02052c77dc3b7009e51114887e31b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Jun 2015 02:21:29 +0600 Subject: [PATCH 0960/2721] [YoutubeDL] Remove force_generic_extractor arg from process_ie_result 
--- youtube_dl/YoutubeDL.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dd2d8cb3c..a7d3a1c01 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -662,7 +662,7 @@ class YoutubeDL(object): } self.add_default_extra_info(ie_result, ie, url) if process: - return self.process_ie_result(ie_result, download, extra_info, force_generic_extractor=False) + return self.process_ie_result(ie_result, download, extra_info) else: return ie_result except ExtractorError as de: # An error we somewhat expected @@ -687,7 +687,7 @@ class YoutubeDL(object): 'extractor_key': ie.ie_key(), }) - def process_ie_result(self, ie_result, download=True, extra_info={}, force_generic_extractor=False): + def process_ie_result(self, ie_result, download=True, extra_info={}): """ Take the result of the ie(may be modified) and resolve all unresolved references (URLs, playlist items). @@ -715,8 +715,7 @@ class YoutubeDL(object): return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'), - extra_info=extra_info, - force_generic_extractor=force_generic_extractor) + extra_info=extra_info) elif result_type == 'url_transparent': # Use the information from the embedding page info = self.extract_info( From 4f3bf679f5a764f7a26d3c45c82be43e34a3cc4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Jun 2015 03:09:35 +0600 Subject: [PATCH 0961/2721] [vk] Fix authentication for non-ASCII login/password --- youtube_dl/extractor/vk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index cc384adbf..d0e772108 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -119,8 +119,8 @@ class VKIE(InfoExtractor): 'act': 'login', 'role': 'al_frame', 'expire': '1', - 'email': username, - 'pass': password, + 'email': username.encode('cp1251'), + 'pass': password.encode('cp1251'), 
} request = compat_urllib_request.Request('https://login.vk.com/?act=login', From 9fcbd5db2abb5a56beefe4a64486da692705ad12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Jun 2015 03:24:36 +0600 Subject: [PATCH 0962/2721] [pornhub] Add support for embeds --- youtube_dl/extractor/pornhub.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index daa284ea2..3c99b4def 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -19,7 +19,7 @@ from ..aes import ( class PornHubIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P[0-9a-f]+)' + _VALID_URL = r'https?://(?:www\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P[0-9a-f]+)' _TEST = { 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'md5': '882f488fa1f0026f023f33576004a2ed', @@ -39,7 +39,8 @@ class PornHubIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - req = compat_urllib_request.Request(url) + req = compat_urllib_request.Request( + 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) From 65d161c480e9964026e618a2e95f9fc9eb8119e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Jun 2015 03:36:16 +0600 Subject: [PATCH 0963/2721] [extractor/generic] Add support for pornhub embeds --- youtube_dl/extractor/generic.py | 5 +++++ youtube_dl/extractor/pornhub.py | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40d869c53..f683760e4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -42,6 +42,7 @@ from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE from .bliptv import BlipTVIE from .svt import SVTIE +from .pornhub import PornHubIE 
class GenericIE(InfoExtractor): @@ -1321,6 +1322,10 @@ class GenericIE(InfoExtractor): if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + pornhub_url = PornHubIE._extract_url(webpage) + if pornhub_url: + return self.url_result(pornhub_url, 'PornHub') + # Look for embedded Tvigle player mobj = re.search( r']+?src=(["\'])(?P(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 3c99b4def..8565d7551 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -32,6 +32,13 @@ class PornHubIE(InfoExtractor): } } + @classmethod + def _extract_url(cls, webpage): + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage) + if mobj: + return mobj.group('url') + def _extract_count(self, pattern, webpage, name): return str_to_int(self._search_regex( pattern, webpage, '%s count' % name, fatal=False)) From 78e2b74bb945dc7f1724f7486405dd523486d634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Jun 2015 03:39:14 +0600 Subject: [PATCH 0964/2721] [tumblr] Add support for pornhub embeds (Closes #5963) --- youtube_dl/extractor/generic.py | 1 + youtube_dl/extractor/tumblr.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f683760e4..f6b984300 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1322,6 +1322,7 @@ class GenericIE(InfoExtractor): if sportbox_urls: return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + # Look for embedded PornHub player pornhub_url = PornHubIE._extract_url(webpage) if pornhub_url: return self.url_result(pornhub_url, 'PornHub') diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index e6218808f..63c20310d 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -4,6 
+4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from .pornhub import PornHubIE class TumblrIE(InfoExtractor): @@ -55,6 +56,10 @@ class TumblrIE(InfoExtractor): if vid_me_embed_url is not None: return self.url_result(vid_me_embed_url, 'Vidme') + pornhub_url = PornHubIE._extract_url(webpage) + if pornhub_url: + return self.url_result(pornhub_url, 'PornHub') + iframe_url = self._search_regex( r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', webpage, 'iframe url') From b4e1576aee7cf18f5842714c87985ae0b72f1546 Mon Sep 17 00:00:00 2001 From: fnord Date: Sat, 13 Jun 2015 06:09:44 -0500 Subject: [PATCH 0965/2721] Brightcove extractor: support customBC.createVideo(...); method found in http://www.americanbar.org/groups/family_law.html and http://america.aljazeera.com/watch/shows/america-tonight/2015/6/exclusive-hunting-isil-with-the-pkk.html --- youtube_dl/extractor/brightcove.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c1d4320e1..20a6ed965 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -188,7 +188,19 @@ class BrightcoveIE(InfoExtractor): [^>]*?>\s*\s*''', webpage) - return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) + if matches: + return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) + + custombcs = re.findall(r'customBC.\createVideo\((.+?)\);',webpage) + if custombcs: + urls = [] + for match in custombcs: + # brightcove playerkey begins with AQ and is 50 characters in length, + # however it's appended to itself in places, so truncate. 
+ f = re.search(r'["\'](AQ[^"\']{48}).*?["\'](\d+)["\']', match) + if f: + urls.append('brightcove:playerKey='+f.group(1)+'&%40videoPlayer='+f.group(2)) + return urls def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From af9cdee9cba610aa3924f90a8a3fcd7dd43c65eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 13 Jun 2015 19:53:32 +0600 Subject: [PATCH 0966/2721] [brightcove] Improve and generalize brightcove URL extraction from JS --- youtube_dl/extractor/brightcove.py | 35 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 20a6ed965..d768f99e6 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -156,6 +156,28 @@ class BrightcoveIE(InfoExtractor): linkBase = find_param('linkBaseURL') if linkBase is not None: params['linkBaseURL'] = linkBase + return cls._make_brightcove_url(params) + + @classmethod + def _build_brighcove_url_from_js(cls, object_js): + # The layout of JS is as follows: + # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { + # // build Brightcove XML + # } + m = re.search( + r'''(?x)customBC.\createVideo\( + .*? 
# skipping width and height + ["\'](?P\d+)["\']\s*,\s* # playerID + ["\'](?PAQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters + # in length, however it's appended to itself + # in places, so truncate + ["\'](?P\d+)["\'] # @videoPlayer + ''', object_js) + if m: + return cls._make_brightcove_url(m.groupdict()) + + @classmethod + def _make_brightcove_url(cls, params): data = compat_urllib_parse.urlencode(params) return cls._FEDERATED_URL_TEMPLATE % data @@ -191,16 +213,9 @@ class BrightcoveIE(InfoExtractor): if matches: return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) - custombcs = re.findall(r'customBC.\createVideo\((.+?)\);',webpage) - if custombcs: - urls = [] - for match in custombcs: - # brightcove playerkey begins with AQ and is 50 characters in length, - # however it's appended to itself in places, so truncate. - f = re.search(r'["\'](AQ[^"\']{48}).*?["\'](\d+)["\']', match) - if f: - urls.append('brightcove:playerKey='+f.group(1)+'&%40videoPlayer='+f.group(2)) - return urls + return list(filter(None, [ + cls._build_brighcove_url_from_js(custom_bc) + for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)])) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From 0029071adbdc0e1469d76cdc7e058c2f77299610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jun 2015 07:43:14 +0600 Subject: [PATCH 0967/2721] [dramefever] Improve and simplify --- youtube_dl/extractor/dramafever.py | 172 +++++++++++++++++------------ 1 file changed, 101 insertions(+), 71 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 40787ffcd..0f33a61a3 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -1,104 +1,111 @@ # encoding: utf-8 from __future__ import unicode_literals -import re +import itertools from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + 
compat_urlparse, +) +from ..utils import ( + ExtractorError, + clean_html, + determine_ext, + int_or_none, + parse_iso8601, +) class DramaFeverIE(InfoExtractor): IE_NAME = 'dramafever' - _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)/' - _TESTS = [{ + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)' + _TEST = { 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { 'id': '4512.1', 'ext': 'flv', 'title': 'Cooking with Shin 4512.1', + 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1404336058, 'upload_date': '20140702', - 'description': 'Served at all special occasions and featured in the hit drama Heirs, Shin cooks Red Bean Rice.', + 'duration': 343, } - }] + } def _real_extract(self, url): - video_id = self._match_id(url).replace("/", ".") + video_id = self._match_id(url).replace('/', '.') - consumer_secret = self._get_consumer_secret(video_id) + try: + feed = self._download_json( + 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, + video_id, 'Downloading episode JSON')['channel']['item'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + raise ExtractorError( + 'Currently unavailable in your country.', expected=True) + raise - ep_json = self._download_json( - "http://www.dramafever.com/amp/episode/feed.json?guid=%s" % video_id, - video_id, note='Downloading episode metadata', - errnote="Video may not be available for your location")["channel"]["item"] - - title = ep_json["media-group"]["media-title"] - description = ep_json["media-group"]["media-description"] - thumbnail = ep_json["media-group"]["media-thumbnail"]["@attributes"]["url"] - duration = int(ep_json["media-group"]["media-content"][0]["@attributes"]["duration"]) - mobj = re.match(r"([0-9]{4})-([0-9]{2})-([0-9]{2})", ep_json["pubDate"]) - upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3) if mobj is not None 
else None + media_group = feed.get('media-group', {}) formats = [] - for vid_format in ep_json["media-group"]["media-content"]: - src = vid_format["@attributes"]["url"] - if '.f4m' in src: - formats.extend(self._extract_f4m_formats(src, video_id)) - + for media_content in media_group['media-content']: + src = media_content.get('@attributes', {}).get('url') + if not src: + continue + ext = determine_ext(src) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + src, video_id, f4m_id='hds')) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', m3u8_id='hls')) + else: + formats.append({ + 'url': src, + }) self._sort_formats(formats) - video_subtitles = self.extract_subtitles(video_id, consumer_secret) + + title = media_group.get('media-title') + description = media_group.get('media-description') + duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration')) + thumbnail = self._proto_relative_url( + media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url')) + timestamp = parse_iso8601(feed.get('pubDate'), ' ') + + subtitles = {} + for media_subtitle in media_group.get('media-subTitle', []): + lang = media_subtitle.get('@attributes', {}).get('lang') + href = media_subtitle.get('@attributes', {}).get('href') + if not lang or not href: + continue + subtitles[lang] = [{ + 'ext': 'ttml', + 'url': href, + }] return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, - 'upload_date': upload_date, + 'timestamp': timestamp, 'duration': duration, 'formats': formats, - 'subtitles': video_subtitles, + 'subtitles': subtitles, } - def _get_consumer_secret(self, video_id): - df_js = self._download_webpage( - "http://www.dramafever.com/static/126960d/v2/js/plugins/jquery.threadedcomments.js", video_id) - return self._search_regex(r"'cs': '([0-9a-zA-Z]+)'", df_js, "cs") - def _get_episodes(self, series_id, consumer_secret, episode_filter=None): - 
_PAGE_SIZE = 60 - - curr_page = 1 - max_pages = curr_page + 1 - results = [] - while max_pages >= curr_page: - page_url = "http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d" % \ - (consumer_secret, series_id, _PAGE_SIZE, curr_page) - series = self._download_json( - page_url, series_id, note="Downloading series json page #%d" % curr_page) - max_pages = series['num_pages'] - results.extend([ep for ep in series['value'] if episode_filter is None or episode_filter(ep)]) - curr_page += 1 - return results - - def _get_subtitles(self, video_id, consumer_secret): - - res = None - info = self._get_episodes( - video_id.split(".")[0], consumer_secret, - episode_filter=lambda x: x['guid'] == video_id) - - if len(info) == 1 and info[0]['subfile'] != '': - res = {'en': [{'url': info[0]['subfile'], 'ext': 'srt'}]} - return res - - -class DramaFeverSeriesIE(DramaFeverIE): +class DramaFeverSeriesIE(InfoExtractor): IE_NAME = 'dramafever:series' - _VALID_URL = r'^https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)/\d*[a-zA-Z_][a-zA-Z0-9_]*/' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)(?:/(?:(?!\d).+)?)?$' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', 'info_dict': { 'id': '4512', 'title': 'Cooking with Shin', - 'description': 'Professional chef and cooking instructor Shin Kim takes some of the delicious dishes featured in your favorite dramas and shows you how to make them right at home.', + 'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1', }, 'playlist_count': 4, }, { @@ -106,25 +113,48 @@ class DramaFeverSeriesIE(DramaFeverIE): 'info_dict': { 'id': '124', 'title': 'IRIS', - 'description': 'Lee Byung Hun and Kim Tae Hee star in this powerhouse drama and ratings megahit of action, intrigue and romance.', + 'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862', }, 'playlist_count': 20, }] + _CONSUMER_SECRET = 'DA59dtVXYLxajktV' + _PAGE_SIZE = 5 # max is 60 (see 
http://api.drama9.com/#get--api-4-episode-series-) + + def _get_consumer_secret(self, video_id): + mainjs = self._download_webpage( + 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js', + video_id, 'Downloading main.js', fatal=False) + if not mainjs: + return self._CONSUMER_SECRET + return self._search_regex( + r"var\s+cs\s*=\s*'([^']+)'", mainjs, + 'consumer secret', default=self._CONSUMER_SECRET) + def _real_extract(self, url): series_id = self._match_id(url) + consumer_secret = self._get_consumer_secret(series_id) - series_json = self._download_json( - "http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s" % (consumer_secret, series_id), - series_id, note='Downloading series metadata')["series"][series_id] + series = self._download_json( + 'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s' + % (consumer_secret, series_id), + series_id, 'Downloading series JSON')['series'][series_id] - title = series_json["name"] - description = series_json["description_short"] + title = clean_html(series['name']) + description = clean_html(series.get('description') or series.get('description_short')) - episodes = self._get_episodes(series_id, consumer_secret) entries = [] - for ep in episodes: - entries.append(self.url_result( - 'http://www.dramafever.com%s' % ep['episode_url'], 'DramaFever', ep['guid'])) + for page_num in itertools.count(1): + episodes = self._download_json( + 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d' + % (consumer_secret, series_id, self._PAGE_SIZE, page_num), + series_id, 'Downloading episodes JSON page #%d' % page_num) + for episode in episodes.get('value', []): + entries.append(self.url_result( + compat_urlparse.urljoin(url, episode['episode_url']), + 'DramaFever', episode.get('guid'))) + if page_num == episodes['num_pages']: + break + return self.playlist_result(entries, series_id, title, description) From 70a2002399b46aa0cde2879d856d1bb68e2c6f67 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jun 2015 09:50:23 +0600 Subject: [PATCH 0968/2721] [dramafever:series] Fix _VALID_URL (Closes #5973) --- youtube_dl/extractor/dramafever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 0f33a61a3..42e0df24e 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -99,7 +99,7 @@ class DramaFeverIE(InfoExtractor): class DramaFeverSeriesIE(InfoExtractor): IE_NAME = 'dramafever:series' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)(?:/(?:(?!\d).+)?)?$' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', 'info_dict': { From 463b2e5542a85d5cd41b255a71833fec7b4f51e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jun 2015 09:51:07 +0600 Subject: [PATCH 0969/2721] [dramafever:series] Rollback _PAGE_SIZE to max possible --- youtube_dl/extractor/dramafever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 42e0df24e..c4b7c0b68 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -119,7 +119,7 @@ class DramaFeverSeriesIE(InfoExtractor): }] _CONSUMER_SECRET = 'DA59dtVXYLxajktV' - _PAGE_SIZE = 5 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-) + _PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-) def _get_consumer_secret(self, video_id): mainjs = self._download_webpage( From 450d89ddc12d80a500a2429632d35a0045cf630b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 14 Jun 2015 09:58:26 +0600 Subject: [PATCH 0970/2721] [dramafever] Improve _VALID_URL --- youtube_dl/extractor/dramafever.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index c4b7c0b68..a34aad486 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -19,7 +19,7 @@ from ..utils import ( class DramaFeverIE(InfoExtractor): IE_NAME = 'dramafever' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P[0-9]+/[0-9]+)(?:/|$)' _TEST = { 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { From 976b03c56bddf20c978820474e307457523f4c05 Mon Sep 17 00:00:00 2001 From: chaoskagami Date: Sun, 14 Jun 2015 00:18:40 -0400 Subject: [PATCH 0971/2721] Quality note for niconico - at least notify whether you'll get low or src --- youtube_dl/extractor/niconico.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 3cecebf95..e10348004 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -184,6 +184,11 @@ class NiconicoIE(InfoExtractor): extension = determine_ext(video_real_url) video_format = extension.upper() + if video_real_url.endswith('low'): + format_note = 'low' + else: + format_note = 'src' + thumbnail = ( xpath_text(video_info, './/thumbnail_url') or self._html_search_meta('image', webpage, 'thumbnail', default=None) or @@ -242,6 +247,7 @@ class NiconicoIE(InfoExtractor): 'title': title, 'ext': extension, 'format': video_format, + 'format_note' : format_note, 'thumbnail': thumbnail, 'description': description, 'uploader': uploader, From 180940e02df60129bce36035b4a2fd79c0c60995 Mon Sep 17 00:00:00 2001 From: Shrimadhav U K Date: Sun, 14 Jun 2015 11:19:42 +0530 Subject: [PATCH 0972/2721] spelling mistake corrected acces changed to accessing --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index b1f792d4e..aacec2958 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -119,7 +119,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. - videopassword: Password for acces a video. + videopassword: Password for accessing a video. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. From 755a9d3d1a8f99b061c8d29525d629b8ad6061a4 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sun, 14 Jun 2015 20:58:15 +0300 Subject: [PATCH 0973/2721] [tvplay] Add support for NovaTv --- youtube_dl/extractor/tvplay.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index e83e31a31..79863e781 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -26,6 +26,7 @@ class TVPlayIE(InfoExtractor): viasat4play\.no/programmer| tv6play\.no/programmer| tv3play\.dk/programmer| + play\.novatv\.bg/programi )/[^/]+/(?P\d+) ''' _TESTS = [ @@ -173,6 +174,22 @@ class TVPlayIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true', + 'info_dict': { + 'id': '624952', + 'ext': 'flv', + 'title': 'Здравей, България (12.06.2015 г.) ', + 'description': 'md5:99f3700451ac5bb71a260268b8daefd7', + 'duration': 8838, + 'timestamp': 1434100372, + 'upload_date': '20150612', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, ] def _real_extract(self, url): From 9fd24e3a227a059eed07b679dac858e5bd747123 Mon Sep 17 00:00:00 2001 From: jomo Date: Sun, 14 Jun 2015 21:50:03 +0200 Subject: [PATCH 0974/2721] LiveLeak: support more original videos some (old?) videos use ...mp4.h264_270p.mp4... instead of ...mp4.h264_base.mp4... 
This is an addition to #4768 --- youtube_dl/extractor/liveleak.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 35822067f..431f2e85d 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -85,7 +85,7 @@ class LiveLeakIE(InfoExtractor): 'url': s['file'], } for i, s in enumerate(sources)] for i, s in enumerate(sources): - orig_url = s['file'].replace('.h264_base.mp4', '') + orig_url = re.sub(r'.h264_.+\.mp4', '', s['file']) if s['file'] != orig_url: formats.append({ 'format_id': 'original-%s' % i, From 8f75761f24f9f2599efe100b5a094182af6403d0 Mon Sep 17 00:00:00 2001 From: jomo Date: Sun, 14 Jun 2015 22:41:44 +0200 Subject: [PATCH 0975/2721] LiveLak: add test for URLs with 'h264_270p' --- youtube_dl/extractor/liveleak.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 431f2e85d..c658cc92b 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -40,6 +40,16 @@ class LiveLeakIE(InfoExtractor): 'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck', 'age_limit': 18, } + }, { + 'url': 'http://www.liveleak.com/view?i=801_1409392012', + 'md5': '0b3bec2d888c20728ca2ad3642f0ef15', + 'info_dict': { + 'id': '801_1409392012', + 'ext': 'mp4', + 'description': "Happened on 27.7.2014. 
\r\nAt 0:53 you can see people still swimming at near beach.", + 'uploader': 'bony333', + 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia' + } }] def _real_extract(self, url): From 00ac23e6e06bf6de59d5d5b3f42ff64ce039fee2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 02:51:21 +0600 Subject: [PATCH 0976/2721] [liveleak] Improve regex for restoring original video URL --- youtube_dl/extractor/liveleak.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index c658cc92b..e82f21ea7 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -95,7 +95,7 @@ class LiveLeakIE(InfoExtractor): 'url': s['file'], } for i, s in enumerate(sources)] for i, s in enumerate(sources): - orig_url = re.sub(r'.h264_.+\.mp4', '', s['file']) + orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file']) if s['file'] != orig_url: formats.append({ 'format_id': 'original-%s' % i, From afa1ded425ffe12a5b90bcd4d316c43941a5dc1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 02:54:05 +0600 Subject: [PATCH 0977/2721] [liveleak] Clarify rationale for restoring raw video --- youtube_dl/extractor/liveleak.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index e82f21ea7..0a4e473d6 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -95,6 +95,9 @@ class LiveLeakIE(InfoExtractor): 'url': s['file'], } for i, s in enumerate(sources)] for i, s in enumerate(sources): + # Removing '.h264_*.mp4' gives the raw video, which is essentially + # the same video without the LiveLeak logo at the top (see + # https://github.com/rg3/youtube-dl/pull/4768) orig_url = re.sub(r'\.h264_.+?\.mp4', '', s['file']) if s['file'] != orig_url: formats.append({ From b95cfa917025750805fb873fc4e2eb161241b22b Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 02:54:49 +0600 Subject: [PATCH 0978/2721] [liveleak] Clarify test --- youtube_dl/extractor/liveleak.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index 0a4e473d6..857edfde2 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -41,6 +41,7 @@ class LiveLeakIE(InfoExtractor): 'age_limit': 18, } }, { + # Covers https://github.com/rg3/youtube-dl/pull/5983 'url': 'http://www.liveleak.com/view?i=801_1409392012', 'md5': '0b3bec2d888c20728ca2ad3642f0ef15', 'info_dict': { From 5774ef35c4d167f7c959041bf4efc5581a98f0a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 02:57:07 +0600 Subject: [PATCH 0979/2721] [options] Add missing whitespace for --fixup description --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 689fa7595..740458e51 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -725,7 +725,7 @@ def parseOpts(overrideArguments=None): metavar='POLICY', dest='fixup', default='detect_or_warn', help='Automatically correct known faults of the file. 
' 'One of never (do nothing), warn (only emit a warning), ' - 'detect_or_warn(the default; fix file if we can, warn otherwise)') + 'detect_or_warn (the default; fix file if we can, warn otherwise)') postproc.add_option( '--prefer-avconv', action='store_false', dest='prefer_ffmpeg', From 67d95f177c7ffedfc8f8b086535013a1a7a48b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 03:43:33 +0600 Subject: [PATCH 0980/2721] [niconico] Simplify format info --- youtube_dl/extractor/niconico.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index e10348004..0f8aa5ada 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -182,12 +182,6 @@ class NiconicoIE(InfoExtractor): extension = xpath_text(video_info, './/movie_type') if not extension: extension = determine_ext(video_real_url) - video_format = extension.upper() - - if video_real_url.endswith('low'): - format_note = 'low' - else: - format_note = 'src' thumbnail = ( xpath_text(video_info, './/thumbnail_url') or @@ -246,8 +240,7 @@ class NiconicoIE(InfoExtractor): 'url': video_real_url, 'title': title, 'ext': extension, - 'format': video_format, - 'format_note' : format_note, + 'format_id': 'economy' if video_real_url.endswith('low') else 'normal', 'thumbnail': thumbnail, 'description': description, 'uploader': uploader, From 2a0fcf6113c3f0c8d0510167fd7017cc0fdfa622 Mon Sep 17 00:00:00 2001 From: zx8 Date: Mon, 15 Jun 2015 00:27:43 +0100 Subject: [PATCH 0981/2721] [safari] make url regex more lenient --- youtube_dl/extractor/safari.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 10251f29e..20ba6fa33 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -83,7 +83,7 @@ class SafariIE(SafariBaseIE): library/view/[^/]+| api/v1/book )/ - 
(?P\d+)/ + (?P[^/]+)/ (?:chapter(?:-content)?/)? (?Ppart\d+)\.html ''' @@ -122,7 +122,7 @@ class SafariCourseIE(SafariBaseIE): IE_NAME = 'safari:course' IE_DESC = 'safaribooksonline.com online courses' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P\d+)/?(?:[#?]|$)' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P[^/]+)/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', From 4b9f9010b0d744969189c383e98e8729f9fe9623 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 15 Jun 2015 01:35:50 +0200 Subject: [PATCH 0982/2721] release 2015.06.15 --- README.md | 4 ++-- docs/supportedsites.md | 7 +++++++ youtube_dl/version.py | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f3d83c89f..5f3a08f5a 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ which means you can modify it, redistribute it or use it however you like. -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs --dump-user-agent Display the current browser identification - --list-extractors List all supported extractors and the URLs they would handle + --list-extractors List all supported extractors --extractor-descriptions Output descriptions of all supported extractors --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The @@ -223,7 +223,7 @@ which means you can modify it, redistribute it or use it however you like. parameters replace existing values. 
Additional templates: %(album)s, %(artist)s. Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise" --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) - --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; + --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise) --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default) --prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors diff --git a/docs/supportedsites.md b/docs/supportedsites.md index d147b53fe..220e52b98 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -120,6 +120,8 @@ - **divxstage**: DivxStage - **Dotsub** - **DouyuTV** + - **dramafever** + - **dramafever:series** - **DRBonanza** - **Dropbox** - **DrTuber** @@ -153,6 +155,7 @@ - **fernsehkritik.tv** - **fernsehkritik.tv:postecke** - **Firstpost** + - **FiveTV** - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** @@ -217,6 +220,7 @@ - **instagram:user**: Instagram user profile - **InternetVideoArchive** - **IPrima** + - **iqiyi** - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **Izlesene** @@ -407,6 +411,7 @@ - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos - **RUTV**: RUTV.RU + - **Ruutu** - **safari**: safaribooksonline.com online video - **safari:course**: safaribooksonline.com online courses - **Sandia**: Sandia National Laboratories @@ -519,6 +524,8 @@ - **TV2** - **TV2Article** - **TV4**: tv4.se and tv4play.se + - **TVC** + - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvp.pl** - **tvp.pl:Series** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9cf84ff71..34a13cb81 100644 --- 
a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.06.04.1' +__version__ = '2015.06.15' From 4af98ecdfb896c16e73cb9f7306908cc686782e8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Jun 2015 18:49:27 +0800 Subject: [PATCH 0983/2721] [vbox7] Fix extraction (fixes #5967) --- youtube_dl/extractor/vbox7.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py index dd026748d..722eb5236 100644 --- a/youtube_dl/extractor/vbox7.py +++ b/youtube_dl/extractor/vbox7.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..compat import ( compat_urllib_parse, compat_urllib_request, + compat_urlparse, ) from ..utils import ( ExtractorError, @@ -26,11 +27,21 @@ class Vbox7IE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - redirect_page, urlh = self._download_webpage_handle(url, video_id) - new_location = self._search_regex(r'window\.location = \'(.*)\';', - redirect_page, 'redirect location') - redirect_url = urlh.geturl() + new_location - webpage = self._download_webpage(redirect_url, video_id, + # need to get the page 3 times for the correct jsSecretToken cookie + # which is necessary for the correct title + def get_session_id(): + redirect_page = self._download_webpage(url, video_id) + session_id_url = self._search_regex( + r'var\s*url\s*=\s*\'([^\']+)\';', redirect_page, + 'session id url') + self._download_webpage( + compat_urlparse.urljoin(url, session_id_url), video_id, + 'Getting session id') + + get_session_id() + get_session_id() + + webpage = self._download_webpage(url, video_id, 'Downloading redirect page') title = self._html_search_regex(r'(.*)', From aed473ccf9d9da73b1b80ee8b06d00ee66a3769d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Jun 2015 22:41:24 +0800 Subject: [PATCH 0984/2721] [youku] PEP8 --- youtube_dl/extractor/youku.py | 
80 +++++++++++++++++------------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index aed6b960a..4e47fca8a 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -11,6 +11,7 @@ from ..compat import compat_urllib_parse bytes_is_str = (bytes == str) # for compatible + class YoukuIE(InfoExtractor): IE_NAME = 'youku' _VALID_URL = r'''(?x) @@ -21,13 +22,13 @@ class YoukuIE(InfoExtractor): ''' _TEST = { - 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', - 'md5': '5f3af4192eabacc4501508d54a8cabd7', - 'info_dict': { - 'id': 'XMTc1ODE5Njcy', - 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', - 'ext': 'flv' - } + 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', + 'md5': '5f3af4192eabacc4501508d54a8cabd7', + 'info_dict': { + 'id': 'XMTc1ODE5Njcy', + 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', + 'ext': 'flv' + } } def construct_video_urls(self, data1, data2): @@ -36,7 +37,7 @@ class YoukuIE(InfoExtractor): ls = list(range(256)) t = 0 for i in range(256): - t = (t + ls[i] + ord(s1[i%len(s1)])) % 256 + t = (t + ls[i] + ord(s1[i % len(s1)])) % 256 ls[i], ls[t] = ls[t], ls[i] s = '' if not bytes_is_str else b'' x, y = 0, 0 @@ -45,16 +46,16 @@ class YoukuIE(InfoExtractor): x = (x + ls[y]) % 256 ls[x], ls[y] = ls[y], ls[x] if isinstance(s2[i], int): - s += chr(s2[i] ^ ls[(ls[x]+ls[y]) % 256]) + s += chr(s2[i] ^ ls[(ls[x] + ls[y]) % 256]) else: - s += chr(ord(s2[i]) ^ ls[(ls[x]+ls[y]) % 256]) + s += chr(ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256]) return s sid, token = yk_t( 'becaf9be', - base64.b64decode(bytes(data2['ep'], 'ascii')) \ - if not bytes_is_str \ - else base64.b64decode(data2['ep']) + base64.b64decode(bytes(data2['ep'], 'ascii')) + if not bytes_is_str + else base64.b64decode(data2['ep']) ).split('_') # get oip @@ -89,13 +90,13 @@ class YoukuIE(InfoExtractor): fileid = get_fileid(format, n) ep_t = yk_t( 'bf7e5f01', - bytes('%s_%s_%s' % (sid, fileid, 
token), 'ascii') \ - if not bytes_is_str \ + bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') + if not bytes_is_str else ('%s_%s_%s' % (sid, fileid, token)) ) ep = base64.b64encode( - bytes(ep_t, 'latin') \ - if not bytes_is_str \ + bytes(ep_t, 'latin') + if not bytes_is_str else ep_t ).decode() return ep @@ -121,9 +122,9 @@ class YoukuIE(InfoExtractor): video_url = \ 'http://k.youku.com/player/getFlvPath/' + \ 'sid/' + sid + \ - '_' + str(int(n)+1).zfill(2) + \ + '_' + str(int(n) + 1).zfill(2) + \ '/st/' + self.parse_ext_l(format) + \ - '/fileid/' + get_fileid(format, n) + '?' + \ + '/fileid/' + get_fileid(format, n) + '?' + \ compat_urllib_parse.urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls @@ -132,34 +133,34 @@ class YoukuIE(InfoExtractor): def get_hd(self, fm): hd_id_dict = { - 'flv' : '0', - 'mp4' : '1', - 'hd2' : '2', - 'hd3' : '3', - '3gp' : '0', - '3gphd' : '1' + 'flv': '0', + 'mp4': '1', + 'hd2': '2', + 'hd3': '3', + '3gp': '0', + '3gphd': '1' } return hd_id_dict[fm] def parse_ext_l(self, fm): ext_dict = { - 'flv' : 'flv', - 'mp4' : 'mp4', - 'hd2' : 'flv', - 'hd3' : 'flv', - '3gp' : 'flv', - '3gphd' : 'mp4' + 'flv': 'flv', + 'mp4': 'mp4', + 'hd2': 'flv', + 'hd3': 'flv', + '3gp': 'flv', + '3gphd': 'mp4' } return ext_dict[fm] def get_format_name(self, fm): _dict = { - '3gp' : 'h6', - '3gphd' : 'h5', - 'flv' : 'h4', - 'mp4' : 'h3', - 'hd2' : 'h2', - 'hd3' : 'h1' + '3gp': 'h6', + '3gphd': 'h5', + 'flv': 'h4', + 'mp4': 'h3', + 'hd2': 'h2', + 'hd3': 'h1' } return _dict[fm] @@ -194,10 +195,9 @@ class YoukuIE(InfoExtractor): # construct info entries = [] for fm in data1['streamtypes']: - #formats = [] video_urls = video_urls_dict[fm] for i in range(len(video_urls)): - if len(entries) < i+1: + if len(entries) < i + 1: entries.append({'formats': []}) entries[i]['formats'].append( { @@ -211,7 +211,7 @@ class YoukuIE(InfoExtractor): for i in range(len(entries)): entries[i].update( { - 'id': '_part%d' % (i+1), + 'id': '_part%d' % 
(i + 1), 'title': title, } ) From 054932f4035d606946f0c054c02cf87496b753f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 20:46:10 +0600 Subject: [PATCH 0985/2721] [vk] Fix extraction (Closes #5987) --- youtube_dl/extractor/vk.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index d0e772108..6aeba109d 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -175,16 +175,16 @@ class VKIE(InfoExtractor): m_rutube.group(1).replace('\\', '')) return self.url_result(rutube_url) - m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) if m_opts: - m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) + m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) if m_opts_url: opts_url = m_opts_url.group(1) if opts_url.startswith('//'): opts_url = 'http:' + opts_url return self.url_result(opts_url) - data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') + data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars') data = json.loads(data_json) # Extract upload date From 7c7dd9dc7fe8139196c7bd1c512301a61f9f362b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 20:47:01 +0600 Subject: [PATCH 0986/2721] [vk] Fix upload date extraction --- youtube_dl/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 6aeba109d..f974f8fef 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -189,7 +189,7 @@ class VKIE(InfoExtractor): # Extract upload date upload_date = None - mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) + mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) if mobj is not 
None: mobj.group(1) + ' ' + mobj.group(2) upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) From 8117df4cd9e49a3c7369db3cab6c0b94365c7786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 20:55:25 +0600 Subject: [PATCH 0987/2721] [vk] Extract view count --- youtube_dl/extractor/vk.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index f974f8fef..38ff3c1a9 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -13,6 +13,7 @@ from ..compat import ( from ..utils import ( ExtractorError, orderedSet, + str_to_int, unescapeHTML, unified_strdate, ) @@ -34,6 +35,7 @@ class VKIE(InfoExtractor): 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'duration': 195, 'upload_date': '20120212', + 'view_count': int, }, }, { @@ -45,7 +47,8 @@ class VKIE(InfoExtractor): 'uploader': 'Tom Cruise', 'title': 'No name', 'duration': 9, - 'upload_date': '20130721' + 'upload_date': '20130721', + 'view_count': int, } }, { @@ -59,6 +62,7 @@ class VKIE(InfoExtractor): 'title': 'Lin Dan', 'duration': 101, 'upload_date': '20120730', + 'view_count': int, } }, { @@ -73,7 +77,8 @@ class VKIE(InfoExtractor): 'uploader': 'Триллеры', 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 'duration': 8352, - 'upload_date': '20121218' + 'upload_date': '20121218', + 'view_count': int, }, 'skip': 'Requires vk account credentials', }, @@ -100,6 +105,7 @@ class VKIE(InfoExtractor): 'title': 'Книга Илая', 'duration': 6771, 'upload_date': '20140626', + 'view_count': int, }, 'skip': 'Only works from Russia', }, @@ -194,6 +200,10 @@ class VKIE(InfoExtractor): mobj.group(1) + ' ' + mobj.group(2) upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2)) + view_count = str_to_int(self._search_regex( + r'"mv_views_count_number"[^>]*>([\d,.]+) views<', + info_page, 'view count', fatal=False)) + formats = [{ 'format_id': k, 'url': v, @@ 
-210,6 +220,7 @@ class VKIE(InfoExtractor): 'uploader': data.get('md_author'), 'duration': data.get('duration'), 'upload_date': upload_date, + 'view_count': view_count, } From 02175a7986c4223a0ed27a872c1ca16926913e05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 21:01:26 +0600 Subject: [PATCH 0988/2721] [youtube:search] Fix search query (Closes #5988) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3448bec4f..9e2671192 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1504,7 +1504,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): for pagenum in itertools.count(1): url_query = { - 'search_query': query, + 'search_query': query.encode('utf-8'), 'page': pagenum, 'spf': 'navigate', } From c203be3fb4f00388c81564dc0c85ff8a10ff4553 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Jun 2015 23:28:59 +0800 Subject: [PATCH 0989/2721] [youku] Better handling for Python 2/3 compatibility --- youtube_dl/extractor/youku.py | 37 ++++++++++++----------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 4e47fca8a..26e5baadc 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -7,9 +7,10 @@ import base64 from .common import InfoExtractor from ..utils import ExtractorError -from ..compat import compat_urllib_parse - -bytes_is_str = (bytes == str) # for compatible +from ..compat import ( + compat_urllib_parse, + compat_ord, +) class YoukuIE(InfoExtractor): @@ -37,26 +38,20 @@ class YoukuIE(InfoExtractor): ls = list(range(256)) t = 0 for i in range(256): - t = (t + ls[i] + ord(s1[i % len(s1)])) % 256 + t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256 ls[i], ls[t] = ls[t], ls[i] - s = '' if not bytes_is_str else b'' + s = bytearray() x, y 
= 0, 0 for i in range(len(s2)): y = (y + 1) % 256 x = (x + ls[y]) % 256 ls[x], ls[y] = ls[y], ls[x] - if isinstance(s2[i], int): - s += chr(s2[i] ^ ls[(ls[x] + ls[y]) % 256]) - else: - s += chr(ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256]) - return s + s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256]) + return bytes(s) sid, token = yk_t( - 'becaf9be', - base64.b64decode(bytes(data2['ep'], 'ascii')) - if not bytes_is_str - else base64.b64decode(data2['ep']) - ).split('_') + b'becaf9be', base64.b64decode(data2['ep'].encode('ascii')) + ).decode('ascii').split('_') # get oip oip = data2['ip'] @@ -89,16 +84,10 @@ class YoukuIE(InfoExtractor): def generate_ep(format, n): fileid = get_fileid(format, n) ep_t = yk_t( - 'bf7e5f01', - bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') - if not bytes_is_str - else ('%s_%s_%s' % (sid, fileid, token)) + b'bf7e5f01', + ('%s_%s_%s' % (sid, fileid, token)).encode('ascii') ) - ep = base64.b64encode( - bytes(ep_t, 'latin') - if not bytes_is_str - else ep_t - ).decode() + ep = base64.b64encode(ep_t).decode('ascii') return ep # generate video_urls From 99e6833c85868b78df7c810603ffdccdaeb4eaf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 21:30:27 +0600 Subject: [PATCH 0990/2721] [francetv] Update f4m manifest token URL (Closes #5981, Closes #5989) --- youtube_dl/extractor/francetv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index edf555b29..db0bbec1e 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -60,7 +60,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor): continue video_url_parsed = compat_urllib_parse_urlparse(video_url) f4m_url = self._download_webpage( - 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, + 'http://hdfauth.francetv.fr/esi/TA?url=%s' % video_url_parsed.path, video_id, 'Downloading f4m manifest token', fatal=False) 
if f4m_url: formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id)) From 9383e66f9475eca0e64c09972c1392d92d17570c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Jun 2015 23:31:30 +0800 Subject: [PATCH 0991/2721] [youku] Use _match_id --- youtube_dl/extractor/youku.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 26e5baadc..e41b48369 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re import base64 from .common import InfoExtractor @@ -154,8 +153,7 @@ class YoukuIE(InfoExtractor): return _dict[fm] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) # request basic data data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id From ee69799262e8344742b9d8b492fe792b4d586f6a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Jun 2015 23:36:28 +0800 Subject: [PATCH 0992/2721] [youku] Add a v.swf test case --- youtube_dl/extractor/youku.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index e41b48369..d8162a0c5 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -21,7 +21,7 @@ class YoukuIE(InfoExtractor): (?P[A-Za-z0-9]+)(?:\.html|/v\.swf|) ''' - _TEST = { + _TESTS = [{ 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', 'md5': '5f3af4192eabacc4501508d54a8cabd7', 'info_dict': { @@ -29,7 +29,10 @@ class YoukuIE(InfoExtractor): 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', 'ext': 'flv' } - } + }, { + 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf', + 'only_matching': True, + }] def construct_video_urls(self, data1, data2): # get sid, token From 4fd35ee072a39654f11e794db5c50ee375c9a7c6 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 21:36:30 +0600 Subject: [PATCH 0993/2721] [safari] Add test for #5985 --- youtube_dl/extractor/safari.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 20ba6fa33..f3c80708c 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -100,6 +100,10 @@ class SafariIE(SafariBaseIE): }, { 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 'only_matching': True, + }, { + # non-digits in course id + 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', + 'only_matching': True, }] def _real_extract(self, url): From f1e66cb2eb40b48c6508acbe57207a2d99792bf0 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Jun 2015 23:46:07 +0800 Subject: [PATCH 0994/2721] [youku] Change video_id and add a multipart test case --- youtube_dl/extractor/youku.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index d8162a0c5..d5b73ebce 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -25,13 +25,20 @@ class YoukuIE(InfoExtractor): 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', 'md5': '5f3af4192eabacc4501508d54a8cabd7', 'info_dict': { - 'id': 'XMTc1ODE5Njcy', + 'id': 'XMTc1ODE5Njcy_part1', 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', 'ext': 'flv' } }, { 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf', 'only_matching': True, + }, { + 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html', + 'info_dict': { + 'id': 'XODgxNjg1Mzk2', + 'title': '武媚娘传奇 85', + }, + 'playlist_count': 11, }] def construct_video_urls(self, data1, data2): @@ -201,20 +208,14 @@ class YoukuIE(InfoExtractor): for i in range(len(entries)): entries[i].update( { - 'id': '_part%d' % 
(i + 1), + 'id': '%s_part%d' % (video_id, i + 1), 'title': title, } ) - if len(entries) > 1: - info = { - '_type': 'multi_video', - 'id': video_id, - 'title': title, - 'entries': entries, - } - else: - info = entries[0] - info['id'] = video_id - - return info + return { + '_type': 'multi_video', + 'id': video_id, + 'title': title, + 'entries': entries, + } From 04e7596680bce28beae2436bac0f6d1f01a45210 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 15 Jun 2015 23:54:55 +0800 Subject: [PATCH 0995/2721] [youku] Better error handling --- youtube_dl/extractor/youku.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index d5b73ebce..91f9f6bff 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -176,13 +176,15 @@ class YoukuIE(InfoExtractor): error_code = data1.get('error_code') if error_code: - # -8 means blocked outside China. - # Chinese and English, separated by newline. 
error = data1.get('error') - raise ExtractorError( - error or 'Server reported error %i' % - error_code, - expected=True) + if error is not None and '因版权原因无法观看此视频' in error: + raise ExtractorError( + 'Youku said: Sorry, this video is available in China only', expected=True) + else: + msg = 'Youku server reported error %i' % error_code + if error is not None: + msg += ': ' + error + raise ExtractorError(msg) title = data1['title'] From 5228b756af2c2bfc2962a5b1bb6db1e6a41c9e05 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 16 Jun 2015 00:06:23 +0800 Subject: [PATCH 0996/2721] [youku] Add cn_verification_proxy support and add a georestricted test case --- youtube_dl/extractor/youku.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 91f9f6bff..ea37dc8b2 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -9,6 +9,7 @@ from ..utils import ExtractorError from ..compat import ( compat_urllib_parse, compat_ord, + compat_urllib_request, ) @@ -39,6 +40,14 @@ class YoukuIE(InfoExtractor): 'title': '武媚娘传奇 85', }, 'playlist_count': 11, + }, { + 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html', + 'info_dict': { + 'id': 'XMTI1OTczNDM5Mg', + 'title': '花千骨 04', + }, + 'playlist_count': 13, + 'skip': 'Available in China only', }] def construct_video_urls(self, data1, data2): @@ -165,14 +174,23 @@ class YoukuIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - # request basic data - data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id - data2_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id + def retrieve_data(req_url, note): + req = compat_urllib_request.Request(req_url) - raw_data1 = self._download_json(data1_url, video_id) - raw_data2 = self._download_json(data2_url, video_id) - data1 = raw_data1['data'][0] - data2 = 
raw_data2['data'][0] + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + req.add_header('Ytdl-request-proxy', cn_verification_proxy) + + raw_data = self._download_json(req, video_id, note=note) + return raw_data['data'][0] + + # request basic data + data1 = retrieve_data( + 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id, + 'Downloading JSON metadata 1') + data2 = retrieve_data( + 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id, + 'Downloading JSON metadata 2') error_code = data1.get('error_code') if error_code: From a155b7e76c5a71c650f62c4716d23a24943fc373 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 16 Jun 2015 00:15:09 +0800 Subject: [PATCH 0997/2721] [youku] Coding style --- youtube_dl/extractor/youku.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index ea37dc8b2..cab5be3a4 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -216,22 +216,18 @@ class YoukuIE(InfoExtractor): for i in range(len(video_urls)): if len(entries) < i + 1: entries.append({'formats': []}) - entries[i]['formats'].append( - { - 'url': video_urls[i], - 'format_id': self.get_format_name(fm), - 'ext': self.parse_ext_l(fm), - 'filesize': int(data1['segs'][fm][i]['size']) - } - ) + entries[i]['formats'].append({ + 'url': video_urls[i], + 'format_id': self.get_format_name(fm), + 'ext': self.parse_ext_l(fm), + 'filesize': int(data1['segs'][fm][i]['size']) + }) for i in range(len(entries)): - entries[i].update( - { - 'id': '%s_part%d' % (video_id, i + 1), - 'title': title, - } - ) + entries[i].update({ + 'id': '%s_part%d' % (video_id, i + 1), + 'title': title, + }) return { '_type': 'multi_video', From 0501bfa159db5b5e8ed7fd1ed966b9989becb3e9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 16 Jun 2015 00:15:30 +0800 Subject: [PATCH 
0998/2721] [YoutubeDL] Youku extractor now uses the standard format selection --- youtube_dl/YoutubeDL.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index aacec2958..6e4b6f566 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1033,12 +1033,6 @@ class YoutubeDL(object): info_dict['id'], info_dict.get('subtitles'), info_dict.get('automatic_captions')) - # This extractors handle format selection themselves - if info_dict['extractor'] in ['Youku']: - if download: - self.process_info(info_dict) - return info_dict - # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available From 7f0172b3e5e0da2a19708fdf3ec1b521a6e2656f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 15 Jun 2015 22:29:41 +0600 Subject: [PATCH 0999/2721] Credit @jackyzy823 for iqiyi --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index bf2a25cb8..889d599a2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -127,3 +127,4 @@ Julian Richen Ping O. Mister Hat Peter Ding +jackyzy823 From f3aecb27a4d7b178ae66b4a294cff5dbe9bb2b18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 16 Jun 2015 14:41:52 +0200 Subject: [PATCH 1000/2721] [youku] Simplify a bit the 'entries' construction Mainly avoid having to use an index. 
--- youtube_dl/extractor/youku.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index cab5be3a4..ced3a10cd 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -210,25 +210,23 @@ class YoukuIE(InfoExtractor): video_urls_dict = self.construct_video_urls(data1, data2) # construct info - entries = [] + entries = [{ + 'id': '%s_part%d' % (video_id, i + 1), + 'title': title, + 'formats': [], + # some formats are not available for all parts, we have to detect + # which one has all + } for i in range(max(len(v) for v in data1['segs'].values()))] for fm in data1['streamtypes']: video_urls = video_urls_dict[fm] - for i in range(len(video_urls)): - if len(entries) < i + 1: - entries.append({'formats': []}) - entries[i]['formats'].append({ - 'url': video_urls[i], + for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries): + entry['formats'].append({ + 'url': video_url, 'format_id': self.get_format_name(fm), 'ext': self.parse_ext_l(fm), - 'filesize': int(data1['segs'][fm][i]['size']) + 'filesize': int(seg['size']), }) - for i in range(len(entries)): - entries[i].update({ - 'id': '%s_part%d' % (video_id, i + 1), - 'title': title, - }) - return { '_type': 'multi_video', 'id': video_id, From 447053668fbed993f6f4fd2e06d9282ea30224bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 16 Jun 2015 21:19:18 +0600 Subject: [PATCH 1001/2721] [spankwire] Fix extraction --- youtube_dl/extractor/spankwire.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 06d6e6640..bff75d6b2 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -27,7 +27,7 @@ class SpankwireIE(InfoExtractor): 'description': 'Crazy Bitch X rated music video.', 'uploader': 'oreusz', 'uploader_id': '124697', - 
'upload_date': '20070508', + 'upload_date': '20070507', 'age_limit': 18, } } @@ -44,7 +44,7 @@ class SpankwireIE(InfoExtractor): title = self._html_search_regex( r'

    ([^<]+)', webpage, 'title') description = self._html_search_regex( - r'([^<]+)<', + r'(?s)(.+?)', webpage, 'description', fatal=False) thumbnail = self._html_search_regex( r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']', @@ -64,12 +64,12 @@ class SpankwireIE(InfoExtractor): r'
    ([\d,\.]+) views
    ', webpage, 'view count', fatal=False)) comment_count = str_to_int(self._html_search_regex( - r'Comments]+>\s*\(([\d,\.]+)\)', + r']*>([\d,\.]+)', webpage, 'comment count', fatal=False)) video_urls = list(map( compat_urllib_parse.unquote, - re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*["\']([^"\']+)["\']', webpage))) + re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage))) if webpage.find('flashvars\.encrypted = "true"') != -1: password = self._search_regex( r'flashvars\.video_title = "([^"]+)', From 14835de9fb41798c8e6e731a3f07ae871770666f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lio=20A=2E=20Heckert?= Date: Tue, 16 Jun 2015 18:10:31 -0300 Subject: [PATCH 1002/2721] Use shlex.split for --pp-params and update related docs. --- README.md | 2 +- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 6 ++++-- youtube_dl/options.py | 4 ++-- youtube_dl/postprocessor/common.py | 3 ++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 726ec9cf2..813ac4a15 100644 --- a/README.md +++ b/README.md @@ -214,7 +214,7 @@ which means you can modify it, redistribute it or use it however you like. --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|xvid) - --pp-params Extra parameters for video post-processor. The params will be splited on spaces. + --pp-params Extra parameters for video post-processor. 
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default --embed-subs Embed subtitles in the video (only for mkv and mp4 videos) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index b1f792d4e..3bfe30c76 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -261,6 +261,7 @@ class YoutubeDL(object): The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, otherwise prefer avconv. + pp_params: Extra parameters for external apps, like avconv. """ params = None diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5b28e4817..8b54d4ae2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -171,8 +171,10 @@ def _real_main(argv=None): if opts.recodevideo is not None: if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'xvid']: parser.error('invalid video recode format specified') - if opts.pp_params is not None: - opts.pp_params = opts.pp_params.split() + if opts.pp_params is None: + opts.pp_params = [] + else: + opts.pp_params = shlex.split(opts.pp_params) if opts.convertsubtitles is not None: if opts.convertsubtitles not in ['srt', 'vtt', 'ass']: parser.error('invalid subtitle format specified') diff --git a/youtube_dl/options.py b/youtube_dl/options.py index ceb4b5f38..fbba9b9d8 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -689,8 +689,8 @@ def parseOpts(overrideArguments=None): help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|xvid)') postproc.add_option( '--pp-params', - dest='pp_params', default=None, - help='Extra parameters for video post-processor. 
The params will be splited on spaces.') + dest='pp_params', default=None, metavar='ARGS', + help='Extra parameters for video post-processor.') postproc.add_option( '-k', '--keep-video', action='store_true', dest='keepvideo', default=False, diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py index 3b0e8ddd8..d944d9367 100644 --- a/youtube_dl/postprocessor/common.py +++ b/youtube_dl/postprocessor/common.py @@ -22,7 +22,8 @@ class PostProcessor(object): of the chain is reached. PostProcessor objects follow a "mutual registration" process similar - to InfoExtractor objects. + to InfoExtractor objects. And it can receive parameters from CLI trough + --pp-params. """ _downloader = None From 028a33d7f2a0bc028f533530d2722b57b31dabdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 17 Jun 2015 20:27:38 +0600 Subject: [PATCH 1003/2721] [lifenews] Fix extraction --- youtube_dl/extractor/lifenews.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 42cb6e35f..373122c93 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -82,10 +82,11 @@ class LifeNewsIE(InfoExtractor): view_count = self._html_search_regex( r'
    \s*(\d+)\s*
    ', webpage, 'view count', fatal=False) comment_count = self._html_search_regex( - r'
    \s*\s*(\d+)\s*', webpage, 'comment count', fatal=False) + r'=\'commentCount\'[^>]*>\s*(\d+)\s*<', + webpage, 'comment count', fatal=False) upload_date = self._html_search_regex( - r'
    \s*(.*?)\s*
    ', webpage, 'categories', fatal=False).split(', ') } # find and add the format From 62b742ece3ec6c7d7fd24898b5413b6b98a4ae8f Mon Sep 17 00:00:00 2001 From: George Brighton Date: Sat, 27 Jun 2015 20:51:11 +0100 Subject: [PATCH 1112/2721] [moviefap] Remove redundant comments --- youtube_dl/extractor/moviefap.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py index 23575d30a..b38a8e71f 100644 --- a/youtube_dl/extractor/moviefap.py +++ b/youtube_dl/extractor/moviefap.py @@ -70,19 +70,13 @@ class MovieFapIE(InfoExtractor): def _real_extract(self, url): - # find the video ID video_id = self._match_id(url) - - # retrieve the page HTML webpage = self._download_webpage(url, video_id) - # find the URL of the XML document detailing video download URLs + # find and retrieve the XML document detailing video download URLs info_url = self._html_search_regex(r'flashvars\.config = escape\("(.+?)"', webpage, 'player parameters') - - # download that XML xml = self._download_xml(info_url, video_id) - # create dictionary of properties we know so far, or can find easily info = { 'id': video_id, 'title': self._html_search_regex(r'

    (.*?)

    ', webpage, 'title'), From 43b925ce74efd0a011f7880dcdcc90f4cf3b8f4b Mon Sep 17 00:00:00 2001 From: George Brighton Date: Sat, 27 Jun 2015 20:52:12 +0100 Subject: [PATCH 1113/2721] [moviefap] Replace calls to `find()` with `util.xpath_text()`. --- youtube_dl/extractor/moviefap.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py index b38a8e71f..6da93dbc9 100644 --- a/youtube_dl/extractor/moviefap.py +++ b/youtube_dl/extractor/moviefap.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import str_to_int +from ..utils import ( + xpath_text, + str_to_int +) class MovieFapIE(InfoExtractor): @@ -82,7 +85,7 @@ class MovieFapIE(InfoExtractor): 'title': self._html_search_regex(r'

    (.*?)

    ', webpage, 'title'), 'display_id': re.compile(self._VALID_URL).match(url).group('name'), 'thumbnails': self.__get_thumbnail_data(xml), - 'thumbnail': xml.find('startThumb').text, + 'thumbnail': xpath_text(xml, 'startThumb', 'thumbnail'), 'description': self._html_search_regex(r'name="description" value="(.*?)"', webpage, 'description', fatal=False), 'uploader_id': self._html_search_regex(r'name="username" value="(.*?)"', webpage, 'uploader_id', fatal=False), 'view_count': str_to_int(self._html_search_regex(r'
    Views ([0-9]+)', webpage, 'view_count, fatal=False')), @@ -102,7 +105,7 @@ class MovieFapIE(InfoExtractor): # work out the video URL(s) if xml.find('videoLink') is not None: # single format available - info['url'] = xml.find('videoLink').text + info['url'] = xpath_text(xml, 'videoLink', 'url', True) else: # multiple formats available info['formats'] = [] @@ -110,8 +113,8 @@ class MovieFapIE(InfoExtractor): # N.B. formats are already in ascending order of quality for item in xml.find('quality').findall('item'): info['formats'].append({ - 'url': item.find('videoLink').text, - 'resolution': item.find('res').text # 480p etc. + 'url': xpath_text(item, 'videoLink', 'url', True), + 'resolution': xpath_text(item, 'res', 'resolution', True) # 480p etc. }) return info From b971abe897ee17fed7e36868fdc8880f6b145d7b Mon Sep 17 00:00:00 2001 From: George Brighton Date: Sat, 27 Jun 2015 21:04:53 +0100 Subject: [PATCH 1114/2721] [moviefap] Replace call to `str()` with `compat.compat_str()` --- youtube_dl/extractor/moviefap.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py index 6da93dbc9..20a78f3b2 100644 --- a/youtube_dl/extractor/moviefap.py +++ b/youtube_dl/extractor/moviefap.py @@ -7,6 +7,7 @@ from ..utils import ( xpath_text, str_to_int ) +from ..compat import compat_str class MovieFapIE(InfoExtractor): @@ -65,7 +66,7 @@ class MovieFapIE(InfoExtractor): thumbnails = [] for i in range(first, last + 1): thumbnails.append({ - 'url': pattern.replace('#', str(i)), + 'url': pattern.replace('#', compat_str(i)), 'width': width, 'height': height }) From 8a1b49ff19a8a1fdc2c30cf10cc0598ac9bc8819 Mon Sep 17 00:00:00 2001 From: George Brighton Date: Sat, 27 Jun 2015 22:27:06 +0100 Subject: [PATCH 1115/2721] [moviefap] Explicitly sort formats to handle possible site changes --- youtube_dl/extractor/moviefap.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git 
a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py index 20a78f3b2..295bfe3f0 100644 --- a/youtube_dl/extractor/moviefap.py +++ b/youtube_dl/extractor/moviefap.py @@ -111,11 +111,14 @@ class MovieFapIE(InfoExtractor): # multiple formats available info['formats'] = [] - # N.B. formats are already in ascending order of quality for item in xml.find('quality').findall('item'): + resolution = xpath_text(item, 'res', 'resolution', True) # 480p etc. info['formats'].append({ 'url': xpath_text(item, 'videoLink', 'url', True), - 'resolution': xpath_text(item, 'res', 'resolution', True) # 480p etc. + 'resolution': resolution, + 'height': int(re.findall(r'\d+', resolution)[0]) }) + self._sort_formats(info['formats']) + return info From 1a5fd4eebc2717b5173df50d65007f90cb05ee30 Mon Sep 17 00:00:00 2001 From: George Brighton Date: Sat, 27 Jun 2015 22:32:56 +0100 Subject: [PATCH 1116/2721] [moviefap] Wrap long lines --- youtube_dl/extractor/moviefap.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py index 295bfe3f0..9de052a99 100644 --- a/youtube_dl/extractor/moviefap.py +++ b/youtube_dl/extractor/moviefap.py @@ -78,23 +78,32 @@ class MovieFapIE(InfoExtractor): webpage = self._download_webpage(url, video_id) # find and retrieve the XML document detailing video download URLs - info_url = self._html_search_regex(r'flashvars\.config = escape\("(.+?)"', webpage, 'player parameters') + info_url = self._html_search_regex( \ + r'flashvars\.config = escape\("(.+?)"', webpage, 'player parameters') xml = self._download_xml(info_url, video_id) info = { 'id': video_id, - 'title': self._html_search_regex(r'

    (.*?)

    ', webpage, 'title'), + 'title': self._html_search_regex( \ + r'

    (.*?)

    ', webpage, 'title'), 'display_id': re.compile(self._VALID_URL).match(url).group('name'), 'thumbnails': self.__get_thumbnail_data(xml), 'thumbnail': xpath_text(xml, 'startThumb', 'thumbnail'), - 'description': self._html_search_regex(r'name="description" value="(.*?)"', webpage, 'description', fatal=False), - 'uploader_id': self._html_search_regex(r'name="username" value="(.*?)"', webpage, 'uploader_id', fatal=False), - 'view_count': str_to_int(self._html_search_regex(r'
    Views ([0-9]+)', webpage, 'view_count, fatal=False')), - 'average_rating': float(self._html_search_regex(r'Current Rating
    (.*?)', webpage, 'average_rating', fatal=False)), - 'comment_count': str_to_int(self._html_search_regex(r'([0-9]+)', webpage, 'comment_count', fatal=False)), + 'description': self._html_search_regex( \ + r'name="description" value="(.*?)"', webpage, 'description', fatal=False), + 'uploader_id': self._html_search_regex( \ + r'name="username" value="(.*?)"', webpage, 'uploader_id', fatal=False), + 'view_count': str_to_int(self._html_search_regex( \ + r'
    Views ([0-9]+)', webpage, 'view_count, fatal=False')), + 'average_rating': float(self._html_search_regex( \ + r'Current Rating
    (.*?)', webpage, 'average_rating', fatal=False)), + 'comment_count': str_to_int(self._html_search_regex( \ + r'([0-9]+)', webpage, 'comment_count', fatal=False)), 'age_limit': 18, - 'webpage_url': self._html_search_regex(r'name="link" value="(.*?)"', webpage, 'webpage_url', fatal=False), - 'categories': self._html_search_regex(r'
    \s*(.*?)\s*
    ', webpage, 'categories', fatal=False).split(', ') + 'webpage_url': self._html_search_regex( \ + r'name="link" value="(.*?)"', webpage, 'webpage_url', fatal=False), + 'categories': self._html_search_regex( \ + r'
    \s*(.*?)\s*
    ', webpage, 'categories', fatal=False).split(', ') } # find and add the format From 5a9cc19972fb3aae7a67470f65ec5cd30918f4e1 Mon Sep 17 00:00:00 2001 From: George Brighton Date: Sat, 27 Jun 2015 23:03:06 +0100 Subject: [PATCH 1117/2721] [moviefap] Move flv videos to formats in the metadata --- youtube_dl/extractor/moviefap.py | 56 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py index 9de052a99..5e0c701d4 100644 --- a/youtube_dl/extractor/moviefap.py +++ b/youtube_dl/extractor/moviefap.py @@ -82,8 +82,36 @@ class MovieFapIE(InfoExtractor): r'flashvars\.config = escape\("(.+?)"', webpage, 'player parameters') xml = self._download_xml(info_url, video_id) - info = { + # find the video container + if xml.find('videoConfig') is not None: + ext = xml.find('videoConfig').find('type').text + else: + ext = 'flv' # guess... + + # work out the video URL(s) + formats = [] + if xml.find('videoLink') is not None: + # single format available + formats.append({ + 'url': xpath_text(xml, 'videoLink', 'url', True), + 'ext': ext + }) + else: + # multiple formats available + for item in xml.find('quality').findall('item'): + resolution = xpath_text(item, 'res', 'resolution', True) # 480p etc. + formats.append({ + 'url': xpath_text(item, 'videoLink', 'url', True), + 'ext': ext, + 'resolution': resolution, + 'height': int(re.findall(r'\d+', resolution)[0]) + }) + + self._sort_formats(formats) + + return { 'id': video_id, + 'formats': formats, 'title': self._html_search_regex( \ r'

    (.*?)

    ', webpage, 'title'), 'display_id': re.compile(self._VALID_URL).match(url).group('name'), @@ -105,29 +133,3 @@ class MovieFapIE(InfoExtractor): 'categories': self._html_search_regex( \ r'
    \s*(.*?)\s*
    ', webpage, 'categories', fatal=False).split(', ') } - - # find and add the format - if xml.find('videoConfig') is not None: - info['ext'] = xml.find('videoConfig').find('type').text - else: - info['ext'] = 'flv' # guess... - - # work out the video URL(s) - if xml.find('videoLink') is not None: - # single format available - info['url'] = xpath_text(xml, 'videoLink', 'url', True) - else: - # multiple formats available - info['formats'] = [] - - for item in xml.find('quality').findall('item'): - resolution = xpath_text(item, 'res', 'resolution', True) # 480p etc. - info['formats'].append({ - 'url': xpath_text(item, 'videoLink', 'url', True), - 'resolution': resolution, - 'height': int(re.findall(r'\d+', resolution)[0]) - }) - - self._sort_formats(info['formats']) - - return info From db652ea186586e3eda5006ee096161b1a867c0d0 Mon Sep 17 00:00:00 2001 From: George Brighton Date: Sat, 27 Jun 2015 23:04:55 +0100 Subject: [PATCH 1118/2721] [moviefap] Fix `flake8` warnings introduced in 1a5fd4e --- youtube_dl/extractor/moviefap.py | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py index 5e0c701d4..82b863539 100644 --- a/youtube_dl/extractor/moviefap.py +++ b/youtube_dl/extractor/moviefap.py @@ -78,8 +78,8 @@ class MovieFapIE(InfoExtractor): webpage = self._download_webpage(url, video_id) # find and retrieve the XML document detailing video download URLs - info_url = self._html_search_regex( \ - r'flashvars\.config = escape\("(.+?)"', webpage, 'player parameters') + info_url = self._html_search_regex( + r'flashvars\.config = escape\("(.+?)"', webpage, 'player parameters') xml = self._download_xml(info_url, video_id) # find the video container @@ -112,24 +112,24 @@ class MovieFapIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'title': self._html_search_regex( \ - r'

    (.*?)

    ', webpage, 'title'), + 'title': self._html_search_regex( + r'

    (.*?)

    ', webpage, 'title'), 'display_id': re.compile(self._VALID_URL).match(url).group('name'), 'thumbnails': self.__get_thumbnail_data(xml), 'thumbnail': xpath_text(xml, 'startThumb', 'thumbnail'), - 'description': self._html_search_regex( \ - r'name="description" value="(.*?)"', webpage, 'description', fatal=False), - 'uploader_id': self._html_search_regex( \ - r'name="username" value="(.*?)"', webpage, 'uploader_id', fatal=False), - 'view_count': str_to_int(self._html_search_regex( \ - r'
    Views ([0-9]+)', webpage, 'view_count, fatal=False')), - 'average_rating': float(self._html_search_regex( \ - r'Current Rating
    (.*?)', webpage, 'average_rating', fatal=False)), - 'comment_count': str_to_int(self._html_search_regex( \ - r'([0-9]+)', webpage, 'comment_count', fatal=False)), + 'description': self._html_search_regex( + r'name="description" value="(.*?)"', webpage, 'description', fatal=False), + 'uploader_id': self._html_search_regex( + r'name="username" value="(.*?)"', webpage, 'uploader_id', fatal=False), + 'view_count': str_to_int(self._html_search_regex( + r'
    Views ([0-9]+)', webpage, 'view_count, fatal=False')), + 'average_rating': float(self._html_search_regex( + r'Current Rating
    (.*?)', webpage, 'average_rating', fatal=False)), + 'comment_count': str_to_int(self._html_search_regex( + r'([0-9]+)', webpage, 'comment_count', fatal=False)), 'age_limit': 18, - 'webpage_url': self._html_search_regex( \ - r'name="link" value="(.*?)"', webpage, 'webpage_url', fatal=False), - 'categories': self._html_search_regex( \ - r'
    \s*(.*?)\s*
    ', webpage, 'categories', fatal=False).split(', ') + 'webpage_url': self._html_search_regex( + r'name="link" value="(.*?)"', webpage, 'webpage_url', fatal=False), + 'categories': self._html_search_regex( + r'
    \s*(.*?)\s*
    ', webpage, 'categories', fatal=False).split(', ') } From bb512e57dc138b261cf9c71a833b0df5d5ba849f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 28 Jun 2015 13:25:59 +0800 Subject: [PATCH 1119/2721] [twitch:vod] Fix 'Source' format in m3u8 (closes #6115) --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 94bd6345d..3e798e62d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -215,7 +215,7 @@ class TwitchVodIE(TwitchItemBaseIE): '%s/api/vods/%s/access_token' % (self._API_BASE, item_id), item_id, 'Downloading %s access token' % self._ITEM_TYPE) formats = self._extract_m3u8_formats( - '%s/vod/%s?nauth=%s&nauthsig=%s' + '%s/vod/%s?nauth=%s&nauthsig=%s&allow_source=true' % (self._USHER_BASE, item_id, access_token['token'], access_token['sig']), item_id, 'mp4') self._prefer_source(formats) From ac0474f89d3e6f8c8c1fb3223a16a18a2fd02bcb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 28 Jun 2015 13:31:37 +0800 Subject: [PATCH 1120/2721] [twitch:vod] Update _TEST The original test case is gone --- youtube_dl/extractor/twitch.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 3e798e62d..b56ee2959 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -189,17 +189,17 @@ class TwitchVodIE(TwitchItemBaseIE): _ITEM_SHORTCUT = 'v' _TEST = { - 'url': 'http://www.twitch.tv/ksptv/v/3622000', + 'url': 'http://www.twitch.tv/riotgames/v/6528877', 'info_dict': { - 'id': 'v3622000', + 'id': 'v6528877', 'ext': 'mp4', - 'title': '''KSPTV: Squadcast: "Everyone's on vacation so here's Dahud" Edition!''', + 'title': 'LCK Summer Split - Week 6 Day 1', 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 6951, - 'timestamp': 1419028564, - 'upload_date': '20141219', - 'uploader': 'KSPTV', - 
'uploader_id': 'ksptv', + 'duration': 17208, + 'timestamp': 1435131709, + 'upload_date': '20150624', + 'uploader': 'Riot Games', + 'uploader_id': 'riotgames', 'view_count': int, }, 'params': { From 9603e8a7d998615d3da1af47461ec9c353ec4e7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Jun 2015 22:55:28 +0600 Subject: [PATCH 1121/2721] [YoutubeDL] Handle None width and height similarly to formats --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ef0f71bad..411de9ac9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1008,7 +1008,7 @@ class YoutubeDL(object): t.get('preference'), t.get('width'), t.get('height'), t.get('id'), t.get('url'))) for i, t in enumerate(thumbnails): - if 'width' in t and 'height' in t: + if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('id') is None: t['id'] = '%d' % i From bf42a9906d9a066d32f1cc50e1b033e6676744ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Jun 2015 22:56:07 +0600 Subject: [PATCH 1122/2721] [utils] Add default value for xpath_text --- youtube_dl/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 96490f112..942f76d24 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -62,6 +62,8 @@ std_headers = { } +NO_DEFAULT = object() + ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] @@ -171,13 +173,15 @@ def xpath_with_ns(path, ns_map): return '/'.join(replaced) -def xpath_text(node, xpath, name=None, fatal=False): +def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT): if sys.version_info < (2, 7): # Crazy 2.6 xpath = xpath.encode('ascii') n = node.find(xpath) if n is None or n.text is None: - if fatal: 
+ if default is not NO_DEFAULT: + return default + elif fatal: name = xpath if name is None else name raise ExtractorError('Could not find XML element %s' % name) else: From c342041fba9283ba5f05f48427aabf79adcf8647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Jun 2015 22:56:45 +0600 Subject: [PATCH 1123/2721] [extractor/common] Use NO_DEFAULT from utils --- youtube_dl/extractor/common.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 49e4dc710..7fa46d295 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -22,6 +22,7 @@ from ..compat import ( compat_str, ) from ..utils import ( + NO_DEFAULT, age_restricted, bug_reports_message, clean_html, @@ -33,7 +34,7 @@ from ..utils import ( sanitize_filename, unescapeHTML, ) -_NO_DEFAULT = object() + class InfoExtractor(object): @@ -523,7 +524,7 @@ class InfoExtractor(object): video_info['description'] = playlist_description return video_info - def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): + def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None): """ Perform a regex search on the given string, using a single or a list of patterns returning the first matching group. 
@@ -549,7 +550,7 @@ class InfoExtractor(object): return next(g for g in mobj.groups() if g is not None) else: return mobj.group(group) - elif default is not _NO_DEFAULT: + elif default is not NO_DEFAULT: return default elif fatal: raise RegexNotFoundError('Unable to extract %s' % _name) @@ -557,7 +558,7 @@ class InfoExtractor(object): self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message()) return None - def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): + def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None): """ Like _search_regex, but strips HTML tags and unescapes entities. """ From d16154d16327907279eff48a4018c495726d401a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Jun 2015 23:05:09 +0600 Subject: [PATCH 1124/2721] [tnaflix] Generalize tnaflix extractors --- youtube_dl/extractor/__init__.py | 8 +- youtube_dl/extractor/empflix.py | 31 ---- youtube_dl/extractor/moviefap.py | 135 --------------- youtube_dl/extractor/tnaflix.py | 279 +++++++++++++++++++++++++------ 4 files changed, 234 insertions(+), 219 deletions(-) delete mode 100644 youtube_dl/extractor/empflix.py delete mode 100644 youtube_dl/extractor/moviefap.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d41d277c9..d44339200 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -144,7 +144,6 @@ from .ellentv import ( ) from .elpais import ElPaisIE from .embedly import EmbedlyIE -from .empflix import EMPFlixIE from .engadget import EngadgetIE from .eporner import EpornerIE from .eroprofile import EroProfileIE @@ -311,7 +310,6 @@ from .morningstar import MorningstarIE from .motherless import MotherlessIE from .motorsport import MotorsportIE from .movieclips import MovieClipsIE -from .moviefap import MovieFapIE from .moviezine import MoviezineIE from .movshare import 
MovShareIE from .mtv import ( @@ -578,7 +576,11 @@ from .tmz import ( TMZIE, TMZArticleIE, ) -from .tnaflix import TNAFlixIE +from .tnaflix import ( + TNAFlixIE, + EMPFlixIE, + MovieFapIE, +) from .thvideo import ( THVideoIE, THVideoPlaylistIE diff --git a/youtube_dl/extractor/empflix.py b/youtube_dl/extractor/empflix.py deleted file mode 100644 index 4827022e0..000000000 --- a/youtube_dl/extractor/empflix.py +++ /dev/null @@ -1,31 +0,0 @@ -from __future__ import unicode_literals - -from .tnaflix import TNAFlixIE - - -class EMPFlixIE(TNAFlixIE): - _VALID_URL = r'https?://(?:www\.)?empflix\.com/videos/(?P.+?)-(?P[0-9]+)\.html' - - _TITLE_REGEX = r'name="title" value="(?P[^"]*)"' - _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"' - _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"' - - _TESTS = [ - { - 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', - 'md5': 'b1bc15b6412d33902d6e5952035fcabc', - 'info_dict': { - 'id': '33051', - 'display_id': 'Amateur-Finger-Fuck', - 'ext': 'mp4', - 'title': 'Amateur Finger Fuck', - 'description': 'Amateur solo finger fucking.', - 'thumbnail': 're:https?://.*\.jpg$', - 'age_limit': 18, - } - }, - { - 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', - 'only_matching': True, - } - ] diff --git a/youtube_dl/extractor/moviefap.py b/youtube_dl/extractor/moviefap.py deleted file mode 100644 index 82b863539..000000000 --- a/youtube_dl/extractor/moviefap.py +++ /dev/null @@ -1,135 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - xpath_text, - str_to_int -) -from ..compat import compat_str - - -class MovieFapIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P<id>[0-9a-f]+)/(?P<name>[a-z-_]+)' - _TESTS = [{ - # normal, multi-format video - 'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html', - 'md5': 
'26624b4e2523051b550067d547615906', - 'info_dict': { - 'id': 'be9867c9416c19f54a4a', - 'ext': 'mp4', - 'title': 'Experienced MILF Amazing Handjob', - 'description': 'Experienced MILF giving an Amazing Handjob', - 'thumbnail': 'http://img.moviefap.com/a16:9w990r/thumbs/be/322032-20l.jpg', - 'uploader_id': 'darvinfred06', - 'display_id': 'experienced-milf-amazing-handjob', - 'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'] - } - }, { - # quirky single-format case where the extension is given as fid, but the video is really an flv - 'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html', - 'md5': 'fa56683e291fc80635907168a743c9ad', - 'info_dict': { - 'id': 'e5da0d3edce5404418f5', - 'ext': 'flv', - 'title': 'Jeune Couple Russe', - 'description': 'Amateur', - 'thumbnail': 'http://pic.moviefap.com/thumbs/e5/949-18l.jpg', - 'uploader_id': 'whiskeyjar', - 'display_id': 'jeune-couple-russe', - 'categories': ['Amateur', 'Teen'] - } - }] - - @staticmethod - def __get_thumbnail_data(xml): - - """ - Constructs a list of video thumbnails from timeline preview images. 
- :param xml: the information XML document to parse - """ - - timeline = xml.find('timeline') - if timeline is None: - # not all videos have the data - ah well - return [] - - # get the required information from the XML - width = str_to_int(timeline.find('imageWidth').text) - height = str_to_int(timeline.find('imageHeight').text) - first = str_to_int(timeline.find('imageFirst').text) - last = str_to_int(timeline.find('imageLast').text) - pattern = timeline.find('imagePattern').text - - # generate the list of thumbnail information dicts - thumbnails = [] - for i in range(first, last + 1): - thumbnails.append({ - 'url': pattern.replace('#', compat_str(i)), - 'width': width, - 'height': height - }) - return thumbnails - - def _real_extract(self, url): - - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # find and retrieve the XML document detailing video download URLs - info_url = self._html_search_regex( - r'flashvars\.config = escape\("(.+?)"', webpage, 'player parameters') - xml = self._download_xml(info_url, video_id) - - # find the video container - if xml.find('videoConfig') is not None: - ext = xml.find('videoConfig').find('type').text - else: - ext = 'flv' # guess... - - # work out the video URL(s) - formats = [] - if xml.find('videoLink') is not None: - # single format available - formats.append({ - 'url': xpath_text(xml, 'videoLink', 'url', True), - 'ext': ext - }) - else: - # multiple formats available - for item in xml.find('quality').findall('item'): - resolution = xpath_text(item, 'res', 'resolution', True) # 480p etc. 
- formats.append({ - 'url': xpath_text(item, 'videoLink', 'url', True), - 'ext': ext, - 'resolution': resolution, - 'height': int(re.findall(r'\d+', resolution)[0]) - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'formats': formats, - 'title': self._html_search_regex( - r'<div id="view_title"><h1>(.*?)</h1>', webpage, 'title'), - 'display_id': re.compile(self._VALID_URL).match(url).group('name'), - 'thumbnails': self.__get_thumbnail_data(xml), - 'thumbnail': xpath_text(xml, 'startThumb', 'thumbnail'), - 'description': self._html_search_regex( - r'name="description" value="(.*?)"', webpage, 'description', fatal=False), - 'uploader_id': self._html_search_regex( - r'name="username" value="(.*?)"', webpage, 'uploader_id', fatal=False), - 'view_count': str_to_int(self._html_search_regex( - r'<br>Views <strong>([0-9]+)</strong>', webpage, 'view_count, fatal=False')), - 'average_rating': float(self._html_search_regex( - r'Current Rating<br> <strong>(.*?)</strong>', webpage, 'average_rating', fatal=False)), - 'comment_count': str_to_int(self._html_search_regex( - r'<span id="comCount">([0-9]+)</span>', webpage, 'comment_count', fatal=False)), - 'age_limit': 18, - 'webpage_url': self._html_search_regex( - r'name="link" value="(.*?)"', webpage, 'webpage_url', fatal=False), - 'categories': self._html_search_regex( - r'</div>\s*(.*?)\s*<br>', webpage, 'categories', fatal=False).split(', ') - } diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py index c282865b2..49516abca 100644 --- a/youtube_dl/extractor/tnaflix.py +++ b/youtube_dl/extractor/tnaflix.py @@ -3,39 +3,70 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( - parse_duration, fix_xml_ampersands, + float_or_none, + int_or_none, + parse_duration, + str_to_int, + xpath_text, ) -class TNAFlixIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)' - - _TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos' - _DESCRIPTION_REGEX = r'

    ([^<]+)

    ' - _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"' - - _TESTS = [ - { - 'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878', - 'md5': 'ecf3498417d09216374fc5907f9c6ec0', - 'info_dict': { - 'id': '553878', - 'display_id': 'Carmella-Decesare-striptease', - 'ext': 'mp4', - 'title': 'Carmella Decesare - striptease', - 'description': '', - 'thumbnail': 're:https?://.*\.jpg$', - 'duration': 91, - 'age_limit': 18, - } - }, - { - 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', - 'only_matching': True, - } +class TNAFlixNetworkBaseIE(InfoExtractor): + # May be overridden in descendants if necessary + _CONFIG_REGEX = [ + r'flashvars\.config\s*=\s*escape\("([^"]+)"', + r']+name="config\d?" value="([^"]+)"', ] + _TITLE_REGEX = r']+name="title" value="([^"]+)"' + _DESCRIPTION_REGEX = r']+name="description" value="([^"]+)"' + _UPLOADER_REGEX = r']+name="username" value="([^"]+)"' + _VIEW_COUNT_REGEX = None + _COMMENT_COUNT_REGEX = None + _AVERAGE_RATING_REGEX = None + _CATEGORIES_REGEX = r']*>\s*]+class="infoTitle"[^>]*>Categories:\s*]+class="listView"[^>]*>(.+?)\s*' + + def _extract_thumbnails(self, flix_xml): + + def get_child(elem, names): + for name in names: + child = elem.find(name) + if child is not None: + return child + + timeline = get_child(flix_xml, ['timeline', 'rolloverBarImage']) + if timeline is None: + return + + pattern_el = get_child(timeline, ['imagePattern', 'pattern']) + if pattern_el is None or not pattern_el.text: + return + + first_el = get_child(timeline, ['imageFirst', 'first']) + last_el = get_child(timeline, ['imageLast', 'last']) + if first_el is None or last_el is None: + return + + first_text = first_el.text + last_text = last_el.text + if not first_text.isdigit() or not last_text.isdigit(): + return + + first = int(first_text) + last = int(last_text) + if first > last: + return + + width = int_or_none(xpath_text(timeline, './imageWidth', 'thumbnail width')) + height = 
int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height')) + + return [{ + 'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'), + 'width': width, + 'height': height, + } for i in range(first, last + 1)] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -44,39 +75,64 @@ class TNAFlixIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + cfg_url = self._proto_relative_url(self._html_search_regex( + self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:') + + cfg_xml = self._download_xml( + cfg_url, display_id, 'Downloading metadata', + transform_source=fix_xml_ampersands) + + formats = [] + + def extract_video_url(vl): + return re.sub('speed=\d+', 'speed=', vl.text) + + video_link = cfg_xml.find('./videoLink') + if video_link is not None: + formats.append({ + 'url': extract_video_url(video_link), + 'ext': xpath_text(cfg_xml, './videoConfig/type', 'type', default='flv'), + }) + + for item in cfg_xml.findall('./quality/item'): + video_link = item.find('./videoLink') + if video_link is None: + continue + res = item.find('res') + format_id = None if res is None else res.text + height = int_or_none(self._search_regex( + r'^(\d+)[pP]', format_id, 'height', default=None)) + formats.append({ + 'url': self._proto_relative_url(extract_video_url(video_link), 'http:'), + 'format_id': format_id, + 'height': height, + }) + + self._sort_formats(formats) + + thumbnail = self._proto_relative_url( + xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:') + thumbnails = self._extract_thumbnails(cfg_xml) + title = self._html_search_regex( self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage) - description = self._html_search_regex( - self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='') age_limit = self._rta_search(webpage) duration = parse_duration(self._html_search_meta( 'duration', webpage, 'duration', default=None)) - cfg_url = 
self._proto_relative_url(self._html_search_regex( - self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:') + def extract_field(pattern, name): + return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None - cfg_xml = self._download_xml( - cfg_url, display_id, note='Downloading metadata', - transform_source=fix_xml_ampersands) + description = extract_field(self._DESCRIPTION_REGEX, 'description') + uploader = extract_field(self._UPLOADER_REGEX, 'uploader') + view_count = str_to_int(extract_field(self._VIEW_COUNT_REGEX, 'view count')) + comment_count = str_to_int(extract_field(self._COMMENT_COUNT_REGEX, 'comment count')) + average_rating = float_or_none(extract_field(self._AVERAGE_RATING_REGEX, 'average rating')) - thumbnail = self._proto_relative_url( - cfg_xml.find('./startThumb').text, 'http:') - - formats = [] - for item in cfg_xml.findall('./quality/item'): - video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text) - format_id = item.find('res').text - fmt = { - 'url': self._proto_relative_url(video_url, 'http:'), - 'format_id': format_id, - } - m = re.search(r'^(\d+)', format_id) - if m: - fmt['height'] = int(m.group(1)) - formats.append(fmt) - self._sort_formats(formats) + categories_str = extract_field(self._CATEGORIES_REGEX, 'categories') + categories = categories_str.split(', ') if categories_str is not None else [] return { 'id': video_id, @@ -84,7 +140,130 @@ class TNAFlixIE(InfoExtractor): 'title': title, 'description': description, 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'duration': duration, 'age_limit': age_limit, + 'uploader': uploader, + 'view_count': view_count, + 'comment_count': comment_count, + 'average_rating': average_rating, + 'categories': categories, 'formats': formats, } + + +class TNAFlixIE(TNAFlixNetworkBaseIE): + _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P[^/]+)/video(?P\d+)' + + _TITLE_REGEX = r'(.+?) - TNAFlix Porn Videos' + _DESCRIPTION_REGEX = r'

    ([^<]+)

    ' + _UPLOADER_REGEX = r'(?s)]+class="infoTitle"[^>]*>Uploaded By:(.+?).+?)-(?P[0-9]+)\.html' + + _UPLOADER_REGEX = r']+class="infoTitle"[^>]*>Uploaded By:(.+?)' + + _TESTS = [{ + 'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html', + 'md5': 'b1bc15b6412d33902d6e5952035fcabc', + 'info_dict': { + 'id': '33051', + 'display_id': 'Amateur-Finger-Fuck', + 'ext': 'mp4', + 'title': 'Amateur Finger Fuck', + 'description': 'Amateur solo finger fucking.', + 'thumbnail': 're:https?://.*\.jpg$', + 'duration': 83, + 'age_limit': 18, + 'uploader': 'cwbike', + 'categories': ['Amateur', 'Anal', 'Fisting', 'Home made', 'Solo'], + } + }, { + 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', + 'only_matching': True, + }] + + +class MovieFapIE(TNAFlixNetworkBaseIE): + _VALID_URL = r'https?://(?:www\.)?moviefap\.com/videos/(?P[0-9a-f]+)/(?P[^/]+)\.html' + + _VIEW_COUNT_REGEX = r'
    Views\s*([\d,.]+)' + _COMMENT_COUNT_REGEX = r']+id="comCount"[^>]*>([\d,.]+)' + _AVERAGE_RATING_REGEX = r'Current Rating\s*
    \s*([\d.]+)' + _CATEGORIES_REGEX = r'(?s)]+id="vid_info"[^>]*>\s*]*>.+?
    (.*?)
    ' + + _TESTS = [{ + # normal, multi-format video + 'url': 'http://www.moviefap.com/videos/be9867c9416c19f54a4a/experienced-milf-amazing-handjob.html', + 'md5': '26624b4e2523051b550067d547615906', + 'info_dict': { + 'id': 'be9867c9416c19f54a4a', + 'display_id': 'experienced-milf-amazing-handjob', + 'ext': 'mp4', + 'title': 'Experienced MILF Amazing Handjob', + 'description': 'Experienced MILF giving an Amazing Handjob', + 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, + 'uploader': 'darvinfred06', + 'view_count': int, + 'comment_count': int, + 'average_rating': float, + 'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'], + } + }, { + # quirky single-format case where the extension is given as fid, but the video is really an flv + 'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html', + 'md5': 'fa56683e291fc80635907168a743c9ad', + 'info_dict': { + 'id': 'e5da0d3edce5404418f5', + 'display_id': 'jeune-couple-russe', + 'ext': 'flv', + 'title': 'Jeune Couple Russe', + 'description': 'Amateur', + 'thumbnail': 're:https?://.*\.jpg$', + 'age_limit': 18, + 'uploader': 'whiskeyjar', + 'view_count': int, + 'comment_count': int, + 'average_rating': float, + 'categories': ['Amateur', 'Teen'], + } + }] From 507683780eb14d012d4430044dc402d7e08e36b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 28 Jun 2015 23:08:05 +0600 Subject: [PATCH 1125/2721] Credit @gebn for moviefap --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 889d599a2..117b9c219 100644 --- a/AUTHORS +++ b/AUTHORS @@ -128,3 +128,4 @@ Ping O. 
Mister Hat Peter Ding jackyzy823 +George Brighton From c93d53f5e307dab1b5d03cd3c621a68f40ef840d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Jun 2015 00:48:06 +0600 Subject: [PATCH 1126/2721] [youtube] Fix likes/dislike extraction --- youtube_dl/extractor/youtube.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d9240ff02..8b43e274b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -29,6 +29,7 @@ from ..utils import ( get_element_by_id, int_or_none, orderedSet, + str_to_int, unescapeHTML, unified_strdate, uppercase_escape, @@ -1005,12 +1006,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_description = '' def _extract_count(count_name): - count = self._search_regex( - r'id="watch-%s"[^>]*>.*?([\d,]+)\s*' % re.escape(count_name), - video_webpage, count_name, default=None) - if count is not None: - return int(count.replace(',', '')) - return None + return str_to_int(self._search_regex( + r'-%s-button[^>]+>]+class="yt-uix-button-content"[^>]*>([\d,]+)' + % re.escape(count_name), + video_webpage, count_name, default=None)) + like_count = _extract_count('like') dislike_count = _extract_count('dislike') From 541462379153c19656aa52cc5796dbf05de874ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Jun 2015 00:49:19 +0600 Subject: [PATCH 1127/2721] [extractor/common] Remove superfluous line --- youtube_dl/extractor/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7fa46d295..81623bfe3 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -36,7 +36,6 @@ from ..utils import ( ) - class InfoExtractor(object): """Information Extractor class. 
From 67134eaba1a56cec4117000acb2fc9284c9cdd9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 28 Jun 2015 22:08:29 +0200 Subject: [PATCH 1128/2721] [YoutubeDL] rework how the format spec is processed The spec string is processed using 'tokenize.tokenize' to split it in words and operators, the filters are still processed using regular expressions. This should make easier to allow grouping operators with parens. --- test/test_YoutubeDL.py | 27 ++-- youtube_dl/YoutubeDL.py | 298 +++++++++++++++++++++++++--------------- youtube_dl/compat.py | 5 + 3 files changed, 209 insertions(+), 121 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index a13c09ef4..8f7aef512 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -229,21 +229,30 @@ class TestFormatSelection(unittest.TestCase): '141', '172', '140', '171', '139', ] - for f1id, f2id in zip(order, order[1:]): - f1 = YoutubeIE._formats[f1id].copy() - f1['format_id'] = f1id - f1['url'] = 'url:' + f1id - f2 = YoutubeIE._formats[f2id].copy() - f2['format_id'] = f2id - f2['url'] = 'url:' + f2id + def format_info(f_id): + info = YoutubeIE._formats[f_id].copy() + info['format_id'] = f_id + info['url'] = 'url:' + f_id + return info + formats_order = [format_info(f_id) for f_id in order] + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': 'bestvideo+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '137+141') + self.assertEqual(downloaded['ext'], 'mp4') + + for f1, f2 in zip(formats_order, formats_order[1:]): info_dict = _make_result([f1, f2], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) yie = YoutubeIE(ydl) yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] - 
self.assertEqual(downloaded['format_id'], f1id) + self.assertEqual(downloaded['format_id'], f1['format_id']) info_dict = _make_result([f2, f1], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) @@ -251,7 +260,7 @@ class TestFormatSelection(unittest.TestCase): yie._sort_formats(info_dict['formats']) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] - self.assertEqual(downloaded['format_id'], f1id) + self.assertEqual(downloaded['format_id'], f1['format_id']) def test_format_filtering(self): formats = [ diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ef0f71bad..17a5407b9 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -21,6 +21,7 @@ import subprocess import socket import sys import time +import tokenize import traceback if os.name == 'nt': @@ -34,6 +35,7 @@ from .compat import ( compat_http_client, compat_kwargs, compat_str, + compat_tokenize_tokenize, compat_urllib_error, compat_urllib_request, ) @@ -851,8 +853,8 @@ class YoutubeDL(object): else: raise Exception('Invalid result type: %s' % result_type) - def _apply_format_filter(self, format_spec, available_formats): - " Returns a tuple of the remaining format_spec and filtered formats " + def _build_format_filter(self, filter_spec): + " Returns a function to filter the formats according to the filter_spec " OPERATORS = { '<': operator.lt, @@ -862,13 +864,13 @@ class YoutubeDL(object): '=': operator.eq, '!=': operator.ne, } - operator_rex = re.compile(r'''(?x)\s*\[ + operator_rex = re.compile(r'''(?x)\s* (?Pwidth|height|tbr|abr|vbr|asr|filesize|fps) \s*(?P%s)(?P\s*\?)?\s* (?P[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) 
- \]$ + $ ''' % '|'.join(map(re.escape, OPERATORS.keys()))) - m = operator_rex.search(format_spec) + m = operator_rex.search(filter_spec) if m: try: comparison_value = int(m.group('value')) @@ -879,7 +881,7 @@ class YoutubeDL(object): if comparison_value is None: raise ValueError( 'Invalid value %r in format specification %r' % ( - m.group('value'), format_spec)) + m.group('value'), filter_spec)) op = OPERATORS[m.group('op')] if not m: @@ -887,85 +889,201 @@ class YoutubeDL(object): '=': operator.eq, '!=': operator.ne, } - str_operator_rex = re.compile(r'''(?x)\s*\[ + str_operator_rex = re.compile(r'''(?x) \s*(?Pext|acodec|vcodec|container|protocol) \s*(?P%s)(?P\s*\?)? \s*(?P[a-zA-Z0-9_-]+) - \s*\]$ + \s*$ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) - m = str_operator_rex.search(format_spec) + m = str_operator_rex.search(filter_spec) if m: comparison_value = m.group('value') op = STR_OPERATORS[m.group('op')] if not m: - raise ValueError('Invalid format specification %r' % format_spec) + raise ValueError('Invalid filter specification %r' % filter_spec) def _filter(f): actual_value = f.get(m.group('key')) if actual_value is None: return m.group('none_inclusive') return op(actual_value, comparison_value) - new_formats = [f for f in available_formats if _filter(f)] + return _filter - new_format_spec = format_spec[:-len(m.group(0))] - if not new_format_spec: - new_format_spec = 'best' + def build_format_selector(self, format_spec): + def syntax_error(note, start): + message = ( + 'Invalid format specification: ' + '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1])) + return SyntaxError(message) - return (new_format_spec, new_formats) + PICKFIRST = 'PICKFIRST' + MERGE = 'MERGE' + SINGLE = 'SINGLE' + FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) - def select_format(self, format_spec, available_formats): - while format_spec.endswith(']'): - format_spec, available_formats = self._apply_format_filter( - 
format_spec, available_formats) - if not available_formats: - return None + def _parse_filter(tokens): + filter_parts = [] + for type, string, start, _, _ in tokens: + if type == tokenize.OP and string == ']': + return ''.join(filter_parts) + else: + filter_parts.append(string) - if format_spec in ['best', 'worst', None]: - format_idx = 0 if format_spec == 'worst' else -1 - audiovideo_formats = [ - f for f in available_formats - if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] - if audiovideo_formats: - return audiovideo_formats[format_idx] - # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format - elif (all(f.get('acodec') != 'none' for f in available_formats) or - all(f.get('vcodec') != 'none' for f in available_formats)): - return available_formats[format_idx] - elif format_spec == 'bestaudio': - audio_formats = [ - f for f in available_formats - if f.get('vcodec') == 'none'] - if audio_formats: - return audio_formats[-1] - elif format_spec == 'worstaudio': - audio_formats = [ - f for f in available_formats - if f.get('vcodec') == 'none'] - if audio_formats: - return audio_formats[0] - elif format_spec == 'bestvideo': - video_formats = [ - f for f in available_formats - if f.get('acodec') == 'none'] - if video_formats: - return video_formats[-1] - elif format_spec == 'worstvideo': - video_formats = [ - f for f in available_formats - if f.get('acodec') == 'none'] - if video_formats: - return video_formats[0] - else: - extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] - if format_spec in extensions: - filter_f = lambda f: f['ext'] == format_spec - else: - filter_f = lambda f: f['format_id'] == format_spec - matches = list(filter(filter_f, available_formats)) - if matches: - return matches[-1] - return None + def _parse_format_selection(tokens, endwith=[]): + selectors = [] + current_selector = None + for type, string, start, _, _ in tokens: + # ENCODING is only defined in python 3.x + if 
type == getattr(tokenize, 'ENCODING', None): + continue + elif type in [tokenize.NAME, tokenize.NUMBER]: + current_selector = FormatSelector(SINGLE, string, []) + elif type == tokenize.OP: + if string in endwith: + break + if string == ',': + selectors.append(current_selector) + current_selector = None + elif string == '/': + first_choice = current_selector + second_choice = _parse_format_selection(tokens, [',']) + current_selector = None + selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), [])) + elif string == '[': + if not current_selector: + current_selector = FormatSelector(SINGLE, 'best', []) + format_filter = _parse_filter(tokens) + current_selector.filters.append(format_filter) + elif string == '+': + video_selector = current_selector + audio_selector = _parse_format_selection(tokens, [',']) + current_selector = None + selectors.append(FormatSelector(MERGE, (video_selector, audio_selector), [])) + else: + raise syntax_error('Operator not recognized: "{0}"'.format(string), start) + elif type == tokenize.ENDMARKER: + break + if current_selector: + selectors.append(current_selector) + return selectors + + def _build_selector_function(selector): + if isinstance(selector, list): + fs = [_build_selector_function(s) for s in selector] + + def selector_function(formats): + for f in fs: + for format in f(formats): + yield format + return selector_function + elif selector.type == PICKFIRST: + fs = [_build_selector_function(s) for s in selector.selector] + + def selector_function(formats): + for f in fs: + picked_formats = list(f(formats)) + if picked_formats: + return picked_formats + return [] + elif selector.type == SINGLE: + format_spec = selector.selector + + def selector_function(formats): + if format_spec in ['best', 'worst', None]: + format_idx = 0 if format_spec == 'worst' else -1 + audiovideo_formats = [ + f for f in formats + if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] + if audiovideo_formats: + yield 
audiovideo_formats[format_idx] + # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format + elif (all(f.get('acodec') != 'none' for f in formats) or + all(f.get('vcodec') != 'none' for f in formats)): + yield formats[format_idx] + elif format_spec == 'bestaudio': + audio_formats = [ + f for f in formats + if f.get('vcodec') == 'none'] + if audio_formats: + yield audio_formats[-1] + elif format_spec == 'worstaudio': + audio_formats = [ + f for f in formats + if f.get('vcodec') == 'none'] + if audio_formats: + yield audio_formats[0] + elif format_spec == 'bestvideo': + video_formats = [ + f for f in formats + if f.get('acodec') == 'none'] + if video_formats: + yield video_formats[-1] + elif format_spec == 'worstvideo': + video_formats = [ + f for f in formats + if f.get('acodec') == 'none'] + if video_formats: + yield video_formats[0] + else: + extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] + if format_spec in extensions: + filter_f = lambda f: f['ext'] == format_spec + else: + filter_f = lambda f: f['format_id'] == format_spec + matches = list(filter(filter_f, formats)) + if matches: + yield matches[-1] + elif selector.type == MERGE: + def _merge(formats_info): + format_1, format_2 = [f['format_id'] for f in formats_info] + # The first format must contain the video and the + # second the audio + if formats_info[0].get('vcodec') == 'none': + self.report_error('The first format must ' + 'contain the video, try using ' + '"-f %s+%s"' % (format_2, format_1)) + return + output_ext = ( + formats_info[0]['ext'] + if self.params.get('merge_output_format') is None + else self.params['merge_output_format']) + return { + 'requested_formats': formats_info, + 'format': '%s+%s' % (formats_info[0].get('format'), + formats_info[1].get('format')), + 'format_id': '%s+%s' % (formats_info[0].get('format_id'), + formats_info[1].get('format_id')), + 'width': formats_info[0].get('width'), + 'height': 
formats_info[0].get('height'), + 'resolution': formats_info[0].get('resolution'), + 'fps': formats_info[0].get('fps'), + 'vcodec': formats_info[0].get('vcodec'), + 'vbr': formats_info[0].get('vbr'), + 'stretched_ratio': formats_info[0].get('stretched_ratio'), + 'acodec': formats_info[1].get('acodec'), + 'abr': formats_info[1].get('abr'), + 'ext': output_ext, + } + video_selector, audio_selector = map(_build_selector_function, selector.selector) + + def selector_function(formats): + formats = list(formats) + for pair in itertools.product(video_selector(formats), audio_selector(formats)): + yield _merge(pair) + + filters = [self._build_format_filter(f) for f in selector.filters] + + def final_selector(formats): + for _filter in filters: + formats = list(filter(_filter, formats)) + return selector_function(formats) + return final_selector + + stream = io.BytesIO(format_spec.encode('utf-8')) + tokens = compat_tokenize_tokenize(stream.readline) + parsed_selector = _parse_format_selection(tokens) + return _build_selector_function(parsed_selector) def _calc_headers(self, info_dict): res = std_headers.copy() @@ -1112,52 +1230,8 @@ class YoutubeDL(object): if req_format == 'all': formats_to_download = formats else: - for rfstr in req_format.split(','): - # We can accept formats requested in the format: 34/5/best, we pick - # the first that is available, starting from left - req_formats = rfstr.split('/') - for rf in req_formats: - if re.match(r'.+?\+.+?', rf) is not None: - # Two formats have been requested like '137+139' - format_1, format_2 = rf.split('+') - formats_info = (self.select_format(format_1, formats), - self.select_format(format_2, formats)) - if all(formats_info): - # The first format must contain the video and the - # second the audio - if formats_info[0].get('vcodec') == 'none': - self.report_error('The first format must ' - 'contain the video, try using ' - '"-f %s+%s"' % (format_2, format_1)) - return - output_ext = ( - formats_info[0]['ext'] - if 
self.params.get('merge_output_format') is None - else self.params['merge_output_format']) - selected_format = { - 'requested_formats': formats_info, - 'format': '%s+%s' % (formats_info[0].get('format'), - formats_info[1].get('format')), - 'format_id': '%s+%s' % (formats_info[0].get('format_id'), - formats_info[1].get('format_id')), - 'width': formats_info[0].get('width'), - 'height': formats_info[0].get('height'), - 'resolution': formats_info[0].get('resolution'), - 'fps': formats_info[0].get('fps'), - 'vcodec': formats_info[0].get('vcodec'), - 'vbr': formats_info[0].get('vbr'), - 'stretched_ratio': formats_info[0].get('stretched_ratio'), - 'acodec': formats_info[1].get('acodec'), - 'abr': formats_info[1].get('abr'), - 'ext': output_ext, - } - else: - selected_format = None - else: - selected_format = self.select_format(rf, formats) - if selected_format is not None: - formats_to_download.append(selected_format) - break + format_selector = self.build_format_selector(req_format) + formats_to_download = list(format_selector(formats)) if not formats_to_download: raise ExtractorError('requested format not available', expected=True) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index f9529210d..bc218dd71 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -388,6 +388,10 @@ else: pass return _terminal_size(columns, lines) +if sys.version_info >= (3, 0): + from tokenize import tokenize as compat_tokenize_tokenize +else: + from tokenize import generate_tokens as compat_tokenize_tokenize __all__ = [ 'compat_HTTPError', @@ -408,6 +412,7 @@ __all__ = [ 'compat_socket_create_connection', 'compat_str', 'compat_subprocess_get_DEVNULL', + 'compat_tokenize_tokenize', 'compat_urllib_error', 'compat_urllib_parse', 'compat_urllib_parse_unquote', From 5acfa126c812c3ab7088af6c7df79697baee7831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 28 Jun 2015 22:48:02 +0200 Subject: [PATCH 1129/2721] [YoutubeDL] format spec: 
treat 'all' like a normal specifier So you can use filters with it, for example 'all[width>=400][width<=600]'. --- test/test_YoutubeDL.py | 5 +++++ youtube_dl/YoutubeDL.py | 13 ++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8f7aef512..709e3100f 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -317,6 +317,11 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'G') + ydl = YDL({'format': 'all[width>=400][width<=600]'}) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['B', 'C', 'D']) + class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 17a5407b9..258e612af 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -990,7 +990,10 @@ class YoutubeDL(object): format_spec = selector.selector def selector_function(formats): - if format_spec in ['best', 'worst', None]: + if format_spec == 'all': + for f in formats: + yield f + elif format_spec in ['best', 'worst', None]: format_idx = 0 if format_spec == 'worst' else -1 audiovideo_formats = [ f for f in formats @@ -1226,12 +1229,8 @@ class YoutubeDL(object): req_format_list.append('bestvideo+bestaudio') req_format_list.append('best') req_format = '/'.join(req_format_list) - formats_to_download = [] - if req_format == 'all': - formats_to_download = formats - else: - format_selector = self.build_format_selector(req_format) - formats_to_download = list(format_selector(formats)) + format_selector = self.build_format_selector(req_format) + formats_to_download = list(format_selector(formats)) if not formats_to_download: raise ExtractorError('requested format not available', expected=True) From c4bd188da46a837ddf8f8f8d4766eb799fa2b484 Mon Sep 17 00:00:00 2001 
From: Anders Einar Hilden Date: Mon, 29 Jun 2015 00:11:31 +0200 Subject: [PATCH 1130/2721] NRK now supports / requires HTTPS Add s? to regexp to support new urls. Update testcases to use HTTPS. --- youtube_dl/extractor/nrk.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index cc70c2950..9e4581cf9 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -13,7 +13,7 @@ from ..utils import ( class NRKIE(InfoExtractor): - _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P\d+)' + _VALID_URL = r'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P\d+)' _TESTS = [ { @@ -76,7 +76,7 @@ class NRKIE(InfoExtractor): class NRKPlaylistIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P[^/]+)' _TESTS = [{ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', @@ -116,11 +116,11 @@ class NRKPlaylistIE(InfoExtractor): class NRKTVIE(InfoExtractor): - _VALID_URL = r'(?Phttp://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' + _VALID_URL = r'(?Phttps?://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P\d+))?' 
_TESTS = [ { - 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', + 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'md5': 'adf2c5454fa2bf032f47a9f8fb351342', 'info_dict': { 'id': 'MUHH48000314', @@ -132,7 +132,7 @@ class NRKTVIE(InfoExtractor): }, }, { - 'url': 'http://tv.nrk.no/program/mdfp15000514', + 'url': 'https://tv.nrk.no/program/mdfp15000514', 'md5': '383650ece2b25ecec996ad7b5bb2a384', 'info_dict': { 'id': 'mdfp15000514', @@ -145,7 +145,7 @@ class NRKTVIE(InfoExtractor): }, { # single playlist video - 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 'md5': 'adbd1dbd813edaf532b0a253780719c2', 'info_dict': { 'id': 'MSPO40010515-part2', @@ -157,7 +157,7 @@ class NRKTVIE(InfoExtractor): 'skip': 'Only works from Norway', }, { - 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', + 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 'playlist': [ { 'md5': '9480285eff92d64f06e02a5367970a7a', From bea41c7f3fa4f9072ad2f5354938ab1c8cef0a6d Mon Sep 17 00:00:00 2001 From: corone17 Date: Mon, 29 Jun 2015 00:59:18 +0200 Subject: [PATCH 1131/2721] Update rtlnl.py Better to extract 'http://manifest.us.rtl.nl' from the json, I'd say. And I think it's better to use the default json-url to make it more futureproof. Succesfully tested with tarball. 
--- youtube_dl/extractor/rtlnl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py index 41d202c28..e708e0093 100644 --- a/youtube_dl/extractor/rtlnl.py +++ b/youtube_dl/extractor/rtlnl.py @@ -51,7 +51,7 @@ class RtlNlIE(InfoExtractor): def _real_extract(self, url): uuid = self._match_id(url) info = self._download_json( - 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid, + 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid, uuid) material = info['material'][0] @@ -60,8 +60,8 @@ class RtlNlIE(InfoExtractor): description = material.get('synopsis') or info['episodes'][0]['synopsis'] # Use unencrypted m3u8 streams (See https://github.com/rg3/youtube-dl/issues/4118) - videopath = material['videopath'].replace('.f4m', '.m3u8') - m3u8_url = 'http://manifest.us.rtl.nl' + videopath + videopath = material['videopath'].replace('adaptive', 'flash') + m3u8_url = info['meta']['videohost'] + videopath formats = self._extract_m3u8_formats(m3u8_url, uuid, ext='mp4') From 738b92632296a9fee3eb7e0c915f6ea6b395125f Mon Sep 17 00:00:00 2001 From: nawl Date: Sun, 28 Jun 2015 17:24:00 -0600 Subject: [PATCH 1132/2721] [hentaistigma] Fix video extractor --- youtube_dl/extractor/hentaistigma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py index 63d87b74c..225af8cb3 100644 --- a/youtube_dl/extractor/hentaistigma.py +++ b/youtube_dl/extractor/hentaistigma.py @@ -32,7 +32,7 @@ class HentaiStigmaIE(InfoExtractor): wrap_webpage = self._download_webpage(wrap_url, video_id) video_url = self._html_search_regex( - r'clip:\s*{\s*url: "([^"]*)"', wrap_webpage, 'video url') + r'file:"([^"]+)"', wrap_webpage, 'video url') return { 'id': video_id, From 0130afb76e5cb6f470f39f127c8d09eea3e82d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: 
Mon, 29 Jun 2015 12:42:02 +0200 Subject: [PATCH 1133/2721] [YoutubeDL] format spec: allow grouping specifiers with parentheses --- test/test_YoutubeDL.py | 24 ++++++++++++++++++++++++ youtube_dl/YoutubeDL.py | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 709e3100f..6f374d7ea 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -245,6 +245,30 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded['format_id'], '137+141') self.assertEqual(downloaded['ext'], 'mp4') + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['137+141', '248+141']) + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['136+141', '247+141']) + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['248+141']) + for f1, f2 in zip(formats_order, formats_order[1:]): info_dict = _make_result([f1, f2], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 
258e612af..e5b46f87e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -920,6 +920,7 @@ class YoutubeDL(object): PICKFIRST = 'PICKFIRST' MERGE = 'MERGE' SINGLE = 'SINGLE' + GROUP = 'GROUP' FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) def _parse_filter(tokens): @@ -942,6 +943,10 @@ class YoutubeDL(object): elif type == tokenize.OP: if string in endwith: break + elif string == ')': + # ')' will be handled by the parentheses group + tokens.restore_last_token() + break if string == ',': selectors.append(current_selector) current_selector = None @@ -955,6 +960,10 @@ class YoutubeDL(object): current_selector = FormatSelector(SINGLE, 'best', []) format_filter = _parse_filter(tokens) current_selector.filters.append(format_filter) + elif string == '(': + if current_selector: + raise syntax_error('Unexpected "("', start) + current_selector = FormatSelector(GROUP, _parse_format_selection(tokens, [')']), []) elif string == '+': video_selector = current_selector audio_selector = _parse_format_selection(tokens, [',']) @@ -977,6 +986,8 @@ class YoutubeDL(object): for format in f(formats): yield format return selector_function + elif selector.type == GROUP: + selector_function = _build_selector_function(selector.selector) elif selector.type == PICKFIRST: fs = [_build_selector_function(s) for s in selector.selector] @@ -1084,8 +1095,32 @@ class YoutubeDL(object): return final_selector stream = io.BytesIO(format_spec.encode('utf-8')) - tokens = compat_tokenize_tokenize(stream.readline) - parsed_selector = _parse_format_selection(tokens) + try: + tokens = list(compat_tokenize_tokenize(stream.readline)) + except tokenize.TokenError: + raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) + + class TokenIterator(object): + def __init__(self, tokens): + self.tokens = tokens + self.counter = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.counter >= 
len(self.tokens): + raise StopIteration() + value = self.tokens[self.counter] + self.counter += 1 + return value + + next = __next__ + + def restore_last_token(self): + self.counter -= 1 + + parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) return _build_selector_function(parsed_selector) def _calc_headers(self, info_dict): From cf386750c9194839e419a0412f45f25f28236c77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 29 Jun 2015 22:21:09 +0600 Subject: [PATCH 1134/2721] [hentaistigma] Modernize --- youtube_dl/extractor/hentaistigma.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dl/extractor/hentaistigma.py index 225af8cb3..f5aa73d18 100644 --- a/youtube_dl/extractor/hentaistigma.py +++ b/youtube_dl/extractor/hentaistigma.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -19,20 +17,19 @@ class HentaiStigmaIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_search_regex( - r'

    ]*>([^<]+)', + r']+class="posttitle"[^>]*>]*>([^<]+)', webpage, 'title') wrap_url = self._html_search_regex( - r'', start_page, 'xml filename') - xml_decription_url = xml_root + 'xml/' + xml_name - xml_description = self._download_xml(xml_decription_url, display_id) + xml_description_url = xml_root + 'xml/' + xml_name + xml_description = self._download_xml(xml_description_url, display_id) video_title = xml_description.find('./metadata/title').text video_formats = self._parse_mp4(xml_description) From ee114368ad0bb9822449295910263a99f9de4e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Aug 2015 20:22:13 +0600 Subject: [PATCH 1505/2721] [utils] Make value optional for find_xpath_attr This allows selecting particular attributes by name but without specifying the value and similar to xpath syntax `[@attrib]` --- test/test_utils.py | 9 +++++++++ youtube_dl/utils.py | 13 ++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 65692a9fb..a759b2da9 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -235,12 +235,21 @@ class TestUtil(unittest.TestCase): + ''' doc = xml.etree.ElementTree.fromstring(testxml) + self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None) self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1]) self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2]) self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3]) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4]) def 
test_xpath_with_ns(self): testxml = ''' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 88f9f9070..78dc2b449 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -139,21 +139,24 @@ def write_json_file(obj, fn): if sys.version_info >= (2, 7): - def find_xpath_attr(node, xpath, key, val): + def find_xpath_attr(node, xpath, key, val=None): """ Find the xpath xpath[@key=val] """ assert re.match(r'^[a-zA-Z-]+$', key) - assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val) - expr = xpath + "[@%s='%s']" % (key, val) + if val: + assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val) + expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) return node.find(expr) else: - def find_xpath_attr(node, xpath, key, val): + def find_xpath_attr(node, xpath, key, val=None): # Here comes the crazy part: In 2.6, if the xpath is a unicode, # .//node does not match if a node is a direct child of . ! if isinstance(xpath, compat_str): xpath = xpath.encode('ascii') for f in node.findall(xpath): - if f.attrib.get(key) == val: + if key not in f.attrib: + continue + if val is None or f.attrib.get(key) == val: return f return None From 3f125c8c70e8109bc90d4446b40740133e343b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 1 Aug 2015 21:43:33 +0600 Subject: [PATCH 1506/2721] [nbcnews] Extend _VALID_URL --- youtube_dl/extractor/nbc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index dc2091be0..ccdbfb6c9 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -124,7 +124,7 @@ class NBCSportsIE(InfoExtractor): class NBCNewsIE(InfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ (?:video/.+?/(?P\d+)| - (?:feature|nightly-news)/[^/]+/(?P.+)) + (?:watch|feature|nightly-news)/[^/]+/(?P<title>.+)) ''' _TESTS = [ @@ -169,6 +169,10 @@ class NBCNewsIE(InfoExtractor): 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', }, }, + 
{ + 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952', + 'only_matching': True, + }, ] def _real_extract(self, url): From 55eae65b39d754d699ad9de3f9c99fcdf62e0176 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 2 Aug 2015 00:42:23 +0800 Subject: [PATCH 1507/2721] Credit @cyb3r for the ir90tv extractor --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index aa6b88cc0..d16d34272 100644 --- a/AUTHORS +++ b/AUTHORS @@ -136,3 +136,4 @@ sceext Zach Bruggeman Tjark Saul slangangular +Behrouz Abbasi From a107193e4b7a3d5414dd7422263c34ac0e309ec4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 01:13:21 +0600 Subject: [PATCH 1508/2721] [extractor/common] Extract f4m and m3u8 formats, subtitles and info --- youtube_dl/extractor/common.py | 200 ++++++++++++++++++++++++--------- 1 file changed, 149 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dc5080504..f9578b838 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,6 +18,7 @@ from ..compat import ( compat_HTTPError, compat_http_client, compat_urllib_error, + compat_urllib_parse, compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, @@ -37,6 +38,7 @@ from ..utils import ( RegexNotFoundError, sanitize_filename, unescapeHTML, + url_basename, ) @@ -978,69 +980,165 @@ class InfoExtractor(object): self._sort_formats(formats) return formats - # TODO: improve extraction - def _extract_smil_formats(self, smil_url, video_id, fatal=True): - smil = self._download_xml( - smil_url, video_id, 'Downloading SMIL file', - 'Unable to download SMIL file', fatal=fatal) + @staticmethod + def _xpath_ns(path, namespace=None): + if not namespace: + return path + out = [] + for c in path.split('/'): + if not c or c == '.': + out.append(c) + else: + out.append('{%s}%s' % (namespace, 
c)) + return '/'.join(out) + + def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None): + smil = self._download_smil(smil_url, video_id, fatal=fatal) + if smil is False: assert not fatal return [] - base = smil.find('./head/meta').get('base') + namespace = self._search_regex( + r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + + return self._parse_smil_formats( + smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) + + def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None): + smil = self._download_smil(smil_url, video_id, fatal=fatal) + if smil is False: + return {} + return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) + + def _download_smil(self, smil_url, video_id, fatal=True): + return self._download_xml( + smil_url, video_id, 'Downloading SMIL file', + 'Unable to download SMIL file', fatal=fatal) + + def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): + namespace = self._search_regex( + r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + + formats = self._parse_smil_formats( + smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) + subtitles = self._parse_smil_subtitles(smil, namespace=namespace) + + video_id = os.path.splitext(url_basename(smil_url))[0] + title = None + description = None + for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): + name = meta.attrib.get('name') + content = meta.attrib.get('content') + if not name or not content: + continue + if not title and name == 'title': + title = content + elif not description and name in ('description', 'abstract'): + description = content + + return { + 'id': video_id, + 'title': title or video_id, + 'description': description, + 'formats': formats, + 'subtitles': subtitles, + } + + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): + base = smil_url + for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): + b = 
meta.get('base') or meta.get('httpBase') + if b: + base = b + break formats = [] rtmp_count = 0 - if smil.findall('./body/seq/video'): - video = smil.findall('./body/seq/video')[0] - fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count) - formats.extend(fmts) - else: - for video in smil.findall('./body/switch/video'): - fmts, rtmp_count = self._parse_smil_video(video, video_id, base, rtmp_count) - formats.extend(fmts) + http_count = 0 + + videos = smil.findall(self._xpath_ns('.//video', namespace)) + for video in videos: + src = video.get('src') + if not src: + continue + + bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) + filesize = int_or_none(video.get('size') or video.get('fileSize')) + width = int_or_none(video.get('width')) + height = int_or_none(video.get('height')) + proto = video.get('proto') + ext = video.get('ext') + src_ext = determine_ext(src) + streamer = video.get('streamer') or base + + if proto == 'rtmp' or streamer.startswith('rtmp'): + rtmp_count += 1 + formats.append({ + 'url': streamer, + 'play_path': src, + 'ext': 'flv', + 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), + 'tbr': bitrate, + 'filesize': filesize, + 'width': width, + 'height': height, + }) + continue + + src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) + + if proto == 'm3u8' or src_ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src_url, video_id, ext or 'mp4', m3u8_id='hls')) + continue + + if src_ext == 'f4m': + f4m_url = src_url + if not f4m_params: + f4m_params = { + 'hdcore': '3.2.0', + 'plugin': 'flowplayer-3.2.0.1', + } + f4m_url += '&' if '?' in f4m_url else '?' 
+ f4m_url += compat_urllib_parse.urlencode(f4m_params).encode('utf-8') + formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds')) + continue + + if src_url.startswith('http'): + http_count += 1 + formats.append({ + 'url': src_url, + 'ext': ext or src_ext or 'flv', + 'format_id': 'http-%d' % (bitrate or http_count), + 'tbr': bitrate, + 'filesize': filesize, + 'width': width, + 'height': height, + }) + continue self._sort_formats(formats) return formats - def _parse_smil_video(self, video, video_id, base, rtmp_count): - src = video.get('src') - if not src: - return [], rtmp_count - bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - proto = video.get('proto') - if not proto: - if base: - if base.startswith('rtmp'): - proto = 'rtmp' - elif base.startswith('http'): - proto = 'http' - ext = video.get('ext') - if proto == 'm3u8': - return self._extract_m3u8_formats(src, video_id, ext), rtmp_count - elif proto == 'rtmp': - rtmp_count += 1 - streamer = video.get('streamer') or base - return ([{ - 'url': streamer, - 'play_path': src, - 'ext': 'flv', - 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate), - 'tbr': bitrate, - 'width': width, - 'height': height, - }], rtmp_count) - elif proto.startswith('http'): - return ([{ - 'url': base + src, - 'ext': ext or 'flv', - 'tbr': bitrate, - 'width': width, - 'height': height, - }], rtmp_count) + def _parse_smil_subtitles(self, smil, namespace=None): + subtitles = {} + for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): + src = textstream.get('src') + if not src: + continue + ext = textstream.get('ext') or determine_ext(src) + if not ext: + type_ = textstream.get('type') + if type_ == 'text/srt': + ext = 'srt' + lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') + subtitles.setdefault(lang, []).append({ + 
'url': src, + 'ext': ext, + }) + return subtitles def _live_title(self, name): """ Generate the title for a live video """ From e5e8d20a3a65832c74b002f247866fcbb92e9246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 01:13:59 +0600 Subject: [PATCH 1509/2721] [extractor/generic] Improve generic SMIL detection --- youtube_dl/extractor/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8cef61c3c..6900ed96f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1110,11 +1110,13 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) - # Is it an RSS feed? + # Is it an RSS feed or a SMIL file? try: doc = parse_xml(webpage) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) + elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): + return self._parse_smil(doc, url, video_id) except compat_xml_parse_error: pass From 308cfe0ab3ec7122602ba2d6a4e3acd2caa7a757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 01:14:41 +0600 Subject: [PATCH 1510/2721] [test_downloader] Respect --force-generic-extractor --- test/test_download.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_download.py b/test/test_download.py index 1110357a7..284418834 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -136,7 +136,9 @@ def generator(test_case): # We're not using .download here sine that is just a shim # for outside error handling, and returns the exit code # instead of the result dict. 
- res_dict = ydl.extract_info(test_case['url']) + res_dict = ydl.extract_info( + test_case['url'], + force_generic_extractor=params.get('force_generic_extractor', False)) except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): From 645f814544f9d40386e504a1eb8cf3558f2c109e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 01:15:33 +0600 Subject: [PATCH 1511/2721] [test/helper] Allow dicts for mincount --- test/helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/helper.py b/test/helper.py index e1129e58f..c8b34654d 100644 --- a/test/helper.py +++ b/test/helper.py @@ -133,8 +133,8 @@ def expect_info_dict(self, got_dict, expected_dict): elif isinstance(expected, compat_str) and expected.startswith('mincount:'): got = got_dict.get(info_field) self.assertTrue( - isinstance(got, list), - 'Expected field %s to be a list, but it is of type %s' % ( + isinstance(got, (list, dict)), + 'Expected field %s to be a list or a dict, but it is of type %s' % ( info_field, type(got).__name__)) expected_num = int(expected.partition(':')[2]) assertGreaterEqual( From 8765222d2211cd6f2a40611249181af0bbb2d531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 01:16:21 +0600 Subject: [PATCH 1512/2721] [extractor/generic] Add generic SMIL tests --- youtube_dl/extractor/generic.py | 68 +++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6900ed96f..27584c44c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -130,6 +130,74 @@ class GenericIE(InfoExtractor): 'title': 
'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng + { + 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', + 'info_dict': { + 'id': 'smil', + 'ext': 'mp4', + 'title': 'Automatics, robotics and biocybernetics', + 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', + 'formats': 'mincount:16', + 'subtitles': 'mincount:1', + }, + 'params': { + 'force_generic_extractor': True, + 'skip_download': True, + }, + }, + # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html + { + 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil', + 'info_dict': { + 'id': 'hds', + 'ext': 'flv', + 'title': 'hds', + 'formats': 'mincount:1', + }, + 'params': { + 'skip_download': True, + }, + }, + # SMIL from https://www.restudy.dk/video/play/id/1637 + { + 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml', + 'info_dict': { + 'id': 'video_1637', + 'ext': 'flv', + 'title': 'video_1637', + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, + }, + }, + # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm + { + 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil', + 'info_dict': { + 'id': 'smil-service', + 'ext': 'flv', + 'title': 'smil-service', + 'formats': 'mincount:1', + }, + 'params': { + 'skip_download': True, + }, + }, + # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370 + { + 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil', + 'info_dict': { + 'id': '4719370', + 'ext': 'mp4', + 'title': '571de1fd-47bc-48db-abf9-238872a58d1f', + 'formats': 'mincount:3', + }, + 'params': { + 'skip_download': True, + }, + }, # google redirect { 'url': 
'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', From 41c3a5a7beebbf5f60c5edb5093d564f0829c5c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 01:20:49 +0600 Subject: [PATCH 1513/2721] [extractor/common] Fix python 3 --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f9578b838..c123d9fca 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1101,7 +1101,7 @@ class InfoExtractor(object): 'plugin': 'flowplayer-3.2.0.1', } f4m_url += '&' if '?' in f4m_url else '?' - f4m_url += compat_urllib_parse.urlencode(f4m_params).encode('utf-8') + f4m_url += compat_urllib_parse.urlencode(f4m_params) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds')) continue From 17712eeb1933f53696c1fc53606174e988a96472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 01:31:17 +0600 Subject: [PATCH 1514/2721] [extractor/common] Extract namespace parse routine --- youtube_dl/extractor/common.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c123d9fca..717dcec7b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -999,8 +999,7 @@ class InfoExtractor(object): assert not fatal return [] - namespace = self._search_regex( - r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + namespace = self._parse_smil_namespace(smil) return self._parse_smil_formats( smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) @@ -1017,8 +1016,7 @@ class InfoExtractor(object): 'Unable to download SMIL file', 
fatal=fatal) def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): - namespace = self._search_regex( - r'{([^}]+)?}smil', smil.tag, 'namespace', default=None) + namespace = self._parse_smil_namespace(smil) formats = self._parse_smil_formats( smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) @@ -1045,6 +1043,10 @@ class InfoExtractor(object): 'subtitles': subtitles, } + def _parse_smil_namespace(self, smil): + return self._search_regex( + r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): base = smil_url for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): From fa7a1cc5ef52a8dd9a355ab37a74be55ac2ddc1f Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Wed, 22 Jul 2015 12:34:42 +0100 Subject: [PATCH 1515/2721] [screenwavemedia] fix info extraction (fixes #6270) Closes #6330. --- youtube_dl/extractor/screenwavemedia.py | 84 +++++++++++-------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index d1ab66b32..09c085dcf 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -1,12 +1,11 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + js_to_json, ) @@ -22,59 +21,48 @@ class ScreenwaveMediaIE(InfoExtractor): video_id = self._match_id(url) playerdata = self._download_webpage( - 'http://player.screenwavemedia.com/play/player.php?id=%s' % video_id, + 'http://player.screenwavemedia.com/player.php?id=%s' % video_id, video_id, 'Downloading player webpage') vidtitle = self._search_regex( r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') - vidurl = self._search_regex( - r'\'vidurl\'\s*:\s*"([^"]+)"', playerdata, 
'vidurl').replace('\\/', '/') - videolist_url = None + playerconfig = self._download_webpage( + 'http://player.screenwavemedia.com/player.js', + video_id, 'Downloading playerconfig webpage') - mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata) - if mobj: - videoserver = mobj.group('videoserver') - mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) - vidid = mobj.group('vidid') if mobj else video_id - videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) - else: - mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) - if mobj: - videolist_url = mobj.group('smil') + videoserver = self._search_regex(r"'videoserver'\s*:\s*'([^']+)", playerconfig, 'videoserver') - if videolist_url: - videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') - formats = [] - baseurl = vidurl[:vidurl.rfind('/') + 1] - for video in videolist.findall('.//video'): - src = video.get('src') - if not src: - continue - file_ = src.partition(':')[-1] - width = int_or_none(video.get('width')) - height = int_or_none(video.get('height')) - bitrate = int_or_none(video.get('system-bitrate'), scale=1000) - format = { - 'url': baseurl + file_, - 'format_id': src.rpartition('.')[0].rpartition('_')[-1], - } - if width or height: - format.update({ - 'tbr': bitrate, - 'width': width, - 'height': height, - }) - else: - format.update({ - 'abr': bitrate, - 'vcodec': 'none', - }) - formats.append(format) - else: - formats = [{ - 'url': vidurl, - }] + sources = self._parse_json( + js_to_json( + self._search_regex( + r"sources\s*:\s*(\[[^\]]+?\])", playerconfig, + 'sources', + ).replace( + "' + thisObj.options.videoserver + '", + videoserver + ).replace( + "' + playerVidId + '", + video_id + ) + ), + video_id + ) + + formats = [] + for source in sources: + if source['type'] == 'hls': + formats.extend(self._extract_m3u8_formats(source['file'], video_id)) + else: + format_label = 
source.get('label') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]', format_label, 'height', default=None)) + formats.append({ + 'url': source['file'], + 'format': format_label, + 'ext': source.get('type'), + 'height': height, + }) self._sort_formats(formats) return { From 9cc93c64aa321260475a2bdf7d8626cdd16bf8ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 1 Aug 2015 22:15:43 +0200 Subject: [PATCH 1516/2721] [screenwavemedia] Use the IP for the videoserver (fixes #6397) For http://cinemassacre.com/2015/07/28/avgn-seaman-for-dreamcast/ the other server returns a 403 error. --- youtube_dl/extractor/screenwavemedia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 09c085dcf..3bc84989e 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -31,7 +31,7 @@ class ScreenwaveMediaIE(InfoExtractor): 'http://player.screenwavemedia.com/player.js', video_id, 'Downloading playerconfig webpage') - videoserver = self._search_regex(r"'videoserver'\s*:\s*'([^']+)", playerconfig, 'videoserver') + videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver') sources = self._parse_json( js_to_json( From cdc682d5a467b7188eb13b5eeb76eb5dd544d1f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 04:21:16 +0600 Subject: [PATCH 1517/2721] [nowtv] Fix extraction (Closes #6357) --- youtube_dl/extractor/nowtv.py | 63 +++++++++++++++-------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 0b5ff4760..de6bc6d96 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import 
compat_str from ..utils import ( ExtractorError, + determine_ext, int_or_none, parse_iso8601, parse_duration, @@ -15,7 +16,7 @@ from ..utils import ( class NowTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?P<station>rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player' + _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player' _TESTS = [{ # rtl @@ -23,7 +24,7 @@ class NowTVIE(InfoExtractor): 'info_dict': { 'id': '203519', 'display_id': 'bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Die neuen Bauern und eine Hochzeit', 'description': 'md5:e234e1ed6d63cf06be5c070442612e7e', 'thumbnail': 're:^https?://.*\.jpg$', @@ -32,7 +33,7 @@ class NowTVIE(InfoExtractor): 'duration': 2786, }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { @@ -41,7 +42,7 @@ class NowTVIE(InfoExtractor): 'info_dict': { 'id': '203481', 'display_id': 'berlin-tag-nacht/berlin-tag-nacht-folge-934', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Berlin - Tag & Nacht (Folge 934)', 'description': 'md5:c85e88c2e36c552dfe63433bc9506dd0', 'thumbnail': 're:^https?://.*\.jpg$', @@ -50,7 +51,7 @@ class NowTVIE(InfoExtractor): 'duration': 2641, }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { @@ -59,7 +60,7 @@ class NowTVIE(InfoExtractor): 'info_dict': { 'id': '165780', 'display_id': 'alarm-fuer-cobra-11-die-autobahnpolizei/hals-und-beinbruch-2014-08-23-21-10-00', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Hals- und Beinbruch', 'description': 'md5:b50d248efffe244e6f56737f0911ca57', 'thumbnail': 're:^https?://.*\.jpg$', @@ -68,7 +69,7 @@ class NowTVIE(InfoExtractor): 'duration': 2742, }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { @@ -77,7 +78,7 @@ class NowTVIE(InfoExtractor): 'info_dict': { 'id': '99205', 'display_id': 'medicopter-117/angst', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Angst!', 
'description': 'md5:30cbc4c0b73ec98bcd73c9f2a8c17c4e', 'thumbnail': 're:^https?://.*\.jpg$', @@ -86,7 +87,7 @@ class NowTVIE(InfoExtractor): 'duration': 3025, }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { @@ -95,7 +96,7 @@ class NowTVIE(InfoExtractor): 'info_dict': { 'id': '203521', 'display_id': 'ratgeber-geld/thema-ua-der-erste-blick-die-apple-watch', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Thema u.a.: Der erste Blick: Die Apple Watch', 'description': 'md5:4312b6c9d839ffe7d8caf03865a531af', 'thumbnail': 're:^https?://.*\.jpg$', @@ -104,7 +105,7 @@ class NowTVIE(InfoExtractor): 'duration': 1083, }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { @@ -113,7 +114,7 @@ class NowTVIE(InfoExtractor): 'info_dict': { 'id': '128953', 'display_id': 'der-hundeprofi/buero-fall-chihuahua-joel', - 'ext': 'mp4', + 'ext': 'flv', 'title': "Büro-Fall / Chihuahua 'Joel'", 'description': 'md5:e62cb6bf7c3cc669179d4f1eb279ad8d', 'thumbnail': 're:^https?://.*\.jpg$', @@ -122,15 +123,13 @@ class NowTVIE(InfoExtractor): 'duration': 3092, }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('id') - station = mobj.group('station') + display_id = self._match_id(url) info = self._download_json( 'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id, @@ -148,29 +147,19 @@ class NowTVIE(InfoExtractor): raise ExtractorError( 'Video %s is not available for free' % video_id, expected=True) - f = info.get('format', {}) - station = f.get('station') or station - - STATIONS = { - 'rtl': 'rtlnow', - 'rtl2': 'rtl2now', - 'vox': 'voxnow', - 'nitro': 'rtlnitronow', - 'ntv': 'n-tvnow', - 'superrtl': 'superrtlnow' - } - formats = [] for item in files['items']: - item_path = remove_start(item['path'], '/') - tbr = 
int_or_none(item['bitrate']) - m3u8_url = 'http://hls.fra.%s.de/hls-vod-enc/%s.m3u8' % (STATIONS[station], item_path) - m3u8_url = m3u8_url.replace('now/', 'now/videos/') + if determine_ext(item['path']) != 'f4v': + continue + app, play_path = remove_start(item['path'], '/').split('/', 1) formats.append({ - 'url': m3u8_url, - 'format_id': '%s-%sk' % (item['id'], tbr), - 'ext': 'mp4', - 'tbr': tbr, + 'url': 'rtmpe://fms.rtl.de', + 'app': app, + 'play_path': 'mp4:%s' % play_path, + 'ext': 'flv', + 'page_url': url, + 'player_url': 'http://rtl-now.rtl.de/includes/nc_player.swf', + 'tbr': int_or_none(item.get('bitrate')), }) self._sort_formats(formats) @@ -178,6 +167,8 @@ class NowTVIE(InfoExtractor): description = info.get('articleLong') or info.get('articleShort') timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ') duration = parse_duration(info.get('duration')) + + f = info.get('format', {}) thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo') return { From e422d7f4f78994de8483d2207ab4e00174a2408c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 04:26:59 +0600 Subject: [PATCH 1518/2721] [nowtv] Expand _VALID_URL --- youtube_dl/extractor/nowtv.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index de6bc6d96..11ce37168 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -16,7 +16,7 @@ from ..utils import ( class NowTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/player' + _VALID_URL = r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)' _TESTS = [{ # rtl @@ -126,6 +126,9 @@ class NowTVIE(InfoExtractor): # rtmp download 'skip_download': True, }, + }, { + 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview', + 
'only_matching': True, }] def _real_extract(self, url): From d41d04c0f513ad3b83ab6aee60cf2201710b6063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 2 Aug 2015 06:35:35 +0600 Subject: [PATCH 1519/2721] [videolectures] Fix _VALID_URL --- youtube_dl/extractor/videolecturesnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index d6a7eb203..24584dc80 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -12,7 +12,7 @@ from ..utils import ( class VideoLecturesNetIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/' + _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)(?:/?[#?].*)?$' IE_NAME = 'videolectures.net' _TEST = { From 5c45bbe57bd791debfd64052ab030298a7c6b718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 2 Aug 2015 15:19:30 +0200 Subject: [PATCH 1520/2721] [nowtv] Remove unused import --- youtube_dl/extractor/nowtv.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 11ce37168..ad938fb62 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( From 25a4c5a9ed59eca0241922363e83e61172527658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 2 Aug 2015 15:19:57 +0200 Subject: [PATCH 1521/2721] [dailymotion:playlist] Use an iterator for the entries So that using '--playlist-end' only downloads the required pages (reported in #2175). 
--- youtube_dl/extractor/dailymotion.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 85d945509..2d90b2224 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -15,7 +15,6 @@ from ..utils import ( ExtractorError, determine_ext, int_or_none, - orderedSet, parse_iso8601, str_to_int, unescapeHTML, @@ -278,7 +277,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): }] def _extract_entries(self, id): - video_ids = [] + video_ids = set() processed_urls = set() for pagenum in itertools.count(1): page_url = self._PAGE_TEMPLATE % (id, pagenum) @@ -291,12 +290,13 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): processed_urls.add(urlh.geturl()) - video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage)) + for video_id in re.findall(r'data-xid="(.+?)"', webpage): + if video_id not in video_ids: + yield self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') + video_ids.add(video_id) if re.search(self._MORE_PAGES_INDICATOR, webpage) is None: break - return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion') - for video_id in orderedSet(video_ids)] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From d7d2a9a3dbf1cef78c5085a4aab5d2f336c64cff Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 2 Aug 2015 03:28:04 +0100 Subject: [PATCH 1522/2721] [utils] restart download if server does not support byte ranges --- youtube_dl/downloader/http.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index b7f144af9..b2e82cfde 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -57,6 +57,20 @@ class HttpFD(FileDownloader): # Establish connection try: data = self.ydl.urlopen(request) + + if resume_len > 0: + 
content_range = data.headers.get('Content-Range') + if content_range: + content_range_m = re.search(r'bytes (\d+)-', content_range) + if content_range_m: + # Content-Range is correct - go on + if resume_len == int(content_range_m.group(1)): + break + + # Content-Range is invalid - wipe the file and do entire redownload + resume_len = 0 + open_mode = 'wb' + break except (compat_urllib_error.HTTPError, ) as err: if (err.code < 500 or err.code >= 600) and err.code != 416: From 8d5b8b477e4b1051482b21ea451f0de1ce23bce7 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 2 Aug 2015 03:58:02 +0100 Subject: [PATCH 1523/2721] [utils] import re --- youtube_dl/downloader/http.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index b2e82cfde..f796ee113 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -4,6 +4,7 @@ import errno import os import socket import time +import re from .common import FileDownloader from ..compat import ( From c3124c3085e6a9a83ee31ace3a7d528a324c42da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Aug 2015 02:25:08 +0600 Subject: [PATCH 1524/2721] [downloader/http] Simplify --- youtube_dl/downloader/http.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index f796ee113..0862e90bb 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -58,20 +58,16 @@ class HttpFD(FileDownloader): # Establish connection try: data = self.ydl.urlopen(request) - if resume_len > 0: content_range = data.headers.get('Content-Range') if content_range: content_range_m = re.search(r'bytes (\d+)-', content_range) - if content_range_m: - # Content-Range is correct - go on - if resume_len == int(content_range_m.group(1)): - break - + # Content-Range is correct - go on + if content_range_m and 
resume_len == int(content_range_m.group(1)): + break # Content-Range is invalid - wipe the file and do entire redownload resume_len = 0 open_mode = 'wb' - break except (compat_urllib_error.HTTPError, ) as err: if (err.code < 500 or err.code >= 600) and err.code != 416: From 10eaa8ef1d2a9699052af9262aa472456548e99b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Aug 2015 02:25:40 +0600 Subject: [PATCH 1525/2721] [downloader/http] Report unable to resume --- youtube_dl/downloader/http.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 0862e90bb..2f8490f02 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -66,6 +66,7 @@ class HttpFD(FileDownloader): if content_range_m and resume_len == int(content_range_m.group(1)): break # Content-Range is invalid - wipe the file and do entire redownload + self.report_unable_to_resume() resume_len = 0 open_mode = 'wb' break From 84bc4dcb0f678f0a8c9f993e101b9769e3959f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Aug 2015 02:27:47 +0600 Subject: [PATCH 1526/2721] [downloader/http] Clarify rationale for Content-Range check (#6426) --- youtube_dl/downloader/http.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 2f8490f02..a29f5cf31 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -58,14 +58,21 @@ class HttpFD(FileDownloader): # Establish connection try: data = self.ydl.urlopen(request) + # When trying to resume, Content-Range HTTP header of response has to be checked + # to match the value of requested Range HTTP header. 
This is due to a webservers + # that don't support resuming and serve a whole file with no Content-Range + # set in response despite of requested Range (see + # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) if resume_len > 0: content_range = data.headers.get('Content-Range') if content_range: content_range_m = re.search(r'bytes (\d+)-', content_range) - # Content-Range is correct - go on - if content_range_m and resume_len == int(content_range_m.group(1)): - break - # Content-Range is invalid - wipe the file and do entire redownload + # Content-Range is present and matches requested Range, resume is possible + if content_range_m and resume_len == int(content_range_m.group(1)): + break + # Content-Range is either not present or invalid. Assuming remote webserver is + # trying to send the whole file, resume is not possible, so wiping the local file + # and performing entire redownload self.report_unable_to_resume() resume_len = 0 open_mode = 'wb' From 754e70cf3e74218ae5d840985fbf07bbe274332a Mon Sep 17 00:00:00 2001 From: George Brighton <george@gebn.co.uk> Date: Sun, 2 Aug 2015 19:21:10 +0100 Subject: [PATCH 1527/2721] [pornhub] Fix video url regular expression. PornHub seems to have subtly changed their JavaScript. Before, video URL strings were embedded directly in the video's `flashvars_*` object, but they are now assigned to variables of the form `player_quality_*`, which are then added to this object later under the relevant quality key. 
--- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 0b7886840..fbaa830d6 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -81,7 +81,7 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) + video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"var player_quality_[0-9]{3}p = '([^']+)'", webpage))) if webpage.find('"encrypted":true') != -1: password = compat_urllib_parse_unquote_plus( self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) From 524229a2975c20887a9a71cae77132e775003537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Aug 2015 02:41:17 +0600 Subject: [PATCH 1528/2721] [pornhub] Improve --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index fbaa830d6..fec493046 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -81,7 +81,7 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"var player_quality_[0-9]{3}p = '([^']+)'", webpage))) + video_urls = list(map(compat_urllib_parse_unquote, re.findall(r"player_quality_[0-9]{3}p\s*=\s*'([^']+)'", webpage))) if webpage.find('"encrypted":true') != -1: password = compat_urllib_parse_unquote_plus( self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) From 51a575159a5a83e4477b03544f419dcf2e9ff0fa Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sun, 2 Aug 2015 22:52:12 +0100 Subject: 
[PATCH 1529/2721] [facebook] extract uploader --- youtube_dl/extractor/facebook.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index e17bb9aea..734de4da2 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -17,6 +17,8 @@ from ..utils import ( int_or_none, limit_length, urlencode_postdata, + get_element_by_id, + clean_html, ) @@ -161,6 +163,7 @@ class FacebookIE(InfoExtractor): video_title = limit_length(video_title, 80) if not video_title: video_title = 'Facebook video #%s' % video_id + uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) return { 'id': video_id, @@ -168,4 +171,5 @@ class FacebookIE(InfoExtractor): 'formats': formats, 'duration': int_or_none(video_data.get('video_duration')), 'thumbnail': video_data.get('thumbnail_src'), + 'uploader': uploader, } From 67b8a28a2f69764259cf2e90c0a3785c05c55551 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Mon, 3 Aug 2015 00:09:21 +0100 Subject: [PATCH 1530/2721] [facebook] add uploader value to the tests --- youtube_dl/extractor/facebook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 734de4da2..178a7ca4c 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -44,6 +44,7 @@ class FacebookIE(InfoExtractor): 'id': '637842556329505', 'ext': 'mp4', 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam', + 'uploader': 'Tennis on Facebook', } }, { 'note': 'Video without discernible title', @@ -52,6 +53,7 @@ class FacebookIE(InfoExtractor): 'id': '274175099429670', 'ext': 'mp4', 'title': 'Facebook video #274175099429670', + 'uploader': 'Asif Nawab Butt', }, 'expected_warnings': [ 'title' From 8de922724b8f3ad31ff7249799de371ff8a5c3ad Mon Sep 17 00:00:00 2001 From: "Sergey M." 
<dstftw@gmail.com> Date: Mon, 3 Aug 2015 05:36:17 +0600 Subject: [PATCH 1531/2721] [README.md] Clarify using cookies --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index ac54d7b67..2db3139ee 100644 --- a/README.md +++ b/README.md @@ -439,6 +439,12 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt youtube-dl -- -wNyEUrxzFU youtube-dl "http://www.youtube.com/watch?v=-wNyEUrxzFU" +### How do I pass cookies to youtube-dl? + +Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that cookies file must be in Mozilla/Netscape format and the first line of cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in cookies file and convert newlines if necessary to correspond your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. + +Passing cookies to youtube-dl is a good way to workaround login when particular extractor does not implement it explicitly. + ### Can you add support for this anime video site, or site which shows current movies for free? As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl. 
From 47a8b7c14a085ce558db3b5a85ded850cd5df642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 3 Aug 2015 12:00:08 +0200 Subject: [PATCH 1532/2721] [mdr] Change XPath to make it work in python 2.6 (fixes #6443) The 'progressiveDownloadUrl' element is a direct child, so they should be equivalent. --- youtube_dl/extractor/mdr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 5fdd19027..fc7499958 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -29,7 +29,7 @@ class MDRIE(InfoExtractor): doc = self._download_xml(domain + xmlurl, video_id) formats = [] for a in doc.findall('./assets/asset'): - url_el = a.find('.//progressiveDownloadUrl') + url_el = a.find('./progressiveDownloadUrl') if url_el is None: continue abr = int(a.find('bitrateAudio').text) // 1000 From 8f5639afcbb967f276fb8b35a24559cdcc3b6d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 3 Aug 2015 19:37:48 +0600 Subject: [PATCH 1533/2721] [pornhub] Improve video quality regex --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index fec493046..7b0cdc41a 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -94,7 +94,7 @@ class PornHubIE(InfoExtractor): format = path.split('/')[5].split('_')[:2] format = "-".join(format) - m = re.match(r'^(?P<height>[0-9]+)P-(?P<tbr>[0-9]+)K$', format) + m = re.match(r'^(?P<height>[0-9]+)[pP]-(?P<tbr>[0-9]+)[kK]$', format) if m is None: height = None tbr = None From e704f87f869b98bbed56d7dd0fe27710306c8272 Mon Sep 17 00:00:00 2001 From: Niklas Haas <git@nand.wakku.to> Date: Mon, 3 Aug 2015 01:54:21 +0200 Subject: [PATCH 1534/2721] [twitch] Parse start_time from 't' (closes #6441) Eg. 
for VOD links like http://www.twitch.tv/gamesdonequick/v/9136645?t=14h29m15s --- youtube_dl/extractor/twitch.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 73ce335b7..a2b6a35aa 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -7,12 +7,15 @@ import random from .common import InfoExtractor from ..compat import ( + compat_parse_qs, compat_str, compat_urllib_parse, + compat_urllib_parse_urlparse, compat_urllib_request, ) from ..utils import ( ExtractorError, + parse_duration, parse_iso8601, ) @@ -185,7 +188,7 @@ class TwitchVodIE(TwitchItemBaseIE): _ITEM_SHORTCUT = 'v' _TEST = { - 'url': 'http://www.twitch.tv/riotgames/v/6528877', + 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s', 'info_dict': { 'id': 'v6528877', 'ext': 'mp4', @@ -197,6 +200,7 @@ class TwitchVodIE(TwitchItemBaseIE): 'uploader': 'Riot Games', 'uploader_id': 'riotgames', 'view_count': int, + 'start_time': 310, }, 'params': { # m3u8 download @@ -216,6 +220,12 @@ class TwitchVodIE(TwitchItemBaseIE): item_id, 'mp4') self._prefer_source(formats) info['formats'] = formats + + parsed_url = compat_urllib_parse_urlparse(url) + query = compat_parse_qs(parsed_url.query) + if 't' in query: + info['start_time'] = parse_duration(query['t'][0]) + return info From d96d604e5311628ece0234733dbbfe73a58c8d18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 3 Aug 2015 23:04:11 +0200 Subject: [PATCH 1535/2721] YoutubeDL: format spec: don't accept a bare '/' (#6124) --- test/test_YoutubeDL.py | 1 + youtube_dl/YoutubeDL.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 20f45f439..9a3c28f8c 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -311,6 +311,7 @@ class TestFormatSelection(unittest.TestCase): 
assert_syntax_error('bestvideo,,best') assert_syntax_error('+bestaudio') assert_syntax_error('bestvideo+') + assert_syntax_error('/') def test_format_filtering(self): formats = [ diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index efa3254ce..c608ff91a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -960,6 +960,8 @@ class YoutubeDL(object): selectors.append(current_selector) current_selector = None elif string == '/': + if not current_selector: + raise syntax_error('"/" must follow a format selector', start) first_choice = current_selector second_choice = _parse_format_selection(tokens, inside_choice=True) current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) From a346b1ff57a94382e80fd4edd5a6d4b91a7cb45e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 4 Aug 2015 20:44:22 +0600 Subject: [PATCH 1536/2721] [bbc] Add support for vxp-playlist-data embeds (Closes #6453) --- youtube_dl/extractor/bbc.py | 45 ++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 9a1b6e3dc..abc5a44a1 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -526,6 +526,18 @@ class BBCIE(BBCCoUkIE): 'params': { 'skip_download': True, } + }, { + # single video from video playlist embedded with vxp-playlist-data JSON + 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376', + 'info_dict': { + 'id': 'p02w6qjc', + 'ext': 'mp4', + 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''', + 'duration': 56, + }, + 'params': { + 'skip_download': True, + } }, { # single video story with digitalData 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret', @@ -695,13 +707,36 @@ class BBCIE(BBCCoUkIE): if not medias: # Single video article (e.g. 
http://www.bbc.com/news/video_and_audio/international) - media_asset_page = self._parse_json( + media_asset = self._search_regex( + r'mediaAssetPage\.init\(\s*({.+?}), "/', + webpage, 'media asset', default=None) + if media_asset: + media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) + medias = [] + for video in media_asset_page.get('videos', {}).values(): + medias.extend(video.values()) + + if not medias: + # Multiple video playlist with single `now playing` entry (e.g. + # http://www.bbc.com/news/video_and_audio/must_see/33767813) + vxp_playlist = self._parse_json( self._search_regex( - r'mediaAssetPage\.init\(\s*({.+?}), "/', webpage, 'media asset'), + r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>', + webpage, 'playlist data'), playlist_id) - medias = [] - for video in media_asset_page.get('videos', {}).values(): - medias.extend(video.values()) + playlist_medias = [] + for item in vxp_playlist: + media = item.get('media') + if not media: + continue + playlist_medias.append(media) + # Download single video if found media with asset id matching the video id from URL + if item.get('advert', {}).get('assetId') == playlist_id: + medias = [media] + break + # Fallback to the whole playlist + if not medias: + medias = playlist_medias entries = [] for num, media_meta in enumerate(medias, start=1): From 232541df441741d3d55605f03e28ec3c34249a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 4 Aug 2015 22:29:23 +0200 Subject: [PATCH 1537/2721] [YoutubeDL] format spec: correctly handle dashes and other unused operators 'mp4-baseline-16x9' must be handled as a single string, but the '-' was treated as an operator. 
--- test/test_YoutubeDL.py | 6 ++++++ youtube_dl/YoutubeDL.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 9a3c28f8c..0388c0bf3 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -105,6 +105,7 @@ class TestFormatSelection(unittest.TestCase): def test_format_selection(self): formats = [ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL}, {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL}, @@ -136,6 +137,11 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '35') + ydl = YDL({'format': 'example-with-dashes'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'example-with-dashes') + def test_format_selection_audio(self): formats = [ {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c608ff91a..1446b3254 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -933,6 +933,37 @@ class YoutubeDL(object): else: filter_parts.append(string) + def _remove_unused_ops(tokens): + # Remove operators that we don't use and join them with the sourrounding strings + # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + ALLOWED_OPS = ('/', '+', ',', '(', ')') + last_string, last_start, last_end, last_line = None, None, None, None + for type, string, start, end, line in tokens: + if type == tokenize.OP and string == '[': + if last_string: + yield tokenize.NAME, last_string, last_start, 
last_end, last_line + last_string = None + yield type, string, start, end, line + # everything inside brackets will be handled by _parse_filter + for type, string, start, end, line in tokens: + yield type, string, start, end, line + if type == tokenize.OP and string == ']': + break + elif type == tokenize.OP and string in ALLOWED_OPS: + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + if not last_string: + last_string = string + last_start = start + last_end = end + else: + last_string += string + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None @@ -1111,7 +1142,7 @@ class YoutubeDL(object): stream = io.BytesIO(format_spec.encode('utf-8')) try: - tokens = list(compat_tokenize_tokenize(stream.readline)) + tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) From 8a5601e42f6974e6694f01089b4c7e014b6a1b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 19:52:04 +0600 Subject: [PATCH 1538/2721] [lynda] Fix login (Closes #6462) --- youtube_dl/extractor/lynda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index a00f6e5e5..39214de2f 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -17,7 +17,7 @@ from ..utils import ( class LyndaBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' - _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' + _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn\s*:\s*true' _ACCOUNT_CREDENTIALS_HINT = 'Use 
--username and --password options to provide lynda.com account credentials.' _NETRC_MACHINE = 'lynda' From 5b7dab2dd640c93ec0f63ca8b901e701679a4c7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 20:06:48 +0600 Subject: [PATCH 1539/2721] [lynda] Make login more robust --- youtube_dl/extractor/lynda.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 39214de2f..deead220a 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -17,7 +17,6 @@ from ..utils import ( class LyndaBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' - _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn\s*:\s*true' _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' _NETRC_MACHINE = 'lynda' @@ -41,7 +40,7 @@ class LyndaBaseIE(InfoExtractor): request, None, 'Logging in as %s' % username) # Not (yet) logged in - m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) + m = re.search(r'loginResultJson\s*=\s*\'(?P<json>[^\']+)\';', login_page) if m is not None: response = m.group('json') response_json = json.loads(response) @@ -70,7 +69,7 @@ class LyndaBaseIE(InfoExtractor): request, None, 'Confirming log in and log out from another device') - if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: + if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): raise ExtractorError('Unable to log in') From 354b4b8604ec13ccf4bd89b9d1b77cb7246fe379 Mon Sep 17 00:00:00 2001 From: vijayanand nandam <vijay@cybrilla.com> Date: Wed, 5 Aug 2015 19:37:59 +0530 Subject: [PATCH 1540/2721] fixing xhamster file extraction --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 
b4ad513a0..9d025530f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -47,7 +47,7 @@ class XHamsterIE(InfoExtractor): def _real_extract(self, url): def extract_video_url(webpage): - mp4 = re.search(r'<video\s+.*?file="([^"]+)".*?>', webpage) + mp4 = re.search(r'file:\s+\'([^\']+)\'', webpage) if mp4 is None: raise ExtractorError('Unable to extract media URL') else: From be7a8379b47c35afe66abcc02aee597e5143b1d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 20:32:44 +0600 Subject: [PATCH 1541/2721] [xhamster] Make more robust --- youtube_dl/extractor/xhamster.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 9d025530f..481d79b89 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -46,12 +46,12 @@ class XHamsterIE(InfoExtractor): ] def _real_extract(self, url): - def extract_video_url(webpage): - mp4 = re.search(r'file:\s+\'([^\']+)\'', webpage) - if mp4 is None: - raise ExtractorError('Unable to extract media URL') - else: - return mp4.group(1) + def extract_video_url(webpage, name): + return self._search_regex( + [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''', + r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''', + r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''], + webpage, name, group='mp4') def is_hd(webpage): return '<div class=\'icon iconHD\'' in webpage @@ -97,7 +97,9 @@ class XHamsterIE(InfoExtractor): hd = is_hd(webpage) - video_url = extract_video_url(webpage) + format_id = 'hd' if hd else 'sd' + + video_url = extract_video_url(webpage, format_id) formats = [{ 'url': video_url, 'format_id': 'hd' if hd else 'sd', @@ -108,7 +110,7 @@ class XHamsterIE(InfoExtractor): mrss_url = self._search_regex(r'<link rel="canonical" href="([^"]+)', webpage, 'mrss_url') webpage = 
self._download_webpage(mrss_url + '?hd', video_id, note='Downloading HD webpage') if is_hd(webpage): - video_url = extract_video_url(webpage) + video_url = extract_video_url(webpage, 'hd') formats.append({ 'url': video_url, 'format_id': 'hd', From 251a44b776264c17d7799e017b856143c6cacd9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 20:36:37 +0600 Subject: [PATCH 1542/2721] [xhamster] Fix thumbnail extraction --- youtube_dl/extractor/xhamster.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 481d79b89..b57e7c813 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -78,7 +78,10 @@ class XHamsterIE(InfoExtractor): uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)', webpage, 'uploader id', default='anonymous') - thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False) + thumbnail = self._search_regex( + [r'''thumb\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''', + r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''], + webpage, 'thumbnail', fatal=False, group='thumbnail') duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>', webpage, 'duration', fatal=False)) From 3e4852247744b131600ba43275ab321eb1b32bb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 20:41:40 +0600 Subject: [PATCH 1543/2721] [xhamster] Fix uploader extraction --- youtube_dl/extractor/xhamster.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index b57e7c813..06fedf840 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -22,7 +22,7 @@ class XHamsterIE(InfoExtractor): 'ext': 'mp4', 'title': 
'FemaleAgent Shy beauty takes the bait', 'upload_date': '20121014', - 'uploader_id': 'Ruseful2011', + 'uploader': 'Ruseful2011', 'duration': 893, 'age_limit': 18, } @@ -34,7 +34,7 @@ class XHamsterIE(InfoExtractor): 'ext': 'mp4', 'title': 'Britney Spears Sexy Booty', 'upload_date': '20130914', - 'uploader_id': 'jojo747400', + 'uploader': 'jojo747400', 'duration': 200, 'age_limit': 18, } @@ -75,8 +75,9 @@ class XHamsterIE(InfoExtractor): if upload_date: upload_date = unified_strdate(upload_date) - uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)', - webpage, 'uploader id', default='anonymous') + uploader = self._html_search_regex( + r"<a href='[^']+xhamster\.com/user/[^>]+>(?P<uploader>[^<]+)", + webpage, 'uploader', default='anonymous') thumbnail = self._search_regex( [r'''thumb\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''', @@ -127,7 +128,7 @@ class XHamsterIE(InfoExtractor): 'title': title, 'description': description, 'upload_date': upload_date, - 'uploader_id': uploader_id, + 'uploader': uploader, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, From 54a9328b205e8a2c916d59fd81bdb1ede25cf87a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 21:19:52 +0600 Subject: [PATCH 1544/2721] [generic] Expand jwplayer support --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8cef61c3c..6df89f814 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1655,7 +1655,7 @@ class GenericIE(InfoExtractor): if not found: # Broaden the findall a little bit: JWPlayer JS loader found = filter_video(re.findall( - r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) + r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)) if not found: # Flow 
player found = filter_video(re.findall(r'''(?xs) From c71a3195afa8c2a9ed5fe0ffa56ff6c969147d91 Mon Sep 17 00:00:00 2001 From: Delon <liuxi326@qq.com> Date: Wed, 5 Aug 2015 18:22:25 +0800 Subject: [PATCH 1545/2721] [tudou] Fix extracion --- youtube_dl/extractor/tudou.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index c89de5ba4..9b934cb57 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -29,6 +29,8 @@ class TudouIE(InfoExtractor): } }] + _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' + def _url_for_id(self, id, quality=None): info_url = "http://v2.tudou.com/f?id=" + str(id) if quality: @@ -76,6 +78,9 @@ class TudouIE(InfoExtractor): 'ext': ext, 'title': title, 'thumbnail': thumbnail_url, + 'http_headers': { + 'Referer': self._PLAYER_URL, + }, } result.append(part_info) From 238755752f4f9169a1edda91067c8627afe19cce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 23:07:52 +0600 Subject: [PATCH 1546/2721] [tudou] Extract player URL from the webpage --- youtube_dl/extractor/tudou.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 9b934cb57..84fe71aef 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -30,7 +30,7 @@ class TudouIE(InfoExtractor): }] _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' - + def _url_for_id(self, id, quality=None): info_url = "http://v2.tudou.com/f?id=" + str(id) if quality: @@ -56,6 +56,10 @@ class TudouIE(InfoExtractor): thumbnail_url = self._search_regex( r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False) + player_url = self._search_regex( + r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']", + webpage, 'player URL', default=self._PLAYER_URL) + segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 
'segments') segments = json.loads(segs_json) # It looks like the keys are the arguments that have to be passed as @@ -79,7 +83,7 @@ class TudouIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail_url, 'http_headers': { - 'Referer': self._PLAYER_URL, + 'Referer': player_url, }, } result.append(part_info) From f535ec8278c8f465b47919d3f451571ae8ccfc7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 23:08:26 +0600 Subject: [PATCH 1547/2721] [xhamster] Remove unused import --- youtube_dl/extractor/xhamster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 06fedf840..f76ee8fd4 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -4,7 +4,6 @@ import re from .common import InfoExtractor from ..utils import ( - ExtractorError, unified_strdate, str_to_int, int_or_none, From c73cdd800f0dc7b465ac0b36d338875bb80c23aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 5 Aug 2015 23:08:55 +0600 Subject: [PATCH 1548/2721] [xhamster] flake8 --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index f76ee8fd4..97315750f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -81,7 +81,7 @@ class XHamsterIE(InfoExtractor): thumbnail = self._search_regex( [r'''thumb\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''', r'''<video[^>]+poster=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''], - webpage, 'thumbnail', fatal=False, group='thumbnail') + webpage, 'thumbnail', fatal=False, group='thumbnail') duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>', webpage, 'duration', fatal=False)) From 51f267d9d4d26c3cd67f318a2040513946f2b4d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= 
<dstftw@gmail.com> Date: Thu, 6 Aug 2015 22:01:01 +0600 Subject: [PATCH 1549/2721] [YoutubeDL:utils] Move percent encode non-ASCII URLs workaround to http_request and simplify (Closes #6457) --- youtube_dl/YoutubeDL.py | 21 --------------------- youtube_dl/utils.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1446b3254..079d42ce8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1860,27 +1860,6 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ - - # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not - # always respected by websites, some tend to give out URLs with non percent-encoded - # non-ASCII characters (see telemb.py, ard.py [#3412]) - # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) - # To work around aforementioned issue we will replace request's original URL with - # percent-encoded one - req_is_string = isinstance(req, compat_basestring) - url = req if req_is_string else req.get_full_url() - url_escaped = escape_url(url) - - # Substitute URL if any change after escaping - if url != url_escaped: - if req_is_string: - req = url_escaped - else: - req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request - req = req_type( - url_escaped, data=req.data, headers=req.headers, - origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) - return self._opener.open(req, timeout=self._socket_timeout) def print_debug_header(self): diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 78dc2b449..c7db75f80 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -651,6 +651,26 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): return ret def http_request(self, req): + # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not + # always respected by 
websites, some tend to give out URLs with non percent-encoded + # non-ASCII characters (see telemb.py, ard.py [#3412]) + # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) + # To work around aforementioned issue we will replace request's original URL with + # percent-encoded one + # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) + # the code of this workaround has been moved here from YoutubeDL.urlopen() + url = req.get_full_url() + url_escaped = escape_url(url) + + # Substitute URL if any change after escaping + if url != url_escaped: + req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request + new_req = req_type( + url_escaped, data=req.data, headers=req.headers, + origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) + new_req.timeout = req.timeout + req = new_req + for h, v in std_headers.items(): # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 # The dict keys are capitalized because of this bug by urllib From bd690a9f9368095f561184778fb2f3ef12c66342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 6 Aug 2015 22:01:31 +0600 Subject: [PATCH 1550/2721] [southpark:de] Add test for non-ASCII in URLs --- youtube_dl/extractor/southpark.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 7fb165a87..87b650468 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -45,6 +45,14 @@ class SouthParkDeIE(SouthParkIE): 'title': 'The Government Won\'t Respect My Privacy', 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', }, + }, { + # non-ASCII characters in initial URL + 'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen', + 'playlist_count': 4, + }, { + # non-ASCII characters in redirect URL + 'url': 
'http://www.southpark.de/alle-episoden/s18e09', + 'playlist_count': 4, }] From 4f34cdb0a87a506d25a352ff265678c86cb9b979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 6 Aug 2015 23:56:44 +0600 Subject: [PATCH 1551/2721] [southpark:de] Skip test --- youtube_dl/extractor/southpark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index 87b650468..ad63a8785 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -53,6 +53,7 @@ class SouthParkDeIE(SouthParkIE): # non-ASCII characters in redirect URL 'url': 'http://www.southpark.de/alle-episoden/s18e09', 'playlist_count': 4, + 'skip': 'Broken python 3', }] From 671302b5c0ff8cefa5f26e599423ef7799b19631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Aug 2015 00:08:11 +0600 Subject: [PATCH 1552/2721] [YoutubeDL] Remove unused imports --- youtube_dl/YoutubeDL.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 079d42ce8..cad6b026e 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -28,7 +28,6 @@ if os.name == 'nt': import ctypes from .compat import ( - compat_basestring, compat_cookiejar, compat_expanduser, compat_get_terminal_size, @@ -40,7 +39,6 @@ from .compat import ( compat_urllib_request, ) from .utils import ( - escape_url, ContentTooShortError, date_from_str, DateRange, @@ -51,7 +49,6 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, - HEADRequest, locked_file, make_HTTPS_handler, MaxDownloadsReached, From cd6b555e19c601d575679dd29da0080eda7f8890 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 6 Aug 2015 19:17:50 +0100 Subject: [PATCH 1553/2721] [dcn] add origin to api request and fix the test and check with flake8 --- youtube_dl/extractor/dcn.py | 30 ++++++++++++++++++++---------- 1 file 
changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index f76ebda9e..d44e8cef0 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -1,4 +1,9 @@ +# coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor +from ..compat import compat_urllib_request + class DcnIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' @@ -9,24 +14,29 @@ class DcnIE(InfoExtractor): 'id': '17375', 'ext': 'm3u8', 'title': 'رحلة العمر : الحلقة 1', - 'description': '"في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة1"', + 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', 'duration': '2041' - } + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, } def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id='+video_id, - video_id + request = compat_urllib_request.Request( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=' + video_id, + headers={'Origin': 'http://www.dcndigital.ae'} ) - title = json_data['title_ar']; - thumbnail = 'http://admin.mangomolo.com/analytics/'+json_data['img']; - duration = json_data['duration']; - description = json_data['description_ar']; + json_data = self._download_json(request, video_id) + title = json_data['title_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] + duration = json_data['duration'] + description = json_data['description_ar'] webpage = 
self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id='+json_data['id']+'&user_id='+json_data['user_id']+'&countries=Q0M=&w=100%&h=100%&filter=DENY&signature='+json_data['signature'], + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id ) m3u8_url = self._html_search_regex( From 3be3c622dc1d3d7b92c5268a079d202a9f2b0a5a Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 6 Aug 2015 19:37:45 +0100 Subject: [PATCH 1554/2721] [shahid] generic errors handling and check with flake8 --- youtube_dl/extractor/shahid.py | 42 ++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index b3b45da24..57c159833 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -1,3 +1,6 @@ +# coding: utf-8 +from __future__ import unicode_literals + from .common import InfoExtractor from ..utils import ( js_to_json, @@ -5,6 +8,7 @@ from ..utils import ( int_or_none ) + class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?' 
_TESTS = [ @@ -23,7 +27,7 @@ class ShahidIE(InfoExtractor): } }, { - #shahid plus subscriber only + # shahid plus subscriber only 'url': 'https://shahid.mbc.net/ar/series/90497/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011.html', 'only_matching': True } @@ -32,31 +36,15 @@ class ShahidIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + player_info = '' - for line in self._search_regex( 'var flashvars = ({[^}]+})', webpage, 'flashvars').splitlines(): + for line in self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars').splitlines(): if '+' not in line and '(' not in line and ')' not in line: player_info += line player_info = self._parse_json(js_to_json(player_info), video_id) video_id = player_info['id'] player_type = player_info['playerType'] - video_info = self._download_json( - player_info['url'] + '/' + player_type + '/' + video_id + - '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', - video_id - )['data'] - if video_info['error']: - for error in video_info['error']: - raise ExtractorError(error) - video_info = video_info[player_type] - if video_info.get('availabilities').get('plus'): - raise ExtractorError('plus members only') - title = video_info['title'] - thumbnail = video_info.get('thumbnailUrl') - categories = [category['name'] for category in video_info.get('genres')] - description = video_info.get('description') - duration = int_or_none(video_info.get('duration')) - player_json_data = self._download_json( 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + player_info['type'] + '.html', video_id @@ -66,8 +54,22 @@ class ShahidIE(InfoExtractor): else: for error in player_json_data['error'].values(): raise ExtractorError(error) - return formats = self._extract_m3u8_formats(m3u8_url, video_id) + + video_info = self._download_json( + player_info['url'] + '/' + player_type + '/' + video_id + 
'?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', + video_id + )['data'] + if video_info.get('error'): + for error in video_info['error']: + raise ExtractorError(error) + video_info = video_info[player_type] + title = video_info['title'] + thumbnail = video_info.get('thumbnailUrl') + categories = [category['name'] for category in video_info.get('genres')] + description = video_info.get('description') + duration = int_or_none(video_info.get('duration')) + return { 'id': video_id, 'title': title, From 5a4d9ddb218e761fe7ab15d197690e0cb132a536 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Aug 2015 01:26:40 +0600 Subject: [PATCH 1555/2721] [utils] Percent-encode redirect URL of Location header (Closes #6457) --- youtube_dl/utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c7db75f80..e265c7574 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -715,6 +715,17 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): gz = io.BytesIO(self.deflate(resp.read())) resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg + # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 + if 300 <= resp.code < 400: + location = resp.headers.get('Location') + if location: + # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 + if sys.version_info >= (3, 0): + location = location.encode('iso-8859-1').decode('utf-8') + location_escaped = escape_url(location) + if location != location_escaped: + del resp.headers['Location'] + resp.headers['Location'] = location_escaped return resp https_request = http_request From 9663bd3abb78911bddad75742bd41006677d628e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Aug 2015 01:27:07 +0600 Subject: [PATCH 1556/2721] [southpark:de] Enable non-ASCII redirect 
URL test --- youtube_dl/extractor/southpark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py index ad63a8785..87b650468 100644 --- a/youtube_dl/extractor/southpark.py +++ b/youtube_dl/extractor/southpark.py @@ -53,7 +53,6 @@ class SouthParkDeIE(SouthParkIE): # non-ASCII characters in redirect URL 'url': 'http://www.southpark.de/alle-episoden/s18e09', 'playlist_count': 4, - 'skip': 'Broken python 3', }] From 3eb5fdb58112032a9831eda1d2e3b8a151ea217f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 6 Aug 2015 22:55:43 +0200 Subject: [PATCH 1557/2721] release 2015.08.06 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index fa157cadb..b81d5e658 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.07.28' +__version__ = '2015.08.06' From 430b092a5f59fbe407b92ebcb0c42b9f7062a334 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 6 Aug 2015 23:06:21 +0200 Subject: [PATCH 1558/2721] release 2015.08.06.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b81d5e658..9f209499c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.06' +__version__ = '2015.08.06.1' From 6d30cf04db9c9662dbb30c2490e24eb5c6dca4c3 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 7 Aug 2015 10:01:18 +0100 Subject: [PATCH 1559/2721] [dcn] fix type and key errors --- youtube_dl/extractor/dcn.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index d44e8cef0..22ff35b56 100644 --- a/youtube_dl/extractor/dcn.py 
+++ b/youtube_dl/extractor/dcn.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_urllib_request +from ..utils import int_or_none class DcnIE(InfoExtractor): @@ -16,7 +17,7 @@ class DcnIE(InfoExtractor): 'title': 'رحلة العمر : الحلقة 1', 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', - 'duration': '2041' + 'duration': 2041 }, 'params': { # m3u8 download @@ -32,9 +33,9 @@ class DcnIE(InfoExtractor): ) json_data = self._download_json(request, video_id) title = json_data['title_ar'] - thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data['img'] - duration = json_data['duration'] - description = json_data['description_ar'] + thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data.get('img') + duration = int_or_none(json_data.get('duration')) + description = json_data.get('description_ar') webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], video_id From 8002ac9e0a88d918735c06599dbf8f2005f79666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Aug 2015 19:04:44 +0600 Subject: [PATCH 1560/2721] [nowtv] Add support for .at TLD --- youtube_dl/extractor/nowtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index ad938fb62..78e8851c0 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -14,7 +14,7 @@ from ..utils import ( class NowTVIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?nowtv\.de/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)' + _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)' _TESTS = [{ # rtl From acc1adbe7ab93657cd4d303cee1fba4464931a50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Aug 2015 19:50:54 +0600 Subject: [PATCH 1561/2721] [nowtv] Add support for .ch TLD --- youtube_dl/extractor/nowtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 78e8851c0..fc21d8e3f 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -14,7 +14,7 @@ from ..utils import ( class NowTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)' + _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<id>.+?)/(?:player|preview)' _TESTS = [{ # rtl From 0f422256d6eea5aff062a4c35d7434cd118c7a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 7 Aug 2015 19:51:09 +0600 Subject: [PATCH 1562/2721] [nowtv] Add .at test --- youtube_dl/extractor/nowtv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index fc21d8e3f..66c627bec 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -127,6 +127,9 @@ class NowTVIE(InfoExtractor): }, { 'url': 'http://www.nowtv.de/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview', 'only_matching': True, + }, { + 'url': 'http://www.nowtv.at/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit/preview?return=/rtl/bauer-sucht-frau/die-neuen-bauern-und-eine-hochzeit', + 'only_matching': True, }] def _real_extract(self, url): From f94639fadf91312bf3365802981f506ecba698dc Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 00:06:03 +0600 Subject: [PATCH 1563/2721] [dcn] Improve --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/dcn.py | 78 ++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index eb8ef1fe3..922d9b3d8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -118,7 +118,7 @@ from .dailymotion import ( ) from .daum import DaumIE from .dbtv import DBTVIE -from .dcn import DcnIE +from .dcn import DCNIE from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE from .dfb import DFBIE diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 22ff35b56..b98a6c032 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -2,22 +2,30 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_request -from ..utils import int_or_none +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, +) +from ..utils import ( + int_or_none, + parse_iso8601, +) -class DcnIE(InfoExtractor): +class DCNIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' 
_TEST = { 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', 'info_dict': { 'id': '17375', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': 'رحلة العمر : الحلقة 1', - 'description': 'في هذه الحلقة من برنامج رحلة العمر يقدّم الدكتور عمر عبد الكافي تبسيطاً لمناسك الحج والعمرة ويجيب مباشرة على استفسارات حجاج بيت الله الحرام بخصوص مناسك الحج والعمرة\n1', - 'thumbnail': 'http://admin.mangomolo.com/analytics/uploads/71/images/media/2/2cefc09d7bec80afa754682f40e49503.jpg', - 'duration': 2041 + 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 2041, + 'timestamp': 1227504126, + 'upload_date': '20081124', }, 'params': { # m3u8 download @@ -27,30 +35,50 @@ class DcnIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + request = compat_urllib_request.Request( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=' + video_id, - headers={'Origin': 'http://www.dcndigital.ae'} - ) - json_data = self._download_json(request, video_id) - title = json_data['title_ar'] - thumbnail = 'http://admin.mangomolo.com/analytics/' + json_data.get('img') - duration = int_or_none(json_data.get('duration')) - description = json_data.get('description_ar') + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + headers={'Origin': 'http://www.dcndigital.ae'}) + + video = self._download_json(request, video_id) + title = video.get('title_en') or video['title_ar'] + webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?id=' + json_data['id'] + '&user_id=' + json_data['user_id'] + '&countries=Q0M=&w=100%&h=100%&filter=DENY&signature=' + json_data['signature'], - video_id - ) - m3u8_url = self._html_search_regex( - r'file:\s*"([^"]+)', - webpage, - 'm3u8_url' - ) - formats = self._extract_m3u8_formats(m3u8_url, video_id) + 
'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + + compat_urllib_parse.urlencode({ + 'id': video['id'], + 'user_id': video['user_id'], + 'signature': video['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }), video_id) + + m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url') + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + + rtsp_url = self._search_regex( + r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) + if rtsp_url: + formats.append({ + 'url': rtsp_url, + 'format_id': 'rtsp', + }) + + self._sort_formats(formats) + + img = video.get('img') + thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None + duration = int_or_none(video.get('duration')) + description = video.get('description_en') or video.get('description_ar') + timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ') + return { 'id': video_id, 'title': title, + 'description': description, 'thumbnail': thumbnail, 'duration': duration, - 'description': description, + 'timestamp': timestamp, 'formats': formats, } From 4a7434d0b09e14b773c2d278c8299efa6225b84e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 00:19:40 +0600 Subject: [PATCH 1564/2721] [dcn] Simplify _VALID_URL --- youtube_dl/extractor/dcn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index b98a6c032..82261e25c 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -13,7 +13,7 @@ from ..utils import ( class DCNIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)/?' 
+ _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/.+|show/\d+/.+?)/(?P<id>\d+)' _TEST = { 'url': 'http://www.dcndigital.ae/#/show/199074/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375/6887', 'info_dict': From fd5d8270dcd6d8baada3390a4a1cae5bdbcb6da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 01:10:41 +0600 Subject: [PATCH 1565/2721] [clipfish] Fix extraction, minimize requests, get rid of drm hds, extract m3u8 and more metadata --- youtube_dl/extractor/clipfish.py | 56 ++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py index 09dfaac60..7af903571 100644 --- a/youtube_dl/extractor/clipfish.py +++ b/youtube_dl/extractor/clipfish.py @@ -1,18 +1,19 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( - ExtractorError, + determine_ext, int_or_none, js_to_json, - determine_ext, + parse_iso8601, + remove_end, ) class ClipfishIE(InfoExtractor): - IE_NAME = 'clipfish' - - _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/', 'md5': '79bc922f3e8a9097b3d68a93780fd475', @@ -20,35 +21,48 @@ class ClipfishIE(InfoExtractor): 'id': '3966754', 'ext': 'mp4', 'title': 'FIFA 14 - E3 2013 Trailer', + 'timestamp': 1370938118, + 'upload_date': '20130611', 'duration': 82, } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_info = self._parse_json( - js_to_json(self._html_search_regex('var videoObject = ({[^}]+?})', webpage, 'videoObject')), - video_id - ) - info_url = self._parse_json( - 
js_to_json(self._html_search_regex('var globalFlashvars = ({[^}]+?})', webpage, 'globalFlashvars')), - video_id - )['data'] - doc = self._download_xml( - info_url, video_id, note='Downloading info page') - title = doc.find('title').text - video_url = doc.find('filename').text - thumbnail = doc.find('imageurl').text - duration = int_or_none(video_info['length']) - formats = [{'url': video_info['videourl']},{'url': video_url}] + webpage = self._download_webpage(url, video_id) + + video_info = self._parse_json( + js_to_json(self._html_search_regex( + '(?s)videoObject\s*=\s*({.+?});', webpage, 'video object')), + video_id) + + formats = [] + for video_url in re.findall(r'var\s+videourl\s*=\s*"([^"]+)"', webpage): + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.append({ + 'url': video_url.replace('de.hls.fra.clipfish.de', 'hls.fra.clipfish.de'), + 'ext': 'mp4', + 'format_id': 'hls', + }) + else: + formats.append({ + 'url': video_url, + 'format_id': ext, + }) self._sort_formats(formats) + title = remove_end(self._og_search_title(webpage), ' - Video') + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(video_info.get('length')) + timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage, 'upload date')) + return { 'id': video_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, 'duration': duration, + 'timestamp': timestamp, } From 8a37aa1517ccc474b3e2831b77e48534cb8ed47c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 01:55:59 +0600 Subject: [PATCH 1566/2721] [extractor/generic] Expand ooyala regex (Closes #6485) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 6df89f814..649c0bce6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1320,7 +1320,7 @@ class GenericIE(InfoExtractor): return 
self.url_result(mobj.group('url')) # Look for Ooyala videos - mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or + mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage)) From bf94d763ba73e09fd77d25110c7219254b63c786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 02:00:49 +0600 Subject: [PATCH 1567/2721] [extractor/generic] Add test for #6485 --- youtube_dl/extractor/generic.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 649c0bce6..469909a51 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -236,6 +236,19 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, + { + # ooyala video embedded with http://player.ooyala.com/iframe.js + 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/', + 'info_dict': { + 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB', + 'ext': 'mp4', + 'title': '"Steve Jobs: Man in the Machine" trailer', + 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', + }, + 'params': { + 'skip_download': True, + }, + }, # multiple ooyala embeds on SBN network websites { 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', From c29458f3ec77072e9c17169b78871bf4473134d6 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 7 Aug 2015 21:38:50 +0100 Subject: [PATCH 1568/2721] [shahid] change the tests --- youtube_dl/extractor/shahid.py | 12 ++++++------ 1 
file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 57c159833..b2050525e 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -13,13 +13,13 @@ class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?' _TESTS = [ { - 'url': 'https://shahid.mbc.net/ar/episode/108084/%D8%AE%D9%88%D8%A7%D8%B7%D8%B1-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-11-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', + 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', 'info_dict': { - 'id': '108084', + 'id': '90574', 'ext': 'm3u8', - 'title': 'خواطر الموسم 11 الحلقة 1', - 'description': 'بسم الله', - 'duration': 1166, + 'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3', + 'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان', + 'duration': 2972, }, 'params': { # m3u8 download @@ -28,7 +28,7 @@ class ShahidIE(InfoExtractor): }, { # shahid plus subscriber only - 'url': 'https://shahid.mbc.net/ar/series/90497/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011.html', + 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', 'only_matching': True } ] From e0ac521438218e978b9c4bbcd92cfc2d5fef79cb Mon Sep 17 00:00:00 2001 From: vijayanand nandam <vijay@cybrilla.com> Date: Thu, 6 Aug 2015 22:42:58 +0530 Subject: [PATCH 1569/2721] adding support for axel download manager --- youtube_dl/downloader/external.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 1d5cc9904..30699934b 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -83,6 +83,16 @@ class 
CurlFD(ExternalFD): return cmd +class AxelFD(ExternalFD): + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-o', tmpfilename] + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] From 5b0c40da24b5ddb789428de731e02ac8759a363c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 03:36:29 +0600 Subject: [PATCH 1570/2721] [extractor/common] Expand meta regex --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dc5080504..507ea5ec0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -636,7 +636,7 @@ class InfoExtractor(object): @staticmethod def _meta_regex(prop): return r'''(?isx)<meta - (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) + (?=[^>]+(?:itemprop|name|property|id)=(["\']?)%s\1) [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) def _og_search_property(self, prop, html, name=None, **kargs): From 3550821fb4ca2f0e47542a7fa16b6543b06df724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 03:38:55 +0600 Subject: [PATCH 1571/2721] [periscope] Add extractor (Closes #5850, closes #6459) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/periscope.py | 66 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 youtube_dl/extractor/periscope.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 922d9b3d8..bd86a5be2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -432,6 +432,7 @@ from .orf import ( from .parliamentliveuk 
import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE +from .periscope import PeriscopeIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py new file mode 100644 index 000000000..5219e1a75 --- /dev/null +++ b/youtube_dl/extractor/periscope.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + unescapeHTML, +) + + +class PeriscopeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)' + _TEST = { + 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', + 'md5': '65b57957972e503fcbbaeed8f4fa04ca', + 'info_dict': { + 'id': '56102209', + 'ext': 'mp4', + 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗', + 'timestamp': 1438978559, + 'upload_date': '20150807', + 'uploader': 'Bec Boop', + 'uploader_id': '1465763', + }, + 'skip': 'Expires in 24 hours', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + replay = self._download_json( + 'https://api.periscope.tv/api/v2/getAccessPublic?token=%s' % video_id, video_id) + + video_url = replay['replay_url'] + + webpage = self._download_webpage(url, video_id) + + broadcast_data = self._parse_json( + unescapeHTML(self._html_search_meta( + 'broadcast-data', webpage, 'broadcast data', fatal=True)), + video_id) + + broadcast = broadcast_data['broadcast'] + status = broadcast['status'] + + uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name') + uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id') + + title = '%s - %s' % (uploader, status) if uploader else status + timestamp = parse_iso8601(broadcast.get('created_at')) 
+ + thumbnails = [{ + 'url': broadcast[image], + } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + + return { + 'id': broadcast.get('id') or video_id, + 'url': video_url, + 'ext': 'mp4', + 'protocol': 'm3u8_native', + 'title': title, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'thumbnails': thumbnails, + } From 621d6a9516e0f9cd8c45e12904f4d4b7615e7fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 04:00:52 +0600 Subject: [PATCH 1572/2721] [periscope] Switch to API for broadcast data --- youtube_dl/extractor/periscope.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 5219e1a75..11648a511 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -25,21 +25,17 @@ class PeriscopeIE(InfoExtractor): 'skip': 'Expires in 24 hours', } + def _call_api(self, method, token): + return self._download_json( + 'https://api.periscope.tv/api/v2/%s?token=%s' % (method, token), token) + def _real_extract(self, url): - video_id = self._match_id(url) - - replay = self._download_json( - 'https://api.periscope.tv/api/v2/getAccessPublic?token=%s' % video_id, video_id) + token = self._match_id(url) + replay = self._call_api('getAccessPublic', token) video_url = replay['replay_url'] - webpage = self._download_webpage(url, video_id) - - broadcast_data = self._parse_json( - unescapeHTML(self._html_search_meta( - 'broadcast-data', webpage, 'broadcast data', fatal=True)), - video_id) - + broadcast_data = self._call_api('getBroadcastPublic', token) broadcast = broadcast_data['broadcast'] status = broadcast['status'] @@ -54,7 +50,7 @@ class PeriscopeIE(InfoExtractor): } for image in ('image_url', 'image_url_small') if broadcast.get(image)] return { - 'id': broadcast.get('id') or video_id, + 'id': broadcast.get('id') or token, 'url': 
video_url, 'ext': 'mp4', 'protocol': 'm3u8_native', From 1e83741c9a5d67e8bbe65510d41b558361496fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 05:33:53 +0600 Subject: [PATCH 1573/2721] [periscope] Add support for running streams --- youtube_dl/extractor/periscope.py | 34 +++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 11648a511..de53b752d 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -2,13 +2,15 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - parse_iso8601, - unescapeHTML, +from ..compat import ( + compat_urllib_parse, + compat_urllib_request, ) +from ..utils import parse_iso8601 class PeriscopeIE(InfoExtractor): + IE_DESC = 'Periscope' _VALID_URL = r'https?://(?:www\.)?periscope\.tv/w/(?P<id>[^/?#]+)' _TEST = { 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', @@ -32,9 +34,6 @@ class PeriscopeIE(InfoExtractor): def _real_extract(self, url): token = self._match_id(url) - replay = self._call_api('getAccessPublic', token) - video_url = replay['replay_url'] - broadcast_data = self._call_api('getBroadcastPublic', token) broadcast = broadcast_data['broadcast'] status = broadcast['status'] @@ -43,20 +42,37 @@ class PeriscopeIE(InfoExtractor): uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id') title = '%s - %s' % (uploader, status) if uploader else status + state = broadcast.get('state').lower() + if state == 'running': + title = self._live_title(title) timestamp = parse_iso8601(broadcast.get('created_at')) thumbnails = [{ 'url': broadcast[image], } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + stream = self._call_api('getAccessPublic', token) + + formats = [] + for format_id in 
('replay', 'rtmp', 'hls', 'https_hls'): + video_url = stream.get(format_id + '_url') + if not video_url: + continue + f = { + 'url': video_url, + 'ext': 'flv' if format_id == 'rtmp' else 'mp4', + } + if format_id != 'rtmp': + f['protocol'] = 'm3u8_native' if state == 'ended' else 'm3u8' + formats.append(f) + self._sort_formats(formats) + return { 'id': broadcast.get('id') or token, - 'url': video_url, - 'ext': 'mp4', - 'protocol': 'm3u8_native', 'title': title, 'timestamp': timestamp, 'uploader': uploader, 'uploader_id': uploader_id, 'thumbnails': thumbnails, + 'formats': formats, } From 428e4e4a850df81031e8267dddf759da605639e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 05:37:38 +0600 Subject: [PATCH 1574/2721] [quickscope] Add extractor --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/periscope.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bd86a5be2..e38e77a27 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -432,7 +432,10 @@ from .orf import ( from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE -from .periscope import PeriscopeIE +from .periscope import ( + PeriscopeIE, + QuickscopeIE, +) from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index de53b752d..578b53a24 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -76,3 +76,24 @@ class PeriscopeIE(InfoExtractor): 'thumbnails': thumbnails, 'formats': formats, } + + +class QuickscopeIE(InfoExtractor): + IE_DESC = 'Quisck Scope' + _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)' + _TEST = { + 'url': 
'https://watchonperiscope.com/broadcast/56180087', + 'only_matching': True, + } + + def _real_extract(self, url): + broadcast_id = self._match_id(url) + request = compat_urllib_request.Request( + 'https://watchonperiscope.com/api/accessChannel', compat_urllib_parse.urlencode({ + 'broadcast_id': broadcast_id, + 'entry_ticket': '', + 'from_push': 'false', + 'uses_sessions': 'true', + }).encode('utf-8')) + return self.url_result( + self._download_json(request, broadcast_id)['share_url'], 'Periscope') From b2f82948ee5eadc483c01dc589b82426bb32ba68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 05:40:41 +0600 Subject: [PATCH 1575/2721] [quickscope] Fix typo --- youtube_dl/extractor/periscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 578b53a24..8ad936758 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -79,7 +79,7 @@ class PeriscopeIE(InfoExtractor): class QuickscopeIE(InfoExtractor): - IE_DESC = 'Quisck Scope' + IE_DESC = 'Quick Scope' _VALID_URL = r'https?://watchonperiscope\.com/broadcast/(?P<id>\d+)' _TEST = { 'url': 'https://watchonperiscope.com/broadcast/56180087', From 59e89e62d7b45554cef502dc4986f35618110679 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sat, 8 Aug 2015 12:59:10 +0100 Subject: [PATCH 1576/2721] [shahid] add default fallbacks for extracting api vars --- youtube_dl/extractor/shahid.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index b2050525e..399140189 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -33,20 +33,30 @@ class ShahidIE(InfoExtractor): } ] + _api_vars = { + 'type': 'player', + 'url': 'http://api.shahid.net/api/v1_1', + 'playerType': 'episode', + } + def 
_real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_info = '' - for line in self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars').splitlines(): - if '+' not in line and '(' not in line and ')' not in line: - player_info += line - player_info = self._parse_json(js_to_json(player_info), video_id) - video_id = player_info['id'] - player_type = player_info['playerType'] + flash_vars = self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars', None) + if flash_vars is not None: + for line in flash_vars.splitlines(): + if '+' not in line and '(' not in line and ')' not in line: + player_info += line + player_info = self._parse_json(player_info, video_id, js_to_json, False) + if player_info is not None: + for key in self._api_vars: + if key in player_info: + self._api_vars[key] = player_info[key] player_json_data = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + player_info['type'] + '.html', + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + self._api_vars['type'] + '.html', video_id )['data'] if 'url' in player_json_data: @@ -57,13 +67,13 @@ class ShahidIE(InfoExtractor): formats = self._extract_m3u8_formats(m3u8_url, video_id) video_info = self._download_json( - player_info['url'] + '/' + player_type + '/' + video_id + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', + self._api_vars['url'] + '/' + self._api_vars['playerType'] + '/' + video_id + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', video_id )['data'] if video_info.get('error'): for error in video_info['error']: raise ExtractorError(error) - video_info = video_info[player_type] + video_info = video_info[self._api_vars['playerType']] title = video_info['title'] thumbnail = video_info.get('thumbnailUrl') categories = [category['name'] for category in 
video_info.get('genres')] From 154655a85ae8b7740aa9fe7821544050fd65641b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 19:21:05 +0600 Subject: [PATCH 1577/2721] [downloader/external] Respect --no-check-certificate for wget --- youtube_dl/downloader/external.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 30699934b..07ce59f7d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -51,6 +51,9 @@ class ExternalFD(FileDownloader): return [] return [command_option, source_address] + def _no_check_certificate(self, command_option): + return [command_option] if self.params.get('nocheckcertificate', False) else [] + def _configuration_args(self, default=[]): ex_args = self.params.get('external_downloader_args') if ex_args is None: @@ -99,6 +102,7 @@ class WgetFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--bind-address') + cmd += self._no_check_certificate('--no-check-certificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd From b465083f45e63fe8aeb0255b5cea7dfbf0770a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 21:27:10 +0600 Subject: [PATCH 1578/2721] [sexykarma] Fix test --- youtube_dl/extractor/sexykarma.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/sexykarma.py b/youtube_dl/extractor/sexykarma.py index 6446d26dc..e33483674 100644 --- a/youtube_dl/extractor/sexykarma.py +++ b/youtube_dl/extractor/sexykarma.py @@ -29,6 +29,7 @@ class SexyKarmaIE(InfoExtractor): 'view_count': int, 'comment_count': int, 'categories': list, + 'age_limit': 18, } }, { 'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html', From b61b7787cbef408154695bbb9f5c3d29a70fdd38 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 21:30:57 +0600 Subject: [PATCH 1579/2721] [91porn] Extract age limit --- youtube_dl/extractor/porn91.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py index 72d1b2718..3e15533e9 100644 --- a/youtube_dl/extractor/porn91.py +++ b/youtube_dl/extractor/porn91.py @@ -22,6 +22,7 @@ class Porn91IE(InfoExtractor): 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', 'ext': 'mp4', 'duration': 431, + 'age_limit': 18, } } @@ -68,4 +69,5 @@ class Porn91IE(InfoExtractor): 'url': video_url, 'duration': duration, 'comment_count': comment_count, + 'age_limit': self._rta_search(webpage), } From 8e2b1be12791b4e62c463562b570661e7b2c5852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 21:42:50 +0600 Subject: [PATCH 1580/2721] [test/helper] Make age_limit checkable field --- test/helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index c8b34654d..cb6eec8d9 100644 --- a/test/helper.py +++ b/test/helper.py @@ -160,7 +160,7 @@ def expect_info_dict(self, got_dict, expected_dict): # Are checkable fields missing from the test case definition? 
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) for key, value in got_dict.items() - if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location')) + if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit')) missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) if missing_keys: def _repr(v): From 18c3281f9e1e32e00c778b149137fc91accb3b1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 21:43:20 +0600 Subject: [PATCH 1581/2721] [24video] Fix test --- youtube_dl/extractor/fourtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index b2284ab01..3bb4f6239 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -32,6 +32,7 @@ class FourTubeIE(InfoExtractor): 'view_count': int, 'like_count': int, 'categories': list, + 'age_limit': 18, } } From 464e792496665b2e3dcabf5df43a45604673730a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 21:51:21 +0600 Subject: [PATCH 1582/2721] [vpro] Override npo IE_NAME --- youtube_dl/extractor/npo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 0c2d02c10..eb12fb810 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -407,6 +407,7 @@ class NPORadioFragmentIE(InfoExtractor): class VPROIE(NPOIE): + IE_NAME = 'vpro' _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html' _TESTS = [ From d7bb8884afc8651b0ad86046dcd56a5330c98dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 21:58:24 +0600 Subject: [PATCH 1583/2721] [break] Add 
age_limit to test --- youtube_dl/extractor/breakcom.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 809287d14..aa08051b1 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -18,6 +18,7 @@ class BreakIE(InfoExtractor): 'id': '2468056', 'ext': 'mp4', 'title': 'When Girls Act Like D-Bags', + 'age_limit': 13, } }, { 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', From 9f2e7c2f34c48942a2a3e55532dd0d0ef8ed4d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 22:04:48 +0600 Subject: [PATCH 1584/2721] [ok] Add age_limit to tests --- youtube_dl/extractor/odnoklassniki.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 215ffe87b..e5fd1ba04 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -25,6 +25,7 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': '330537914540', 'uploader': 'Виталий Добровольский', 'like_count': int, + 'age_limit': 0, }, }, { # metadataUrl @@ -38,6 +39,7 @@ class OdnoklassnikiIE(InfoExtractor): 'uploader_id': '534380003155', 'uploader': 'Андрей Мещанинов', 'like_count': int, + 'age_limit': 0, }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', From 887e9bc7b561f9b2b97dec8f99f9c04392d95d40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 22:08:54 +0600 Subject: [PATCH 1585/2721] [ok] Update tests --- youtube_dl/extractor/odnoklassniki.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index e5fd1ba04..003d27de7 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -16,12 +16,13 @@ class OdnoklassnikiIE(InfoExtractor): _TESTS 
= [{ # metadata in JSON 'url': 'http://ok.ru/video/20079905452', - 'md5': '8e24ad2da6f387948e7a7d44eb8668fe', + 'md5': '6ba728d85d60aa2e6dd37c9e70fdc6bc', 'info_dict': { 'id': '20079905452', 'ext': 'mp4', 'title': 'Культура меняет нас (прекрасный ролик!))', 'duration': 100, + 'upload_date': '20141207', 'uploader_id': '330537914540', 'uploader': 'Виталий Добровольский', 'like_count': int, @@ -36,8 +37,9 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'mp4', 'title': 'Девушка без комплексов ...', 'duration': 191, + 'upload_date': '20150518', 'uploader_id': '534380003155', - 'uploader': 'Андрей Мещанинов', + 'uploader': '☭ Андрей Мещанинов ☭', 'like_count': int, 'age_limit': 0, }, From c8d1be772daa496759bd85cb95c4ec799294c7f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 8 Aug 2015 22:11:06 +0600 Subject: [PATCH 1586/2721] [rutube] Add age_limit to test --- youtube_dl/extractor/rutube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 5b1c3577a..d94dc7399 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -30,6 +30,7 @@ class RutubeIE(InfoExtractor): 'uploader': 'NTDRussian', 'uploader_id': '29790', 'upload_date': '20131016', + 'age_limit': 0, }, 'params': { # It requires ffmpeg (m3u8 download) From 08df685fe7764ef9f7dc271075340e4effc5e621 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 08:51:37 +0600 Subject: [PATCH 1587/2721] [videolectures] Fix _VALID_URL for test_no_duplicates to pass --- youtube_dl/extractor/videolecturesnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/videolecturesnet.py b/youtube_dl/extractor/videolecturesnet.py index 24584dc80..ef2da5632 100644 --- a/youtube_dl/extractor/videolecturesnet.py +++ b/youtube_dl/extractor/videolecturesnet.py @@ -12,7 +12,7 @@ from ..utils import ( class 
VideoLecturesNetIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)(?:/?[#?].*)?$' + _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$' IE_NAME = 'videolectures.net' _TEST = { From 12bb392a0ff8adbde2ced75b0c4976d0aabc7f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 17:10:40 +0600 Subject: [PATCH 1588/2721] [vimeo] Fix password protected videos (Closes #6507) --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 10d6745af..4c4e3c72a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -203,7 +203,7 @@ class VimeoIE(VimeoBaseInfoExtractor): url = url.replace('http://', 'https://') password_request = compat_urllib_request.Request(url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - password_request.add_header('Cookie', 'xsrft=%s' % token) + password_request.add_header('Referer', url) return self._download_webpage( password_request, video_id, 'Verifying the password', 'Wrong password') From 8d6765cf48138cc44fdbaee4e8c7a199ae348bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 19:07:18 +0600 Subject: [PATCH 1589/2721] [extractor/generic] Add generic support for xspf playlist extraction --- youtube_dl/extractor/common.py | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index def6caa0d..e201ea6db 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -39,6 +39,8 @@ from ..utils import ( sanitize_filename, unescapeHTML, url_basename, + xpath_text, + xpath_with_ns, ) @@ -1142,6 +1144,45 @@ class InfoExtractor(object): }) return subtitles + def _extract_xspf_playlist(self, 
playlist_url, playlist_id): + playlist = self._download_xml( + playlist_url, playlist_id, 'Downloading xpsf playlist', + 'Unable to download xspf manifest') + + NS_MAP = { + 'xspf': 'http://xspf.org/ns/0/', + 's1': 'http://static.streamone.nl/player/ns/0', + } + + entries = [] + for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): + title = xpath_text( + track, xpath_with_ns('./xspf:title', NS_MAP), 'title') + description = xpath_text( + track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') + thumbnail = xpath_text( + track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') + duration = float_or_none( + xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000) + + formats = [{ + 'url': location.text, + 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), + 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), + 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), + } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] + self._sort_formats(formats) + + entries.append({ + 'id': playlist_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + }) + return entries + def _live_title(self, name): """ Generate the title for a live video """ now = datetime.datetime.now() From e0b9d78fab76e2c2819c8a9a7512ad4533319b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 19:09:50 +0600 Subject: [PATCH 1590/2721] [extractor/common] Clarify playlists can have description field --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e201ea6db..9b4775e0a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -204,8 +204,8 @@ class InfoExtractor(object): There must be a key "entries", 
which is a list, an iterable, or a PagedList object, each element of which is a valid dictionary by this specification. - Additionally, playlists can have "title" and "id" attributes with the same - semantics as videos (see above). + Additionally, playlists can have "title", "description" and "id" attributes + with the same semantics as videos (see above). _type "multi_video" indicates that there are multiple videos that From 3a30508b943c044e5f684b703ff58ac352686f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 19:11:23 +0600 Subject: [PATCH 1591/2721] [telegraaf] Add extractor (Closes #6492) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/telegraaf.py | 35 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 youtube_dl/extractor/telegraaf.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e38e77a27..dad3ec87f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -596,6 +596,7 @@ from .techtalks import TechTalksIE from .ted import TEDIE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE +from .telegraaf import TelegraafIE from .telemb import TeleMBIE from .teletask import TeleTaskIE from .tenplay import TenPlayIE diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py new file mode 100644 index 000000000..6f8333cfc --- /dev/null +++ b/youtube_dl/extractor/telegraaf.py @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class TelegraafIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html' + _TEST = { + 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', + 'md5': '83245a9779bcc4a24454bfd53c65b6dc', + 'info_dict': { + 'id': '24353229', + 
'ext': 'mp4', + 'title': 'Tikibad ontruimd wegens brand', + 'description': 'md5:05ca046ff47b931f9b04855015e163a4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 33, + }, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + playlist_url = self._search_regex( + r"iframe\.loadPlayer\('([^']+)'", webpage, 'player') + + entries = self._extract_xspf_playlist(playlist_url, playlist_id) + title = remove_end(self._og_search_title(webpage), ' - VIDEO') + description = self._og_search_description(webpage) + + return self.playlist_result(entries, playlist_id, title, description) From f32143469fd0a2720bd40908ea8360490983b97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 19:15:00 +0600 Subject: [PATCH 1592/2721] [tweakers] Use _extract_xspf_playlist --- youtube_dl/extractor/tweakers.py | 42 +++----------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index c80ec15cf..4bbe76e96 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -25,41 +25,7 @@ class TweakersIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) - - playlist = self._download_xml( - 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % video_id, - video_id) - - NS_MAP = { - 'xspf': 'http://xspf.org/ns/0/', - 's1': 'http://static.streamone.nl/player/ns/0', - } - - track = playlist.find(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)) - - title = xpath_text( - track, xpath_with_ns('./xspf:title', NS_MAP), 'title') - description = xpath_text( - track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') - thumbnail = xpath_text( - track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail') - duration = float_or_none( - xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), - 
1000) - - formats = [{ - 'url': location.text, - 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)), - 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))), - 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))), - } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))] - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - } + playlist_id = self._match_id(url) + entries = self._extract_xspf_playlist( + 'https://tweakers.net/video/s1playlist/%s/playlist.xspf' % playlist_id, playlist_id) + return self.playlist_result(entries, playlist_id) From 0dcb318f622d944ad0f5c23c32c9bc9b00e76aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 19:15:20 +0600 Subject: [PATCH 1593/2721] [tweakers] Fix test --- youtube_dl/extractor/tweakers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index 4bbe76e96..6eeffb1cc 100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -13,7 +13,7 @@ class TweakersIE(InfoExtractor): _VALID_URL = r'https?://tweakers\.net/video/(?P<id>\d+)' _TEST = { 'url': 'https://tweakers.net/video/9926/new-nintendo-3ds-xl-op-alle-fronten-beter.html', - 'md5': '1b5afa817403bb5baa08359dca31e6df', + 'md5': '3147e4ddad366f97476a93863e4557c8', 'info_dict': { 'id': '9926', 'ext': 'mp4', From 98044462b1035000a44b35a41f4f780b2e844f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 19:18:50 +0600 Subject: [PATCH 1594/2721] [extractor/common] Use playlist id as default title --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b4775e0a..be91e03e9 100644 --- 
a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1157,7 +1157,7 @@ class InfoExtractor(object): entries = [] for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)): title = xpath_text( - track, xpath_with_ns('./xspf:title', NS_MAP), 'title') + track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id) description = xpath_text( track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description') thumbnail = xpath_text( From fb2f339fec20c35cb62c1da682e0dfd418faef81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 9 Aug 2015 19:21:25 +0600 Subject: [PATCH 1595/2721] [dhm] Use _extract_xspf_playlist --- youtube_dl/extractor/dhm.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py index 3ed1f1663..127eb0439 100644 --- a/youtube_dl/extractor/dhm.py +++ b/youtube_dl/extractor/dhm.py @@ -34,24 +34,14 @@ class DHMIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + playlist_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, playlist_id) playlist_url = self._search_regex( r"file\s*:\s*'([^']+)'", webpage, 'playlist url') - playlist = self._download_xml(playlist_url, video_id) - - track = playlist.find( - './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track') - - video_url = xpath_text( - track, './{http://xspf.org/ns/0/}location', - 'video url', fatal=True) - thumbnail = xpath_text( - track, './{http://xspf.org/ns/0/}image', - 'thumbnail') + entries = self._extract_xspf_playlist(playlist_url, playlist_id) title = self._search_regex( [r'dc:title="([^"]+)"', r'<title> »([^<]+)'], @@ -63,11 +53,10 @@ class DHMIE(InfoExtractor): r'Length\s*\s*:\s*([^<]+)', webpage, 'duration', default=None)) - return { - 'id': video_id, - 'url': video_url, + entries[0].update({ 
'title': title, 'description': description, 'duration': duration, - 'thumbnail': thumbnail, - } + }) + + return self.playlist_result(entries, playlist_id) From 942acef594428b5f5c7e0ed7860cb6d725d8f1e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:41:55 +0600 Subject: [PATCH 1596/2721] [extractor/common] Extract _parse_xspf --- youtube_dl/extractor/common.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index be91e03e9..5982055be 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1144,11 +1144,15 @@ class InfoExtractor(object): }) return subtitles - def _extract_xspf_playlist(self, playlist_url, playlist_id): - playlist = self._download_xml( + def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True): + xspf = self._download_xml( playlist_url, playlist_id, 'Downloading xpsf playlist', - 'Unable to download xspf manifest') + 'Unable to download xspf manifest', fatal=fatal) + if xspf is False: + return [] + return self._parse_xspf(xspf, playlist_id) + def _parse_xspf(self, playlist, playlist_id): NS_MAP = { 'xspf': 'http://xspf.org/ns/0/', 's1': 'http://static.streamone.nl/player/ns/0', From 729accb48221bd72e40076939616792c1c6fc15f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:43:42 +0600 Subject: [PATCH 1597/2721] [extractor/generic] Add support for xspf playlists --- youtube_dl/extractor/generic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 901f77304..a382d6be4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1198,6 +1198,8 @@ class GenericIE(InfoExtractor): return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): return self._parse_smil(doc, url, video_id) + elif doc.tag 
== '{http://xspf.org/ns/0/}playlist': + return self.playlist_result(self._parse_xspf(doc, video_id), video_id) except compat_xml_parse_error: pass @@ -1799,7 +1801,8 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] - if determine_ext(video_url) == 'smil': + ext = determine_ext(video_url) + if ext == 'smil': entries.append({ 'id': video_id, 'formats': self._extract_smil_formats(video_url, video_id), @@ -1807,6 +1810,8 @@ class GenericIE(InfoExtractor): 'title': video_title, 'age_limit': age_limit, }) + elif ext == 'xspf': + return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) else: entries.append({ 'id': video_id, From 1de5cd3ba51ce67d9a1cd3b40157058e78e46692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:47:08 +0600 Subject: [PATCH 1598/2721] [extractor/generic] Add test for xspf playlist --- youtube_dl/extractor/generic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a382d6be4..4756a658f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -198,6 +198,21 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html + { + 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf', + 'info_dict': { + 'id': 'mZlp2ctYIUEB', + 'ext': 'mp4', + 'title': 'Tikibad ontruimd wegens brand', + 'description': 'md5:05ca046ff47b931f9b04855015e163a4', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 33, + }, + 'params': { + 'skip_download': True, + }, + }, # google redirect { 'url': 
'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', From 0791ac1b4415601f464f9656a4485b3ae6b67f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 9 Aug 2015 19:47:58 +0600 Subject: [PATCH 1599/2721] [extractor/generic] Clarify comment --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4756a658f..376feecae 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1206,7 +1206,7 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) - # Is it an RSS feed or a SMIL file? + # Is it an RSS feed, a SMIL file or a XSPF playlist? try: doc = parse_xml(webpage) if doc.tag == 'rss': From 27c7114af6b82bfe8be6b8e4dfa6e11dd1356044 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 9 Aug 2015 20:13:02 +0200 Subject: [PATCH 1600/2721] release 2015.08.09 --- README.md | 2 +- docs/supportedsites.md | 8 ++++++-- youtube_dl/version.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 02b9775f9..15baf75ce 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ which means you can modify it, redistribute it or use it however you like. --playlist-reverse Download playlist videos in reverse order --xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental) --hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental) - --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,httpie,wget + --external-downloader COMMAND Use the specified external downloader. 
Currently supports aria2c,axel,curl,httpie,wget --external-downloader-args ARGS Give these arguments to the external downloader ## Filesystem Options: diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 657935dc6..e21471102 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -86,7 +86,7 @@ - **chirbit:profile** - **Cinchcast** - **Cinemassacre** - - **clipfish** + - **Clipfish** - **cliphunter** - **Clipsyndicate** - **Cloudy** @@ -116,6 +116,7 @@ - **DailymotionCloud** - **daum.net** - **DBTV** + - **DCN** - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** @@ -351,7 +352,6 @@ - **NowTV** - **nowvideo**: NowVideo - **npo**: npo.nl and ntr.nl - - **npo**: npo.nl and ntr.nl - **npo.nl:live** - **npo.nl:radio** - **npo.nl:radio:fragment** @@ -377,6 +377,7 @@ - **parliamentlive.tv**: UK parliament videos - **Patreon** - **PBS** + - **Periscope**: Periscope - **PhilharmonieDeParis**: Philharmonie de Paris - **Phoenix** - **Photobucket** @@ -406,6 +407,7 @@ - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 + - **Quickscope**: Quick Scope - **QuickVid** - **R7** - **radio.de** @@ -518,6 +520,7 @@ - **ted** - **TeleBruxelles** - **telecinco.es** + - **Telegraaf** - **TeleMB** - **TeleTask** - **TenPlay** @@ -621,6 +624,7 @@ - **Vodlocker** - **VoiceRepublic** - **Vporn** + - **vpro**: npo.nl and ntr.nl - **VRT** - **vube**: Vube.com - **VuClip** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 9f209499c..6462d4477 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.06.1' +__version__ = '2015.08.09' From c5864a8ce6379dca300f447cca12a5a946d67d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Aug 2015 21:38:58 +0600 Subject: [PATCH 1601/2721] [fc2] Fix python 2.6 (Closes #6512) --- youtube_dl/extractor/fc2.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 1ccc1a964..e4f7195a8 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -86,7 +86,7 @@ class FC2IE(InfoExtractor): info_url = ( "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&". - format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E'))) + format(video_id, mimi, compat_urllib_request.quote(refer, safe=b'').replace('.', '%2E'))) info_webpage = self._download_webpage( info_url, video_id, note='Downloading info page') From f6c3664d717857a7994f189a01a00402df2b4168 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 10 Aug 2015 23:35:08 +0600 Subject: [PATCH 1602/2721] [vimeo] Fix login (Closes #6488) --- youtube_dl/extractor/vimeo.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 4c4e3c72a..5bce78ac0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -29,6 +29,7 @@ from ..utils import ( class VimeoBaseInfoExtractor(InfoExtractor): _NETRC_MACHINE = 'vimeo' _LOGIN_REQUIRED = False + _LOGIN_URL = 'https://vimeo.com/log_in' def _login(self): (username, password) = self._get_login_info() @@ -37,21 +38,25 @@ class VimeoBaseInfoExtractor(InfoExtractor): raise ExtractorError('No login info available, needed for using %s.' 
% self.IE_NAME, expected=True) return self.report_login() - login_url = 'https://vimeo.com/log_in' - webpage = self._download_webpage(login_url, None, False) - token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') + webpage = self._download_webpage(self._LOGIN_URL, None, False) + token = self._extract_xsrft(webpage) data = urlencode_postdata({ + 'action': 'login', 'email': username, 'password': password, - 'action': 'login', 'service': 'vimeo', 'token': token, }) - login_request = compat_urllib_request.Request(login_url, data) + login_request = compat_urllib_request.Request(self._LOGIN_URL, data) login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') - login_request.add_header('Cookie', 'xsrft=%s' % token) + login_request.add_header('Referer', self._LOGIN_URL) self._download_webpage(login_request, None, False, 'Wrong login info') + def _extract_xsrft(self, webpage): + return self._search_regex( + r'xsrft\s*[=:]\s*(?P["\'])(?P.+?)(?P=q)', + webpage, 'login token', group='xsrft') + class VimeoIE(VimeoBaseInfoExtractor): """Information extractor for vimeo.com.""" @@ -193,7 +198,7 @@ class VimeoIE(VimeoBaseInfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token') + token = self._extract_xsrft(webpage) data = urlencode_postdata({ 'password': password, 'token': token, @@ -422,7 +427,7 @@ class VimeoIE(VimeoBaseInfoExtractor): } -class VimeoChannelIE(InfoExtractor): +class VimeoChannelIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:channel' _VALID_URL = r'https://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' _MORE_PAGES_INDICATOR = r' Date: Mon, 10 Aug 2015 23:58:01 +0600 Subject: [PATCH 1603/2721] [vimeo:watchlater] Fix extraction (Closes #3886) --- youtube_dl/extractor/vimeo.py | 11 ++++++----- 1 file 
changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5bce78ac0..1eeb4618e 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -431,6 +431,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:channel' _VALID_URL = r'https://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' _TESTS = [{ 'url': 'https://vimeo.com/channels/tributes', @@ -445,7 +446,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): return '%s/videos/page:%d/' % (base_url, pagenum) def _extract_list_title(self, webpage): - return self._html_search_regex(self._TITLE_RE, webpage, 'list title') + return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title') def _login_list_password(self, page_url, list_id, webpage): login_form = self._search_regex( @@ -611,11 +612,11 @@ class VimeoReviewIE(InfoExtractor): class VimeoWatchLaterIE(VimeoChannelIE): IE_NAME = 'vimeo:watchlater' IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' - _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater' + _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater' + _TITLE = 'Watch Later' _LOGIN_REQUIRED = True - _TITLE_RE = r'href="/home/watchlater".*?>(.*?)<' _TESTS = [{ - 'url': 'https://vimeo.com/home/watchlater', + 'url': 'https://vimeo.com/watchlater', 'only_matching': True, }] @@ -631,7 +632,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): return request def _real_extract(self, url): - return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater') + return self._extract_videos('watchlater', 'https://vimeo.com/watchlater') class VimeoLikesIE(InfoExtractor): From 11b5605815d685263b271b4e061c43f9cb55a08c Mon Sep 17 00:00:00 2001 From: Puck Meerburg Date: Mon, 10 Aug 2015 20:52:38 +0200 Subject: [PATCH 1604/2721] [youtube] Use the first v= argument in the URL This is according to 
how youtube handles multiple v= values in one URL. Before this, it was possible to make a single URL show up differently on youtube itself, and if you downloaded/viewed it with youtube-dl/mpv --- youtube_dl/extractor/youtube.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 67a1df9a0..eaf058cfb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -213,7 +213,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): |(?: # or the v= param in all its forms (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! - (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx) + (?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx) v= ) )) @@ -380,6 +380,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': 'setindia' } }, + { + 'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY', + 'note': 'Use the first video ID in the URL', + 'info_dict': { + 'id': 'BaW_jenozKc', + 'ext': 'mp4', + 'title': 'youtube-dl test video "\'/\\ä↭𝕐', + 'uploader': 'Philipp Hagemeister', + 'uploader_id': 'phihag', + 'upload_date': '20121002', + 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', + 'categories': ['Science & Technology'], + 'tags': ['youtube-dl'], + 'like_count': int, + 'dislike_count': int, + } + }, { 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', 'note': '256k DASH audio (format 141) via DASH manifest', From b29440aee64027b3e4145070b0235193752b4d9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:17:41 +0600 Subject: [PATCH 1605/2721] [vimeo:user] Do not match watchlater --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 1eeb4618e..50df79ca1 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -505,7 +505,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): class VimeoUserIE(VimeoChannelIE): IE_NAME = 'vimeo:user' - _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' _TESTS = [{ 'url': 'https://vimeo.com/nkistudio/videos', From 34a7de2970d8bbceeb3f485d64a57f67489a44d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:22:06 +0600 Subject: [PATCH 1606/2721] [youtube] Skip download for multiple v= test --- youtube_dl/extractor/youtube.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index eaf058cfb..01dbbfa3c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -395,7 +395,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': ['youtube-dl'], 'like_count': int, 'dislike_count': int, - } + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.youtube.com/watch?v=a9LDPn-MO4I', From 34952f09e175e0b78c929fddf56f82ccf028dc5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:24:53 +0600 Subject: [PATCH 1607/2721] [youtube] Add age limit to tests --- youtube_dl/extractor/youtube.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 01dbbfa3c..e74a39095 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -365,6 +365,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:64249768eec3bc4276236606ea996373', 'uploader': 'justintimberlakeVEVO', 'uploader_id': 
'justintimberlakeVEVO', + 'age_limit': 18, } }, { @@ -475,6 +476,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', 'upload_date': '20140605', + 'age_limit': 18, }, }, # Age-gate video with encrypted signature @@ -488,6 +490,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', 'upload_date': '20110629', + 'age_limit': 18, }, }, # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) From fb0d12c6cbcabd6f9e84d51c82dea6778d0bb863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:46:25 +0600 Subject: [PATCH 1608/2721] [pbs] Add age limit to tests --- youtube_dl/extractor/pbs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index a53479aad..683c81de3 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -92,6 +92,7 @@ class PBSIE(InfoExtractor): 'duration': 3172, 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140122', + 'age_limit': 10, }, 'params': { 'skip_download': True, # requires ffmpeg From b1ac38fadc65049dc6f9611fa7e9649de1e7eb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 01:49:23 +0600 Subject: [PATCH 1609/2721] [tvplay] Add age limit to tests --- youtube_dl/extractor/tvplay.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 79863e781..b4683de54 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -104,6 +104,7 @@ class TVPlayIE(InfoExtractor): 'duration': 1492, 'timestamp': 1330522854, 'upload_date': '20120229', + 'age_limit': 18, }, 'params': { # rtmp download From bf812ef71438036c23640f29bd7ae955289720ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 23:00:45 +0600 Subject: [PATCH 1610/2721] [downloader/external] Forward --proxy to 
wget and aria2c --- youtube_dl/downloader/external.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 07ce59f7d..49d806ee4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -51,6 +51,14 @@ class ExternalFD(FileDownloader): return [] return [command_option, source_address] + def _option(self, command_option, param): + param = self.params.get(param) + if param is None: + return [] + if isinstance(param, bool): + return [command_option] + return [command_option, param] + def _no_check_certificate(self, command_option): return [command_option] if self.params.get('nocheckcertificate', False) else [] @@ -102,6 +110,7 @@ class WgetFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--bind-address') + cmd += self._option('--proxy', 'proxy') cmd += self._no_check_certificate('--no-check-certificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] @@ -120,6 +129,7 @@ class Aria2cFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') + cmd += self._option('--all-proxy', 'proxy') cmd += ['--', info_dict['url']] return cmd From 9f3da138606773339de9accc2bc6522ea88185fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 11 Aug 2015 23:05:04 +0600 Subject: [PATCH 1611/2721] [downloader/external] Use generic _option --- youtube_dl/downloader/external.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 49d806ee4..6c310346c 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -45,12 +45,6 @@ class ExternalFD(FileDownloader): def supports(cls, info_dict): return info_dict['protocol'] in 
('http', 'https', 'ftp', 'ftps') - def _source_address(self, command_option): - source_address = self.params.get('source_address') - if source_address is None: - return [] - return [command_option, source_address] - def _option(self, command_option, param): param = self.params.get(param) if param is None: @@ -59,9 +53,6 @@ class ExternalFD(FileDownloader): return [command_option] return [command_option, param] - def _no_check_certificate(self, command_option): - return [command_option] if self.params.get('nocheckcertificate', False) else [] - def _configuration_args(self, default=[]): ex_args = self.params.get('external_downloader_args') if ex_args is None: @@ -88,7 +79,7 @@ class CurlFD(ExternalFD): cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--interface') + cmd += self._option('--interface', 'source_address') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -109,9 +100,9 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--bind-address') + cmd += self._option('--bind-address', 'source_address') cmd += self._option('--proxy', 'proxy') - cmd += self._no_check_certificate('--no-check-certificate') + cmd += self._option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -128,7 +119,7 @@ class Aria2cFD(ExternalFD): cmd += ['--out', os.path.basename(tmpfilename)] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--interface') + cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += ['--', info_dict['url']] return cmd From 
0a19d4ccd6914d8547fd3e42fd279c960d9f8fad Mon Sep 17 00:00:00 2001 From: sceext Date: Wed, 12 Aug 2015 14:01:48 +0800 Subject: [PATCH 1612/2721] [iqiyi] update md5 salt (2015-08-10 Zombie) --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index afb7f4e61..dfc6d58a0 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -201,7 +201,7 @@ class IqiyiIE(InfoExtractor): return raw_data def get_enc_key(self, swf_url, video_id): - enc_key = '8e29ab5666d041c3a1ea76e06dabdffb' + enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie return enc_key def _real_extract(self, url): From 1df3186e0e2c49993f4230ec77a9de351177b271 Mon Sep 17 00:00:00 2001 From: ngld Date: Wed, 12 Aug 2015 16:01:47 +0200 Subject: [PATCH 1613/2721] [funnyordie] Handle protocol-relative URLs (fixes #6490) --- youtube_dl/extractor/funnyordie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index dd87257c4..f5f13689c 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -53,7 +53,7 @@ class FunnyOrDieIE(InfoExtractor): for bitrate in bitrates: for link in links: formats.append({ - 'url': '%s%d.%s' % (link[0], bitrate, link[1]), + 'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])), 'format_id': '%s-%d' % (link[1], bitrate), 'vbr': bitrate, }) From f57b7835e21b00a1b2205b4bcfba50c630ff68b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 12 Aug 2015 21:27:58 +0600 Subject: [PATCH 1614/2721] [youtube] Update tests --- youtube_dl/extractor/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e74a39095..facd837ad 100644 --- a/youtube_dl/extractor/youtube.py +++ 
b/youtube_dl/extractor/youtube.py @@ -442,7 +442,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'nfWlot6h_JM', 'ext': 'm4a', 'title': 'Taylor Swift - Shake It Off', - 'description': 'md5:2acfda1b285bdd478ccec22f9918199d', + 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3', 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', 'upload_date': '20140818', @@ -515,7 +515,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'lqQg6PlCWgI', 'ext': 'mp4', - 'upload_date': '20120731', + 'upload_date': '20120724', 'uploader_id': 'olympic', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'uploader': 'Olympics', @@ -544,7 +544,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'qEJwOuvDf7I', 'info_dict': { 'id': 'qEJwOuvDf7I', - 'ext': 'mp4', + 'ext': 'webm', 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', 'description': '', 'upload_date': '20150404', From f0f3a6c99d2834ca8af87be4978c0040c3744628 Mon Sep 17 00:00:00 2001 From: ngld Date: Wed, 12 Aug 2015 18:07:27 +0200 Subject: [PATCH 1615/2721] [rtvnhnl] Added new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rtvnhnl.py | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 youtube_dl/extractor/rtvnhnl.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dad3ec87f..f026a4171 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -491,6 +491,7 @@ from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE +from .rtvnhnl import RtvnhNlIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rtvnhnl.py b/youtube_dl/extractor/rtvnhnl.py new file mode 100644 index 000000000..ce84900a0 --- /dev/null +++ b/youtube_dl/extractor/rtvnhnl.py @@ -0,0 +1,40 @@ +# 
coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RtvnhNlIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P[0-9]+)' + _TEST = { + 'params': { + 'hls_prefer_native': True + }, + + 'url': 'http://www.rtvnh.nl/video/131946', + 'md5': '6e1d0ab079e2a00b6161442d3ceacfc1', + 'info_dict': { + 'id': '131946', + 'ext': 'mp4', + 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw', + 'thumbnail': 're:^https?://rtvnh-webfiles\.[^.]+\.amazonaws\.com/data/cache/[0-9]+/basedata/pf_image/[0-9.]+/[0-9\-a-f]+\.jpg$' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + meta = self._parse_json(self._download_webpage('http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) + formats = self._extract_smil_formats('http://www.rtvnh.nl/video/smil?m=' + video_id, video_id) + + for item in meta['source']['fb']: + if item.get('type') == 'hls': + formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4')) + elif item.get('type') == '': + formats.append({'url': item['file']}) + + return { + 'id': video_id, + 'title': meta['title'].strip(), + 'thumbnail': meta['image'], + 'formats': formats + } From fb124e37419668c34b4056575614776b0c64b401 Mon Sep 17 00:00:00 2001 From: ngld Date: Wed, 12 Aug 2015 20:21:32 +0200 Subject: [PATCH 1616/2721] [rtvnhnl] Relax the thumbnail check --- youtube_dl/extractor/rtvnhnl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtvnhnl.py b/youtube_dl/extractor/rtvnhnl.py index ce84900a0..0921e2648 100644 --- a/youtube_dl/extractor/rtvnhnl.py +++ b/youtube_dl/extractor/rtvnhnl.py @@ -17,7 +17,7 @@ class RtvnhNlIE(InfoExtractor): 'id': '131946', 'ext': 'mp4', 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw', - 'thumbnail': 're:^https?://rtvnh-webfiles\.[^.]+\.amazonaws\.com/data/cache/[0-9]+/basedata/pf_image/[0-9.]+/[0-9\-a-f]+\.jpg$' + 'thumbnail': 
're:^http:.*\.jpg$' } } From d9ab5262b137962995af1b444f45f7f32dc33a77 Mon Sep 17 00:00:00 2001 From: ngld Date: Wed, 12 Aug 2015 20:26:13 +0200 Subject: [PATCH 1617/2721] [rtvnh] Renamed rtvnhnl -> rtvnh --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/{rtvnhnl.py => rtvnh.py} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename youtube_dl/extractor/{rtvnhnl.py => rtvnh.py} (94%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f026a4171..9a6308723 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -491,7 +491,7 @@ from .rtl2 import RTL2IE from .rtp import RTPIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE -from .rtvnhnl import RtvnhNlIE +from .rtvnh import RTVNHIE from .ruhd import RUHDIE from .rutube import ( RutubeIE, diff --git a/youtube_dl/extractor/rtvnhnl.py b/youtube_dl/extractor/rtvnh.py similarity index 94% rename from youtube_dl/extractor/rtvnhnl.py rename to youtube_dl/extractor/rtvnh.py index 0921e2648..f5c0b94a8 100644 --- a/youtube_dl/extractor/rtvnhnl.py +++ b/youtube_dl/extractor/rtvnh.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -class RtvnhNlIE(InfoExtractor): +class RTVNHIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P[0-9]+)' _TEST = { 'params': { @@ -17,7 +17,7 @@ class RtvnhNlIE(InfoExtractor): 'id': '131946', 'ext': 'mp4', 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw', - 'thumbnail': 're:^http:.*\.jpg$' + 'thumbnail': 're:^https?:.*\.jpg$' } } From d7dbfc7cc18c2d54d7e1752def6c4710c58b49fc Mon Sep 17 00:00:00 2001 From: ngld Date: Wed, 12 Aug 2015 20:51:28 +0200 Subject: [PATCH 1618/2721] Use native HLS implementation by default. 
--- youtube_dl/extractor/rtvnh.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index f5c0b94a8..2799f01a6 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -7,10 +7,6 @@ from .common import InfoExtractor class RTVNHIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P[0-9]+)' _TEST = { - 'params': { - 'hls_prefer_native': True - }, - 'url': 'http://www.rtvnh.nl/video/131946', 'md5': '6e1d0ab079e2a00b6161442d3ceacfc1', 'info_dict': { @@ -28,7 +24,7 @@ class RTVNHIE(InfoExtractor): for item in meta['source']['fb']: if item.get('type') == 'hls': - formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4')) + formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) elif item.get('type') == '': formats.append({'url': item['file']}) From 240ca32e57a027ff8cec8617c154bb7100bead1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 01:00:05 +0600 Subject: [PATCH 1619/2721] [rtvnh] Carry long lines --- youtube_dl/extractor/rtvnh.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index 2799f01a6..998a3c53d 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -19,12 +19,16 @@ class RTVNHIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - meta = self._parse_json(self._download_webpage('http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) - formats = self._extract_smil_formats('http://www.rtvnh.nl/video/smil?m=' + video_id, video_id) + + meta = self._parse_json(self._download_webpage( + 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) + formats = self._extract_smil_formats( + 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id) for item in 
meta['source']['fb']: if item.get('type') == 'hls': - formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) + formats.extend(self._extract_m3u8_formats( + item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) elif item.get('type') == '': formats.append({'url': item['file']}) From f196047832a2da74d5adf75759877b5d95ec5b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 01:00:25 +0600 Subject: [PATCH 1620/2721] [rtvnh] Make thumbnail optional --- youtube_dl/extractor/rtvnh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index 998a3c53d..d576a3410 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -35,6 +35,6 @@ class RTVNHIE(InfoExtractor): return { 'id': video_id, 'title': meta['title'].strip(), - 'thumbnail': meta['image'], + 'thumbnail': meta.get('image'), 'formats': formats } From 60231c65b9a50e08967d748c3ed401488fed3587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 01:02:50 +0600 Subject: [PATCH 1621/2721] [rtvnh] Make SMIL not fatal --- youtube_dl/extractor/rtvnh.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index d576a3410..202ea0181 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -23,7 +23,7 @@ class RTVNHIE(InfoExtractor): meta = self._parse_json(self._download_webpage( 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) formats = self._extract_smil_formats( - 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id) + 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False) for item in meta['source']['fb']: if item.get('type') == 'hls': From 2c919adb74893544ab6def1d56ff8ed37c282ecb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 
Aug 2015 01:11:55 +0600 Subject: [PATCH 1622/2721] [rtvnh] Check status code --- youtube_dl/extractor/rtvnh.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py index 202ea0181..7c9d4b0cd 100644 --- a/youtube_dl/extractor/rtvnh.py +++ b/youtube_dl/extractor/rtvnh.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ExtractorError class RTVNHIE(InfoExtractor): @@ -22,6 +23,12 @@ class RTVNHIE(InfoExtractor): meta = self._parse_json(self._download_webpage( 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) + + status = meta.get('status') + if status != 200: + raise ExtractorError( + '%s returned error code %d' % (self.IE_NAME, status), expected=True) + formats = self._extract_smil_formats( 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id, fatal=False) @@ -31,7 +38,7 @@ class RTVNHIE(InfoExtractor): item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) elif item.get('type') == '': formats.append({'url': item['file']}) - + return { 'id': video_id, 'title': meta['title'].strip(), From 3b7130439aade87b628fa6dd727df5860323a68f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 01:15:58 +0600 Subject: [PATCH 1623/2721] Credit @ngld for RTVNH (#6537) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index d16d34272..71c420165 100644 --- a/AUTHORS +++ b/AUTHORS @@ -137,3 +137,4 @@ Zach Bruggeman Tjark Saul slangangular Behrouz Abbasi +ngld From b6b2711298f8d43414deac939f92c7c3477826b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 01:17:15 +0600 Subject: [PATCH 1624/2721] [tweakers] Remove unused imports --- youtube_dl/extractor/tweakers.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dl/extractor/tweakers.py index 6eeffb1cc..f3198fb85 
100644 --- a/youtube_dl/extractor/tweakers.py +++ b/youtube_dl/extractor/tweakers.py @@ -1,12 +1,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - xpath_text, - xpath_with_ns, - int_or_none, - float_or_none, -) class TweakersIE(InfoExtractor): From e73c85cb23d278702357412479fd4b162a3abbb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 01:18:49 +0600 Subject: [PATCH 1625/2721] [iqiyi] PEP 8 --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index dfc6d58a0..393e67e35 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -201,7 +201,7 @@ class IqiyiIE(InfoExtractor): return raw_data def get_enc_key(self, swf_url, video_id): - enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie + enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie return enc_key def _real_extract(self, url): From 237c03c8eaa4da1713a635e87f98ac14430b35cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 01:19:23 +0600 Subject: [PATCH 1626/2721] [dhm] Remove unused import --- youtube_dl/extractor/dhm.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py index 127eb0439..44e0c5d4d 100644 --- a/youtube_dl/extractor/dhm.py +++ b/youtube_dl/extractor/dhm.py @@ -1,10 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - xpath_text, - parse_duration, -) +from ..utils import parse_duration class DHMIE(InfoExtractor): From 28479149ccf3425e6a6e35d3a155f6802629728a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 13 Aug 2015 12:56:12 +0800 Subject: [PATCH 1627/2721] [theplatform] Fallback to hardcoded releaseUrl if not available Fixes #6546. 
Not adding a test case as test_NBC has the same problem. --- youtube_dl/extractor/theplatform.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 83d833e30..0643eccaf 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -108,7 +108,11 @@ class ThePlatformIE(InfoExtractor): config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') config = self._download_json(config_url, video_id, 'Downloading config') - smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' + if 'releaseUrl' in config: + release_url = config['releaseUrl'] + else: + release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path + smil_url = release_url + '&format=SMIL&formats=MPEG4&manifest=f4m' else: smil_url = 'http://link.theplatform.com/s/%s/meta.smil?format=smil&mbr=true' % path From 6828c809e44fca7b19da3c62a11cea313a86b64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 21:07:14 +0600 Subject: [PATCH 1628/2721] [downloader/fragment] Respect --retries for fragment based downloaders (Closes #6549) --- youtube_dl/downloader/fragment.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 5f9d6796d..5a64b29ee 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -35,6 +35,7 @@ class FragmentFD(FileDownloader): 'quiet': True, 'noprogress': True, 'ratelimit': self.params.get('ratelimit', None), + 'retries': self.params.get('retries', 0), 'test': self.params.get('test', False), } ) From 7393746da213bec686f8425165854e5e383b7eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 21:10:11 +0600 Subject: [PATCH 1629/2721] [downloader/hls] Add _debug_cmd --- youtube_dl/downloader/hls.py | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 60dca0ab1..2b6c3370f 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -32,6 +32,8 @@ class HlsFD(FileDownloader): for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] args.append(encodeFilename(tmpfilename, True)) + self._debug_cmd(args) + retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) From cb28e0338665c96b2d5b35d203b1d54a57f3feb1 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Mon, 10 Aug 2015 19:27:16 +0200 Subject: [PATCH 1630/2721] [indavideo] Add new extractor Closes #2147. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/indavideo.py | 79 +++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/indavideo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 9a6308723..3bcfa93bb 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -242,6 +242,7 @@ from .imdb import ( ) from .imgur import ImgurIE from .ina import InaIE +from .indavideo import IndavideoIE from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py new file mode 100644 index 000000000..2a2cf2bd3 --- /dev/null +++ b/youtube_dl/extractor/indavideo.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .. 
import utils +from .common import InfoExtractor + + +class IndavideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?indavideo\.hu/video/(?P.+)' + _TESTS = [ + { + 'url': 'http://indavideo.hu/video/Cicatanc', + 'md5': 'c8a507a1c7410685f83a06eaeeaafeab', + 'info_dict': { + 'id': '1837039', + 'title': 'Cicatánc', + 'ext': 'mp4', + 'display_id': 'Cicatanc', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': '', + 'uploader': 'cukiajanlo', + 'uploader_id': '83729', + 'duration': 72, + 'age_limit': 0, + 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'] + }, + }, + { + 'url': 'http://indavideo.hu/video/Vicces_cica_1', + 'md5': '8c82244ba85d2a2310275b318eb51eac', + 'info_dict': { + 'id': '1335611', + 'title': 'Vicces cica', + 'ext': 'mp4', + 'display_id': 'Vicces_cica_1', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Játszik a tablettel. :D', + 'uploader': 'Jet_Pack', + 'uploader_id': '491217', + 'duration': 7, + 'age_limit': 0, + 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], + }, + }, + ] + + def _real_extract(self, url): + video_disp_id = self._match_id(url) + webpage = self._download_webpage(url, video_disp_id) + + embed_url = self._html_search_regex(r'', webpage, 'embed_url') + video_hash = embed_url.split('/')[-1] + + payload = self._download_json('http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/' + video_hash, video_disp_id) + video_info = payload['data'] + + thumbnails = video_info.get('thumbnails') + if thumbnails: + thumbnails = [{'url': self._proto_relative_url(x)} for x in thumbnails] + + tags = video_info.get('tags') + if tags: + tags = [x['title'] for x in tags] + + return { + 'id': video_info.get('id'), + 'title': video_info['title'], + 'url': video_info['video_file'], + 'ext': 'mp4', + 'display_id': video_disp_id, + 'thumbnails': thumbnails, + 'description': video_info.get('description'), + 'uploader': video_info.get('user_name'), + # TODO: upload date 
(it's in CET/CEST) + 'uploader_id': video_info.get('user_id'), + 'duration': utils.int_or_none(video_info.get('length')), + 'age_limit': utils.int_or_none(video_info.get('age_limit')), + 'tags': tags, + } From 3c12a027d48a2d6d1162ab515df0308237aef881 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 23:25:47 +0600 Subject: [PATCH 1631/2721] [indavideo] Split in two extractors, extract all formats and fix timestamp --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/indavideo.py | 178 +++++++++++++++++++----------- 2 files changed, 118 insertions(+), 65 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3bcfa93bb..83d21bd15 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -242,7 +242,10 @@ from .imdb import ( ) from .imgur import ImgurIE from .ina import InaIE -from .indavideo import IndavideoIE +from .indavideo import ( + IndavideoIE, + IndavideoEmbedIE, +) from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 2a2cf2bd3..b75715244 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -3,77 +3,127 @@ from __future__ import unicode_literals from .. 
import utils from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_age_limit, + parse_iso8601, +) + + +class IndavideoEmbedIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P[\da-f]+)' + _TESTS = [{ + 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/', + 'md5': 'f79b009c66194acacd40712a6778acfa', + 'info_dict': { + 'id': '1837039', + 'ext': 'mp4', + 'title': 'Cicatánc', + 'description': '', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'cukiajanlo', + 'uploader_id': '83729', + 'timestamp': 1439193826, + 'upload_date': '20150810', + 'duration': 72, + 'age_limit': 0, + 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'], + }, + }, { + 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1', + 'only_matching': True, + }, { + 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, + video_id)['data'] + + video_id = video['id'] + title = video['title'] + + video_urls = video.get('video_files', []) + video_file = video.get('video_file') + if video: + video_urls.append(video_file) + video_urls = list(set(video_urls)) + + video_prefix = video_urls[0].rsplit('/', 1)[0] + + for flv_file in video.get('flv_files', []): + flv_url = '%s/%s' % (video_prefix, flv_file) + if flv_url not in video_urls: + video_urls.append(flv_url) + + formats = [{ + 'url': video_url, + 'height': self._search_regex(r'\.(\d{3,4})\.mp4$', video_url, 'height', default=None), + } for video_url in video_urls] + self._sort_formats(formats) + + timestamp = video.get('date') + if timestamp: + # upload date is in CEST + timestamp = parse_iso8601(timestamp + ' +0200', ' ') + + 
thumbnails = [{ + 'url': self._proto_relative_url(thumbnail) + } for thumbnail in video.get('thumbnails', [])] + + tags = [tag['title'] for tag in video.get('tags', [])] + + return { + 'id': video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnails': thumbnails, + 'uploader': video.get('user_name'), + 'uploader_id': video.get('user_id'), + 'timestamp': timestamp, + 'duration': int_or_none(video.get('length')), + 'age_limit': parse_age_limit(video.get('age_limit')), + 'tags': tags, + 'formats': formats, + } class IndavideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?indavideo\.hu/video/(?P.+)' - _TESTS = [ - { - 'url': 'http://indavideo.hu/video/Cicatanc', - 'md5': 'c8a507a1c7410685f83a06eaeeaafeab', - 'info_dict': { - 'id': '1837039', - 'title': 'Cicatánc', - 'ext': 'mp4', - 'display_id': 'Cicatanc', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': '', - 'uploader': 'cukiajanlo', - 'uploader_id': '83729', - 'duration': 72, - 'age_limit': 0, - 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'] - }, + _VALID_URL = r'https?://(?:www\.)?indavideo\.hu/video/(?P[^/#?]+)' + _TEST = { + 'url': 'http://indavideo.hu/video/Vicces_cica_1', + 'md5': '8c82244ba85d2a2310275b318eb51eac', + 'info_dict': { + 'id': '1335611', + 'display_id': 'Vicces_cica_1', + 'ext': 'mp4', + 'title': 'Vicces cica', + 'description': 'Játszik a tablettel. :D', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'Jet_Pack', + 'uploader_id': '491217', + 'timestamp': 1390821212, + 'upload_date': '20140127', + 'duration': 7, + 'age_limit': 0, + 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], }, - { - 'url': 'http://indavideo.hu/video/Vicces_cica_1', - 'md5': '8c82244ba85d2a2310275b318eb51eac', - 'info_dict': { - 'id': '1335611', - 'title': 'Vicces cica', - 'ext': 'mp4', - 'display_id': 'Vicces_cica_1', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'Játszik a tablettel. 
:D', - 'uploader': 'Jet_Pack', - 'uploader_id': '491217', - 'duration': 7, - 'age_limit': 0, - 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], - }, - }, - ] + } def _real_extract(self, url): - video_disp_id = self._match_id(url) - webpage = self._download_webpage(url, video_disp_id) + display_id = self._match_id(url) - embed_url = self._html_search_regex(r'', webpage, 'embed_url') - video_hash = embed_url.split('/')[-1] - - payload = self._download_json('http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/' + video_hash, video_disp_id) - video_info = payload['data'] - - thumbnails = video_info.get('thumbnails') - if thumbnails: - thumbnails = [{'url': self._proto_relative_url(x)} for x in thumbnails] - - tags = video_info.get('tags') - if tags: - tags = [x['title'] for x in tags] + webpage = self._download_webpage(url, display_id) + embed_url = self._search_regex( + r']+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url') return { - 'id': video_info.get('id'), - 'title': video_info['title'], - 'url': video_info['video_file'], - 'ext': 'mp4', - 'display_id': video_disp_id, - 'thumbnails': thumbnails, - 'description': video_info.get('description'), - 'uploader': video_info.get('user_name'), - # TODO: upload date (it's in CET/CEST) - 'uploader_id': video_info.get('user_id'), - 'duration': utils.int_or_none(video_info.get('length')), - 'age_limit': utils.int_or_none(video_info.get('age_limit')), - 'tags': tags, + '_type': 'url_transparent', + 'ie_key': 'IndavideoEmbed', + 'url': embed_url, + 'display_id': display_id, } From a34e19629c407a08cd9065223f26f1f5468a4423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 23:40:20 +0600 Subject: [PATCH 1632/2721] [indavideo] Relax _VALID_URL to match subdomains and add tests --- youtube_dl/extractor/indavideo.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git 
a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index b75715244..550a7001b 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -44,7 +44,6 @@ class IndavideoEmbedIE(InfoExtractor): 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, video_id)['data'] - video_id = video['id'] title = video['title'] video_urls = video.get('video_files', []) @@ -78,7 +77,7 @@ class IndavideoEmbedIE(InfoExtractor): tags = [tag['title'] for tag in video.get('tags', [])] return { - 'id': video_id, + 'id': video.get('id') or video_id, 'title': title, 'description': video.get('description'), 'thumbnails': thumbnails, @@ -93,8 +92,8 @@ class IndavideoEmbedIE(InfoExtractor): class IndavideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?indavideo\.hu/video/(?P[^/#?]+)' - _TEST = { + _VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P[^/#?]+)' + _TESTS = [{ 'url': 'http://indavideo.hu/video/Vicces_cica_1', 'md5': '8c82244ba85d2a2310275b318eb51eac', 'info_dict': { @@ -112,7 +111,22 @@ class IndavideoIE(InfoExtractor): 'age_limit': 0, 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], }, - } + }, { + 'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz', + 'only_matching': True, + }, { + 'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko', + 'only_matching': True, + }, { + 'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci', + 'only_matching': True, + }, { + 'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt', + 'only_matching': True, + }, { + 'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) From fb56131dd9cf3bfa31d7d6920a135281d151f803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 23:47:12 +0600 Subject: [PATCH 1633/2721] Credit @nyuszika7h 
for indavideo (#6517) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 71c420165..ded9e87d2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -138,3 +138,4 @@ Tjark Saul slangangular Behrouz Abbasi ngld +nyuszika7h From 594f51b85934878ff20b608f312d7f564e3a3d71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 13 Aug 2015 23:47:49 +0600 Subject: [PATCH 1634/2721] [indavideo] Remove unused import --- youtube_dl/extractor/indavideo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 550a7001b..12fb5e8e1 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -from .. import utils from .common import InfoExtractor from ..utils import ( int_or_none, From 3cafca04aaf2bfc4d31e8255b9cb75e8f1ad4b16 Mon Sep 17 00:00:00 2001 From: reddraggone9 Date: Fri, 14 Aug 2015 00:35:35 -0500 Subject: [PATCH 1635/2721] Updated line numbers in the fragment portion of README links. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 15baf75ce..e91119d84 100644 --- a/README.md +++ b/README.md @@ -544,7 +544,7 @@ If you want to add support for a new site, you can follow this quick list (assum ``` 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. 
Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62). Add tests and code for as many as you want. 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: @@ -572,7 +572,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L69). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L92). For a start, if you want to intercept youtube-dl's output, set a `logger` object. 
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From 4d2ad866f347086d3a1cf4cb7e0a8cadd3c87748 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 14 Aug 2015 19:18:03 +0800 Subject: [PATCH 1636/2721] [README.md] Document format_id field in output template section (#6557) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 15baf75ce..8fa402ee2 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,7 @@ The `-o` option allows users to indicate a template for the output file names. T - `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero. - `playlist`: The name or the id of the playlist that contains the video. - `playlist_index`: The index of the video in the playlist, a five-digit number. + - `format_id`: The sequence will be replaced by the format code specified by `--format`. The current default template is `%(title)s-%(id)s.%(ext)s`. 
From 41dbc50f9c7dfaad4084fbeac77192c7ac37daca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 14 Aug 2015 22:07:02 +0600 Subject: [PATCH 1637/2721] [lynda] Capture and output login error (Closes #6556) --- youtube_dl/extractor/lynda.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index deead220a..5b9157ed4 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -11,6 +11,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + clean_html, int_or_none, ) @@ -70,6 +71,15 @@ class LyndaBaseIE(InfoExtractor): 'Confirming log in and log out from another device') if all(not re.search(p, login_page) for p in ('isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): + if 'login error' in login_page: + mobj = re.search( + r'(?s)]+class="topmost">(?P[^<]+)</h1>\s*<div>(?P<description>.+?)</div>', + login_page) + if mobj: + raise ExtractorError( + 'lynda returned error: %s - %s' + % (mobj.group('title'), clean_html(mobj.group('description'))), + expected=True) raise ExtractorError('Unable to log in') From 6be5e46994ea5db76d7a2659260606898c265957 Mon Sep 17 00:00:00 2001 From: "Sergey M." <dstftw@gmail.com> Date: Fri, 14 Aug 2015 22:22:39 +0600 Subject: [PATCH 1638/2721] [README.md] Clarify line ranges --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e52cdb941..542a7c26a 100644 --- a/README.md +++ b/README.md @@ -545,7 +545,7 @@ If you want to add support for a new site, you can follow this quick list (assum ``` 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. 
If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want. 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: @@ -573,7 +573,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L92). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of what can be done, have a look at [youtube_dl/YoutubeDL.py](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L117-L265). For a start, if you want to intercept youtube-dl's output, set a `logger` object. 
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From d0d6c097fc7859180f16a445536029c600b1e57f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 15 Aug 2015 15:17:27 +0800 Subject: [PATCH 1639/2721] [moniker] Support embed- URLs (#6450) --- youtube_dl/extractor/moniker.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py index 88dcd4f73..69e4bcd1a 100644 --- a/youtube_dl/extractor/moniker.py +++ b/youtube_dl/extractor/moniker.py @@ -9,7 +9,10 @@ from ..compat import ( compat_urllib_parse, compat_urllib_request, ) -from ..utils import ExtractorError +from ..utils import ( + ExtractorError, + remove_start, +) class MonikerIE(InfoExtractor): @@ -24,6 +27,14 @@ class MonikerIE(InfoExtractor): 'ext': 'mp4', 'title': 'youtube-dl test video', }, + }, { + 'url': 'http://allmyvideos.net/embed-jih3nce3x6wn', + 'md5': '710883dee1bfc370ecf9fa6a89307c88', + 'info_dict': { + 'id': 'jih3nce3x6wn', + 'ext': 'mp4', + 'title': 'youtube-dl test video', + }, }, { 'url': 'http://vidspot.net/l2ngsmhs8ci5', 'md5': '710883dee1bfc370ecf9fa6a89307c88', @@ -38,7 +49,10 @@ class MonikerIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) + orig_video_id = self._match_id(url) + video_id = remove_start(orig_video_id, 'embed-') + url = url.replace(orig_video_id, video_id) + assert re.match(self._VALID_URL, url) is not None orig_webpage = self._download_webpage(url, video_id) if '>File Not Found<' in orig_webpage: From 8b8c1093b65ee02aad859ed8d82217312ed0d9d8 Mon Sep 17 00:00:00 2001 From: Shaun Walbridge <shaun.walbridge@gmail.com> Date: Sat, 18 Apr 2015 00:37:04 -0400 Subject: [PATCH 1640/2721] [EsriVideo] Add new extractor Add extractor for [videos.esri.com](https://videos.esri.com), a collection of videos relating 
to GIS. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/videoesri.py | 90 +++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/videoesri.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 83d21bd15..a4387636f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -695,6 +695,7 @@ from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE +from .videoesri import VideoEsriIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videoesri.py b/youtube_dl/extractor/videoesri.py new file mode 100644 index 000000000..0f84323a4 --- /dev/null +++ b/youtube_dl/extractor/videoesri.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os +import re + +from .common import InfoExtractor + +from ..utils import ( + unified_strdate +) + + +class VideoEsriIE(InfoExtractor): + _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://video.esri.com/watch/4228', + 'md5': '170b4d513c2466ed483c150a48384133', + 'info_dict': { + 'id': '4228', + 'ext': 'mp4', + 'title': 'AppStudio for ArcGIS', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20150310', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') + + upload_date_raw = self._search_regex( + r'http-equiv="last-modified" content="(.*)"', + webpage, 'upload date') + upload_date = unified_strdate(upload_date_raw) + + settings_info = self._search_regex( + r'evPlayerSettings = {(.*?);\s*$', + webpage, 'settings info', flags=re.MULTILINE | re.DOTALL) + + # thumbnail includes '_x' for large, also has {_m,_t,_s} 
or + # without size suffix returns full image + thumbnail_path = re.findall( + r'image\': \'(\/thumbs.*)\'', + settings_info)[0] + + if thumbnail_path: + thumbnail = '/'.join(['http://video.esri.com', thumbnail_path]) + + # note that this misses the (exceedly rare) webm files + video_paths = re.findall(r'mp4:(.*)\'', settings_info) + + # find possible http servers of the mp4 files (also has rtsp) + base_url = re.findall( + r'netstreambasepath\':\s\'(h.*)\'', settings_info)[0] + + # these are the numbers used internally, but really map + # to other resolutions, e.g. 960 is 720p. + heights = [480, 720, 960] + videos_by_res = {} + for video_path in video_paths: + url = "{base_url}{video_path}".format( + base_url=base_url, + video_path=video_path) + filename, ext = os.path.splitext(video_path) + height_label = int(filename.split('_')[1]) + videos_by_res[height_label] = { + 'url': url, + 'ext': ext[1:], + 'protocol': 'http', # http-only supported currently + } + + formats = [] + for height in heights: + if height in videos_by_res: + formats.append(videos_by_res[height]) + + result = { + 'id': video_id, + 'title': title, + 'upload_date': upload_date, + 'formats': formats, + } + + if thumbnail: + result['thumbnail'] = thumbnail + + return result From 8b9848ac5678356757f67a412f7ed89a0f559be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 15:58:30 +0600 Subject: [PATCH 1641/2721] [extractor/common] Expand meta regex --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5982055be..16ae4b98f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -640,7 +640,7 @@ class InfoExtractor(object): @staticmethod def _meta_regex(prop): return r'''(?isx)<meta - (?=[^>]+(?:itemprop|name|property|id)=(["\']?)%s\1) + (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1) 
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) def _og_search_property(self, prop, html, name=None, **kargs): From 3aa697f993e3719cf032c5b1e192a034100b0534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 15:58:56 +0600 Subject: [PATCH 1642/2721] [esri:video] Extract all formats and simplify --- youtube_dl/extractor/videoesri.py | 106 +++++++++++++----------------- 1 file changed, 45 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/videoesri.py b/youtube_dl/extractor/videoesri.py index 0f84323a4..84faba678 100644 --- a/youtube_dl/extractor/videoesri.py +++ b/youtube_dl/extractor/videoesri.py @@ -1,90 +1,74 @@ # coding: utf-8 from __future__ import unicode_literals -import os import re from .common import InfoExtractor - +from ..compat import compat_urlparse from ..utils import ( - unified_strdate + int_or_none, + parse_filesize, + unified_strdate, ) class VideoEsriIE(InfoExtractor): _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' _TEST = { - 'url': 'https://video.esri.com/watch/4228', - 'md5': '170b4d513c2466ed483c150a48384133', + 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications', + 'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc', 'info_dict': { - 'id': '4228', + 'id': '1124', 'ext': 'mp4', - 'title': 'AppStudio for ArcGIS', + 'title': 'ArcGIS Online - Developing Applications', + 'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.', 'thumbnail': 're:^https?://.*\.jpg$', - 'upload_date': '20150310', + 'duration': 185, + 'upload_date': '20120419', } } def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') - - upload_date_raw = self._search_regex( - r'http-equiv="last-modified" content="(.*)"', - webpage, 'upload date') - upload_date = unified_strdate(upload_date_raw) - - 
settings_info = self._search_regex( - r'evPlayerSettings = {(.*?);\s*$', - webpage, 'settings info', flags=re.MULTILINE | re.DOTALL) - - # thumbnail includes '_x' for large, also has {_m,_t,_s} or - # without size suffix returns full image - thumbnail_path = re.findall( - r'image\': \'(\/thumbs.*)\'', - settings_info)[0] - - if thumbnail_path: - thumbnail = '/'.join(['http://video.esri.com', thumbnail_path]) - - # note that this misses the (exceedly rare) webm files - video_paths = re.findall(r'mp4:(.*)\'', settings_info) - - # find possible http servers of the mp4 files (also has rtsp) - base_url = re.findall( - r'netstreambasepath\':\s\'(h.*)\'', settings_info)[0] - - # these are the numbers used internally, but really map - # to other resolutions, e.g. 960 is 720p. - heights = [480, 720, 960] - videos_by_res = {} - for video_path in video_paths: - url = "{base_url}{video_path}".format( - base_url=base_url, - video_path=video_path) - filename, ext = os.path.splitext(video_path) - height_label = int(filename.split('_')[1]) - videos_by_res[height_label] = { - 'url': url, - 'ext': ext[1:], - 'protocol': 'http', # http-only supported currently - } - formats = [] - for height in heights: - if height in videos_by_res: - formats.append(videos_by_res[height]) + for width, height, content in re.findall( + r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage): + for video_url, ext, filesize in re.findall( + r'<a[^>]+href="([^"]+)">([^<]+) \(([^<]+)\)</a>', content): + formats.append({ + 'url': compat_urlparse.urljoin(url, video_url), + 'ext': ext.lower(), + 'format_id': '%s-%s' % (ext.lower(), height), + 'width': int(width), + 'height': int(height), + 'filesize_approx': parse_filesize(filesize), + }) + self._sort_formats(formats) - result = { + title = self._html_search_meta('title', webpage, 'title') + description = self._html_search_meta( + 'description', webpage, 'description', fatal=False) + + thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', 
fatal=False) + if thumbnail: + thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail) + + duration = int_or_none(self._search_regex( + [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"], + webpage, 'duration', fatal=False)) + + upload_date = unified_strdate(self._html_search_meta( + 'last-modified', webpage, 'upload date', fatal=None)) + + return { 'id': video_id, 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, 'upload_date': upload_date, - 'formats': formats, + 'formats': formats } - - if thumbnail: - result['thumbnail'] = thumbnail - - return result From 9c21f229236c77a8865c857b43c6cbd95dcc6f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 15:59:35 +0600 Subject: [PATCH 1643/2721] [esri:video] Rename extractor --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/{videoesri.py => esri.py} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename youtube_dl/extractor/{videoesri.py => esri.py} (98%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a4387636f..760f73918 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -695,7 +695,7 @@ from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE -from .videoesri import VideoEsriIE +from .esri import EsriVideoIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE diff --git a/youtube_dl/extractor/videoesri.py b/youtube_dl/extractor/esri.py similarity index 98% rename from youtube_dl/extractor/videoesri.py rename to youtube_dl/extractor/esri.py index 84faba678..bf5d2019f 100644 --- a/youtube_dl/extractor/videoesri.py +++ b/youtube_dl/extractor/esri.py @@ -12,7 +12,7 @@ from ..utils import ( ) -class VideoEsriIE(InfoExtractor): +class EsriVideoIE(InfoExtractor): _VALID_URL = 
r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)' _TEST = { 'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications', From 1d25e9d173931da0d2cb65b114f44bbf24184f6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 16:00:24 +0600 Subject: [PATCH 1644/2721] [extractor/__init__] Fix order --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 760f73918..a8be63624 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -158,6 +158,7 @@ from .eporner import EpornerIE from .eroprofile import EroProfileIE from .escapist import EscapistIE from .espn import ESPNIE +from .esri import EsriVideoIE from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE from .expotv import ExpoTVIE @@ -695,7 +696,6 @@ from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE -from .esri import EsriVideoIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE From fab83e24567226fa70e7f5076d961b83239ccfbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 16:10:20 +0600 Subject: [PATCH 1645/2721] Credit @scw for video.esri.com (#5459) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index ded9e87d2..d1693224e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -139,3 +139,4 @@ slangangular Behrouz Abbasi ngld nyuszika7h +Shaun Walbridge From c576ef1e7cfd31ca94ca6025c054b3ae4f611b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 18:13:37 +0600 Subject: [PATCH 1646/2721] [shahid] Improve and simplify --- youtube_dl/extractor/shahid.py | 140 ++++++++++++++++++--------------- 1 file 
changed, 78 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index 399140189..6e9903d5e 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -2,90 +2,106 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urllib_parse from ..utils import ( - js_to_json, ExtractorError, - int_or_none + int_or_none, + parse_iso8601, ) class ShahidIE(InfoExtractor): _VALID_URL = r'https?://shahid\.mbc\.net/ar/episode/(?P<id>\d+)/?' - _TESTS = [ - { - 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', - 'info_dict': { - 'id': '90574', - 'ext': 'm3u8', - 'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3', - 'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان', - 'duration': 2972, - }, - 'params': { - # m3u8 download - 'skip_download': True, - } + _TESTS = [{ + 'url': 'https://shahid.mbc.net/ar/episode/90574/%D8%A7%D9%84%D9%85%D9%84%D9%83-%D8%B9%D8%A8%D8%AF%D8%A7%D9%84%D9%84%D9%87-%D8%A7%D9%84%D8%A5%D9%86%D8%B3%D8%A7%D9%86-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-3.html', + 'info_dict': { + 'id': '90574', + 'ext': 'm3u8', + 'title': 'الملك عبدالله الإنسان الموسم 1 كليب 3', + 'description': 'الفيلم الوثائقي - الملك عبد الله الإنسان', + 'duration': 2972, + 'timestamp': 1422057420, + 'upload_date': '20150123', }, - { - # shahid plus subscriber only - 'url': 'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', - 'only_matching': True + 'params': { + # m3u8 download + 'skip_download': True, } - ] + }, { + # shahid plus subscriber only + 'url': 
'https://shahid.mbc.net/ar/episode/90511/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1.html', + 'only_matching': True + }] - _api_vars = { - 'type': 'player', - 'url': 'http://api.shahid.net/api/v1_1', - 'playerType': 'episode', - } + def _handle_error(self, response): + if not isinstance(response, dict): + return + error = response.get('error') + if error: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), + expected=True) + + def _download_json(self, url, video_id, note='Downloading JSON metadata'): + response = super(ShahidIE, self)._download_json(url, video_id, note)['data'] + self._handle_error(response) + return response def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - player_info = '' - flash_vars = self._search_regex('var flashvars = ({[^}]+})', webpage, 'flashvars', None) - if flash_vars is not None: - for line in flash_vars.splitlines(): - if '+' not in line and '(' not in line and ')' not in line: - player_info += line - player_info = self._parse_json(player_info, video_id, js_to_json, False) - if player_info is not None: - for key in self._api_vars: - if key in player_info: - self._api_vars[key] = player_info[key] + api_vars = { + 'id': video_id, + 'type': 'player', + 'url': 'http://api.shahid.net/api/v1_1', + 'playerType': 'episode', + } - player_json_data = self._download_json( - 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-' + video_id + '.type-' + self._api_vars['type'] + '.html', - video_id - )['data'] - if 'url' in player_json_data: - m3u8_url = player_json_data['url'] - else: - for error in player_json_data['error'].values(): - raise ExtractorError(error) - formats = self._extract_m3u8_formats(m3u8_url, video_id) + flashvars = self._search_regex( + r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None) + if flashvars: + for key in 
api_vars.keys(): + value = self._search_regex( + r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key, + flashvars, 'type', default=None, group='value') + if value: + api_vars[key] = value - video_info = self._download_json( - self._api_vars['url'] + '/' + self._api_vars['playerType'] + '/' + video_id + '?apiKey=sh%40hid0nlin3&hash=b2wMCTHpSmyxGqQjJFOycRmLSex%2BBpTK%2Fooxy6vHaqs%3D', - video_id - )['data'] - if video_info.get('error'): - for error in video_info['error']: - raise ExtractorError(error) - video_info = video_info[self._api_vars['playerType']] - title = video_info['title'] - thumbnail = video_info.get('thumbnailUrl') - categories = [category['name'] for category in video_info.get('genres')] - description = video_info.get('description') - duration = int_or_none(video_info.get('duration')) + player = self._download_json( + 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html' + % (video_id, api_vars['type']), video_id, 'Downloading player JSON') + + formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') + + video = self._download_json( + '%s/%s/%s?%s' % ( + api_vars['url'], api_vars['playerType'], api_vars['id'], + compat_urllib_parse.urlencode({ + 'apiKey': 'sh@hid0nlin3', + 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', + }).encode('utf-8')), + video_id, 'Downloading video JSON') + + video = video[api_vars['playerType']] + + title = video['title'] + description = video.get('description') + thumbnail = video.get('thumbnailUrl') + duration = int_or_none(video.get('duration')) + timestamp = parse_iso8601(video.get('referenceDate')) + categories = [ + category['name'] + for category in video.get('genres', []) if 'name' in category] return { 'id': video_id, 'title': title, - 'thumbnail': thumbnail, - 'categories': categories, 'description': description, + 'thumbnail': thumbnail, 'duration': duration, + 'timestamp': timestamp, + 'categories': categories, 'formats': formats, } From 
9303ce3e6969b5818982d6214a8d0ff4e3c95286 Mon Sep 17 00:00:00 2001 From: reddraggone9 <cljenkins9@gmail.com> Date: Thu, 13 Aug 2015 22:11:11 -0500 Subject: [PATCH 1647/2721] [youtube] Fix two-factor authentication --- youtube_dl/extractor/youtube.py | 40 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index facd837ad..bfa9a12a8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -46,7 +46,7 @@ from ..utils import ( class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' - _TWOFACTOR_URL = 'https://accounts.google.com/SecondFactor' + _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False @@ -128,7 +128,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # Two-Factor # TODO add SMS and phone call support - these require making a request and then prompting the user - if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', login_results) is not None: + if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None: tfa_code = self._get_tfa_info() if tfa_code is None: @@ -136,31 +136,27 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)') return False - # Unlike the first login form, secTok and timeStmp are both required for the TFA form + def find_value(element_id): + match = re.search(r'id="%s"\s+value="(.+?)">' % element_id, login_results, re.M | re.U) + if match is None: + self._downloader.report_warning('Failed to get %s - did the page structure change?' 
% id) + return match.group(1) - match = re.search(r'id="secTok"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) - if match is None: - self._downloader.report_warning('Failed to get secTok - did the page structure change?') - secTok = match.group(1) - match = re.search(r'id="timeStmp"\n\s+value=\'(.+)\'/>', login_results, re.M | re.U) - if match is None: - self._downloader.report_warning('Failed to get timeStmp - did the page structure change?') - timeStmp = match.group(1) + challengeId = find_value('challengeId') + challengeType = find_value('challengeType') + gxf = find_value('gxf') tfa_form_strs = { + 'challengeId': challengeId, + 'challengeType': challengeType, # This doesn't appear to change 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', - 'smsToken': '', - 'smsUserPin': tfa_code, - 'smsVerifyPin': 'Verify', - - 'PersistentCookie': 'yes', - 'checkConnection': '', - 'checkedDomains': 'youtube', - 'pstMsg': '1', - 'secTok': secTok, - 'timeStmp': timeStmp, 'service': 'youtube', 'hl': 'en_US', + 'checkedDomains': 'youtube', + 'pstMsg': '0', + 'gxf': gxf, + 'Pin': tfa_code, + 'TrustDevice': 'on', } tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items()) tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii') @@ -173,7 +169,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if tfa_results is False: return False - if re.search(r'(?i)<form[^>]* id="gaia_secondfactorform"', tfa_results) is not None: + if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None: self._downloader.report_warning('Two-factor code expired. 
Please try again, or use a one-use backup code instead.') return False if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None: From 201ea3ee8e392d6c82bb8137b80b4328db40a399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 21:52:22 +0600 Subject: [PATCH 1648/2721] [extractor/common] Improve _hidden_inputs --- youtube_dl/extractor/common.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 16ae4b98f..e2ace827f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -724,16 +724,18 @@ class InfoExtractor(object): @staticmethod def _hidden_inputs(html): - return dict([ - (input.group('name'), input.group('value')) for input in re.finditer( - r'''(?x) - <input\s+ - type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+ - name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+ - (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)? 
- value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value) - ''', html) - ]) + hidden_inputs = {} + for input in re.findall(r'<input([^>]+)>', html): + if not re.search(r'type=(["\'])hidden\1', input): + continue + name = re.search(r'name=(["\'])(?P<value>.+?)\1', input) + if not name: + continue + value = re.search(r'value=(["\'])(?P<value>.*?)\1', input) + if not value: + continue + hidden_inputs[name.group('value')] = value.group('value') + return hidden_inputs def _form_hidden_inputs(self, form_id, html): form = self._search_regex( From e64b756943440d602dc757f81787cad6aee8f412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 21:55:07 +0600 Subject: [PATCH 1649/2721] [extractor/common] Interactive TFA code input --- youtube_dl/extractor/common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e2ace827f..65835d257 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -15,6 +15,7 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, compat_cookies, + compat_getpass, compat_HTTPError, compat_http_client, compat_urllib_error, @@ -610,7 +611,7 @@ class InfoExtractor(object): return (username, password) - def _get_tfa_info(self): + def _get_tfa_info(self, note='two-factor verification code'): """ Get the two-factor authentication info TODO - asking the user will be required for sms/phone verify @@ -624,7 +625,7 @@ class InfoExtractor(object): if downloader_params.get('twofactor', None) is not None: return downloader_params['twofactor'] - return None + return compat_getpass('Type %s and press [Return]: ' % note) # Helper functions for extracting OpenGraph info @staticmethod From 041bc3adc55bbe81649a1c5d283302e5a120659e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 15 Aug 2015 22:03:43 +0600 Subject: [PATCH 1650/2721] [youtube] 
Simplify two-factor authentication --- youtube_dl/extractor/youtube.py | 35 +++++++++++---------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bfa9a12a8..887c46d95 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -33,6 +33,7 @@ from ..utils import ( int_or_none, orderedSet, parse_duration, + remove_start, smuggle_url, str_to_int, unescapeHTML, @@ -129,35 +130,23 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # TODO add SMS and phone call support - these require making a request and then prompting the user if re.search(r'(?i)<form[^>]* id="challenge"', login_results) is not None: - tfa_code = self._get_tfa_info() + tfa_code = self._get_tfa_info('2-step verification code') - if tfa_code is None: - self._downloader.report_warning('Two-factor authentication required. Provide it with --twofactor <code>') - self._downloader.report_warning('(Note that only TOTP (Google Authenticator App) codes work at this time.)') + if not tfa_code: + self._downloader.report_warning( + 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' + '(Note that only TOTP (Google Authenticator App) codes work at this time.)') return False - def find_value(element_id): - match = re.search(r'id="%s"\s+value="(.+?)">' % element_id, login_results, re.M | re.U) - if match is None: - self._downloader.report_warning('Failed to get %s - did the page structure change?' 
% id) - return match.group(1) + tfa_code = remove_start(tfa_code, 'G-') - challengeId = find_value('challengeId') - challengeType = find_value('challengeType') - gxf = find_value('gxf') + tfa_form_strs = self._form_hidden_inputs('challenge', login_results) - tfa_form_strs = { - 'challengeId': challengeId, - 'challengeType': challengeType, # This doesn't appear to change - 'continue': 'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', - 'service': 'youtube', - 'hl': 'en_US', - 'checkedDomains': 'youtube', - 'pstMsg': '0', - 'gxf': gxf, + tfa_form_strs.update({ 'Pin': tfa_code, 'TrustDevice': 'on', - } + }) + tfa_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in tfa_form_strs.items()) tfa_data = compat_urllib_parse.urlencode(tfa_form).encode('ascii') @@ -170,7 +159,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return False if re.search(r'(?i)<form[^>]* id="challenge"', tfa_results) is not None: - self._downloader.report_warning('Two-factor code expired. Please try again, or use a one-use backup code instead.') + self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.') return False if re.search(r'(?i)<form[^>]* id="gaia_loginform"', tfa_results) is not None: self._downloader.report_warning('unable to log in - did the page structure change?') From eaa5646483d22d3b658dcf63b61e6c3b67aa5bc5 Mon Sep 17 00:00:00 2001 From: "Sergey M." <dstftw@gmail.com> Date: Sat, 15 Aug 2015 22:57:06 +0600 Subject: [PATCH 1651/2721] [README.md] Clarify configuration file usage (Closes #6530) --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 542a7c26a..25844eb6d 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,14 @@ which means you can modify it, redistribute it or use it however you like. 
# CONFIGURATION -You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.config/youtube-dl/config`. On Windows, the configuration file locations are `%APPDATA%\youtube-dl\config.txt` and `C:\Users\<user name>\youtube-dl.conf`. +You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux, system wide configuration file is located at `/etc/youtube-dl.conf` and user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. For example, with the following configration file youtube-dl will always extract the audio, not copy the mtime and use proxy: +``` +--extract-audio +--no-mtime +--proxy 127.0.0.1:3128 +``` + +You can use `--ignore-config` if you want to disable configuration file for a particular youtube-dl run. 
### Authentication with `.netrc` file ### From 221a59fe6fe22ec286830319e72cbabdc83fd02f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Caletka?= <ondrej@caletka.cz> Date: Sun, 9 Aug 2015 12:27:31 +0200 Subject: [PATCH 1652/2721] [playtvak] Initial support for videos from Mafra group servers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support for videos in articles from idnes.cz, lidovky.cz, metro.cz, as well as video and live streams from playtvak.cz Signed-off-by: Ondřej Caletka <ondrej@caletka.cz> --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/playtvak.py | 127 +++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 youtube_dl/extractor/playtvak.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 83d21bd15..5307240f8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -448,6 +448,7 @@ from .planetaplay import PlanetaPlayIE from .pladform import PladformIE from .played import PlayedIE from .playfm import PlayFMIE +from .playtvak import PlaytvakIE from .playvid import PlayvidIE from .playwire import PlaywireIE from .podomatic import PodomaticIE diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py new file mode 100644 index 000000000..6dff6650c --- /dev/null +++ b/youtube_dl/extractor/playtvak.py @@ -0,0 +1,127 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, + compat_urllib_parse, +) +from ..utils import ExtractorError + + +def _extract_json(code): + return re.sub( + r'(?s)^VideoPlayer.data\("", ({.*})\);?\s*?(?://[^\n]*)*$', r'\1', code) + + +class PlaytvakIE(InfoExtractor): + _VALID_URL = r'https?://.*?(playtvak|idnes|lidovky|metro)\.cz/.*\?c=(?P<id>[A-Z][0-9]{6}_[0-9]{6}_.*)' + _TESTS = [{ + 'url': 
'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko', + 'md5': '4525ae312c324b4be2f4603cc78ceb4a', + 'info_dict': { + 'id': 'A150730_150323_hodinovy-manzel_kuko', + 'ext': 'mp4', + 'title': 'Vyžeňte vosy a sršně ze zahrady', + 'thumbnail': 'http://oidnes.cz/15/074/mobil/KUK5cea00_010hodmanel58154.jpg', + 'description': 'Málo co kazí atmosféru venkovního posezení tak jako neustálé bzučení kolem hlavy. Vyzkoušejte náš lapač a odpuzovač vos a sršňů.', + } + }, { # live video test + 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat', + 'info_dict': { + 'id': 'A150624_164934_planespotting_cat', + 'ext': 'flv', + 'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'thumbnail': 'http://data.idnes.cz/soubory/servisni-play-porady/89A150630_ACEK_026_VIDEOPLAYER-STREA.PNG', + 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', + 'is_live': True, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }, { # idnes.cz + 'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku', + 'md5': '819832ba33cd7016e58a6658577fe289', + 'info_dict': { + 'id': 'A150809_104116_domaci_pku', + 'ext': 'mp4', + 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se', + 'thumbnail': 'http://i.idnes.cz/15/081/vidw/SHA5d1786_pizzaauto.jpg', + 'description': 'Na sociálních sítích se objevila výzva, aby lidé, kteří v horkých letních dnech uvidí v zaparkovaném autě zavřeného psa, neváhali rozbít okénko. Zastánci tohoto postoje argumentují zdravím zvířete, které v dusnu může zkolabovat. 
Policie doporučuje nejprve volat tísňovou linku.', + } + }, { # lidovky.cz + 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE', + 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8', + 'info_dict': { + 'id': 'A150808_214044_ln-video_ELE', + 'ext': 'mp4', + 'title': 'Táhni! Demonstrace proti imigrantům budila emoce', + 'thumbnail': 'http://i.idnes.cz/15/081/vidw/PID5d1d52_vandas3.jpg', + 'description': 'Desítky lidí se sešly v Praze na protest proti imigrantům. Současně probíhala i demonstrace na jejich podporu. Na Staroměstském náměstí vystoupil i předseda dělnické strany Tomáš Vandas a kontroverzní slovenský politik Marian Kotleba. Dalšího slovenského nacionalistu Mariána Magáta odvedla policie.', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + infourl = self._html_search_regex(r'Misc.videoFLV\({ data: "([^"]+)"', webpage, 'xmlinfourl') + parsedurl = compat_urlparse.urlparse(infourl) + qs = compat_urlparse.parse_qs(parsedurl.query) + if 'reklama' in qs: # Don't ask for ads + qs['reklama'] = ['0'] + qs['type'] = ['js'] # Ask for JS-based info file + newquery = compat_urllib_parse.urlencode(qs, True) + infourl = compat_urlparse.urlunparse(parsedurl[:4] + (newquery, '')) + jsoninfo = self._download_json(infourl, video_id, transform_source=_extract_json) + + item = None + for i in jsoninfo['items']: + if i['type'] == 'video' or i['type'] == 'stream': + item = i + break + if item is None: + raise ExtractorError('No suitable stream found') + title = item['title'] + thumbnail = item['image'] + is_live = item['type'] == 'stream' + if is_live: + title = self._live_title(title) + + formats = [] + for fmt in item['video']: + format_entry = {'url': fmt['file'], + 'format_id': ("%s_%s" % (fmt['format'], fmt['quality'])), + } + if fmt['quality'] == 'middle': + format_entry['quality'] = -2 + elif fmt['quality'] == 'low': + format_entry['quality'] = 
-3 + + if fmt['format'] == 'mp4': + format_entry['ext'] = 'mp4' + elif fmt['format'] == 'webm': + format_entry['ext'] = 'webm' + elif fmt['format'] == 'apple': + format_entry['ext'] = 'mp4' + format_entry['protocol'] = 'm3u8' + # Some streams have mp3 audio which does not play + # well with ffmpeg filter aac_adtstoasc + format_entry['preference'] = -1 + elif fmt['format'] == 'rtmp': + format_entry['ext'] = 'flv' + else: # Other formats not supported yet + continue + + formats.append(format_entry) + + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'description': self._og_search_description(webpage), + 'is_live': is_live, + 'formats': formats, + } From 276c9897720fe087924aef4ac80cf528e621b832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Aug 2015 03:07:05 +0600 Subject: [PATCH 1653/2721] [playtvak] Improve and simplify --- youtube_dl/extractor/playtvak.py | 145 ++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index 6dff6650c..4e5034dc6 100644 --- a/youtube_dl/extractor/playtvak.py +++ b/youtube_dl/extractor/playtvak.py @@ -1,23 +1,22 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..compat import ( compat_urlparse, compat_urllib_parse, ) -from ..utils import ExtractorError - - -def _extract_json(code): - return re.sub( - r'(?s)^VideoPlayer.data\("", ({.*})\);?\s*?(?://[^\n]*)*$', r'\1', code) +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + qualities, +) class PlaytvakIE(InfoExtractor): - _VALID_URL = r'https?://.*?(playtvak|idnes|lidovky|metro)\.cz/.*\?c=(?P<id>[A-Z][0-9]{6}_[0-9]{6}_.*)' + IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz' + _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)' _TESTS = [{ 'url': 
'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko', 'md5': '4525ae312c324b4be2f4603cc78ceb4a', @@ -25,8 +24,12 @@ class PlaytvakIE(InfoExtractor): 'id': 'A150730_150323_hodinovy-manzel_kuko', 'ext': 'mp4', 'title': 'Vyžeňte vosy a sršně ze zahrady', - 'thumbnail': 'http://oidnes.cz/15/074/mobil/KUK5cea00_010hodmanel58154.jpg', - 'description': 'Málo co kazí atmosféru venkovního posezení tak jako neustálé bzučení kolem hlavy. Vyzkoušejte náš lapač a odpuzovač vos a sršňů.', + 'description': 'md5:f93d398691044d303bc4a3de62f3e976', + 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 279, + 'timestamp': 1438732860, + 'upload_date': '20150805', + 'is_live': False, } }, { # live video test 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat', @@ -34,8 +37,8 @@ class PlaytvakIE(InfoExtractor): 'id': 'A150624_164934_planespotting_cat', 'ext': 'flv', 'title': 're:^Přímý přenos iDNES.cz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'thumbnail': 'http://data.idnes.cz/soubory/servisni-play-porady/89A150630_ACEK_026_VIDEOPLAYER-STREA.PNG', 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', + 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', 'is_live': True, }, 'params': { @@ -48,8 +51,12 @@ class PlaytvakIE(InfoExtractor): 'id': 'A150809_104116_domaci_pku', 'ext': 'mp4', 'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se', - 'thumbnail': 'http://i.idnes.cz/15/081/vidw/SHA5d1786_pizzaauto.jpg', - 'description': 'Na sociálních sítích se objevila výzva, aby lidé, kteří v horkých letních dnech uvidí v zaparkovaném autě zavřeného psa, neváhali rozbít okénko. Zastánci tohoto postoje argumentují zdravím zvířete, které v dusnu může zkolabovat. 
Policie doporučuje nejprve volat tísňovou linku.', + 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2', + 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 39, + 'timestamp': 1438969140, + 'upload_date': '20150807', + 'is_live': False, } }, { # lidovky.cz 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE', @@ -58,70 +65,102 @@ class PlaytvakIE(InfoExtractor): 'id': 'A150808_214044_ln-video_ELE', 'ext': 'mp4', 'title': 'Táhni! Demonstrace proti imigrantům budila emoce', - 'thumbnail': 'http://i.idnes.cz/15/081/vidw/PID5d1d52_vandas3.jpg', - 'description': 'Desítky lidí se sešly v Praze na protest proti imigrantům. Současně probíhala i demonstrace na jejich podporu. Na Staroměstském náměstí vystoupil i předseda dělnické strany Tomáš Vandas a kontroverzní slovenský politik Marian Kotleba. Dalšího slovenského nacionalistu Mariána Magáta odvedla policie.', + 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c', + 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1439052180, + 'upload_date': '20150808', + 'is_live': False, } + }, { + 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko', + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) - infourl = self._html_search_regex(r'Misc.videoFLV\({ data: "([^"]+)"', webpage, 'xmlinfourl') - parsedurl = compat_urlparse.urlparse(infourl) - qs = compat_urlparse.parse_qs(parsedurl.query) - if 'reklama' in qs: # Don't ask for ads - qs['reklama'] = ['0'] - qs['type'] = ['js'] # Ask for JS-based info file - newquery = compat_urllib_parse.urlencode(qs, True) - infourl = compat_urlparse.urlunparse(parsedurl[:4] + (newquery, '')) - jsoninfo = self._download_json(infourl, video_id, transform_source=_extract_json) + + info_url = self._html_search_regex( + r'Misc\.videoFLV\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') + + 
parsed_url = compat_urlparse.urlparse(info_url) + + qs = compat_urlparse.parse_qs(parsed_url.query) + qs.update({ + 'reklama': ['0'], + 'type': ['js'], + }) + + info_url = compat_urlparse.urlunparse( + parsed_url._replace(query = compat_urllib_parse.urlencode(qs, True))) + + json_info = self._download_json( + info_url, video_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) item = None - for i in jsoninfo['items']: - if i['type'] == 'video' or i['type'] == 'stream': + for i in json_info['items']: + if i.get('type') == 'video' or i.get('type') == 'stream': item = i break - if item is None: + if not item: raise ExtractorError('No suitable stream found') - title = item['title'] - thumbnail = item['image'] - is_live = item['type'] == 'stream' - if is_live: - title = self._live_title(title) + + quality = qualities(['low', 'middle', 'high']) formats = [] for fmt in item['video']: - format_entry = {'url': fmt['file'], - 'format_id': ("%s_%s" % (fmt['format'], fmt['quality'])), - } - if fmt['quality'] == 'middle': - format_entry['quality'] = -2 - elif fmt['quality'] == 'low': - format_entry['quality'] = -3 + video_url = fmt.get('file') + if not video_url: + continue - if fmt['format'] == 'mp4': - format_entry['ext'] = 'mp4' - elif fmt['format'] == 'webm': - format_entry['ext'] = 'webm' - elif fmt['format'] == 'apple': - format_entry['ext'] = 'mp4' - format_entry['protocol'] = 'm3u8' + format_ = fmt['format'] + format_id = '%s_%s' % (format_, fmt['quality']) + preference = None + + if format_ in ['mp4', 'webm']: + ext = format_ + elif format_ == 'rtmp': + ext = 'flv' + elif format_ == 'apple': + ext = 'mp4' # Some streams have mp3 audio which does not play # well with ffmpeg filter aac_adtstoasc - format_entry['preference'] = -1 - elif fmt['format'] == 'rtmp': - format_entry['ext'] = 'flv' + preference = -1 + elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests + continue else: # Other formats not supported yet continue - 
formats.append(format_entry) - + formats.append({ + 'url': video_url, + 'ext': ext, + 'format_id': format_id, + 'quality': quality(fmt.get('quality')), + 'preference': preference, + }) self._sort_formats(formats) + + title = item['title'] + is_live = item['type'] == 'stream' + if is_live: + title = self._live_title(title) + timestamp = None + duration = None + if not is_live: + duration = int_or_none(item.get('length')) + timestamp = item.get('published') + if timestamp: + timestamp = parse_iso8601(timestamp[:-5]) + return { 'id': video_id, 'title': title, - 'thumbnail': thumbnail, 'description': self._og_search_description(webpage), + 'thumbnail': item.get('image'), + 'duration': duration, + 'timestamp': timestamp, 'is_live': is_live, 'formats': formats, } From 6900b4f6f52ba783bb7a6028fd174250c8832a38 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 16 Aug 2015 01:05:04 +0200 Subject: [PATCH 1654/2721] release 2015.08.16 --- CONTRIBUTING.md | 2 +- docs/supportedsites.md | 6 ++++++ youtube_dl/version.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 588b15bde..42333c450 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -125,7 +125,7 @@ If you want to add support for a new site, you can follow this quick list (assum ``` 5. Add an import in [`youtube_dl/extractor/__init__.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/__init__.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will be then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. 
Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L38). Add tests and code for as many as you want. +7. Have a look at [`youtube_dl/common/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L62-L200). Add tests and code for as many as you want. 8. If you can, check the code with [flake8](https://pypi.python.org/pypi/flake8). 9. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e21471102..9099e2da4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -150,6 +150,7 @@ - **EroProfile** - **Escapist** - **ESPN** (Currently broken) + - **EsriVideo** - **EveryonesMixtape** - **exfm**: ex.fm - **ExpoTV** @@ -220,6 +221,8 @@ - **imdb:list**: Internet Movie Database lists - **Imgur** - **Ina** + - **Indavideo** + - **IndavideoEmbed** - **InfoQ** - **Instagram** - **instagram:user**: Instagram user profile @@ -386,6 +389,7 @@ - **PlanetaPlay** - **play.fm** - **played.to** + - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** - **Playwire** - **plus.google**: Google Plus @@ -432,6 +436,7 @@ - **rtve.es:alacarta**: RTVE a la carta - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams + - **RTVNH** - **RUHD** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels @@ -455,6 +460,7 @@ - **ServingSys** - **Sexu** - **SexyKarma**: 
Sexy Karma and Watch Indian Porn + - **Shahid** - **Shared** - **ShareSix** - **Sina** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6462d4477..689d6fca7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.09' +__version__ = '2015.08.16' From 974f1a385a452b1c86d6f3ff16035b30baaeeb64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Aug 2015 17:22:13 +0600 Subject: [PATCH 1655/2721] [playtvak] Improve description extraction and add test for metro --- youtube_dl/extractor/playtvak.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index 4e5034dc6..2b338966f 100644 --- a/youtube_dl/extractor/playtvak.py +++ b/youtube_dl/extractor/playtvak.py @@ -71,6 +71,19 @@ class PlaytvakIE(InfoExtractor): 'upload_date': '20150808', 'is_live': False, } + }, { # metro.cz + 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row', + 'md5': '84fc1deedcac37b7d4a6ccae7c716668', + 'info_dict': { + 'id': 'A141111_173251_metro-extra_row', + 'ext': 'mp4', + 'title': 'Recesisté udělali z billboardu kolotoč', + 'description': 'md5:7369926049588c3989a66c9c1a043c4c', + 'thumbnail': 're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1415725500, + 'upload_date': '20141111', + 'is_live': False, + } }, { 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko', 'only_matching': True, @@ -146,6 +159,8 @@ class PlaytvakIE(InfoExtractor): is_live = item['type'] == 'stream' if is_live: title = self._live_title(title) + description = self._og_search_description(webpage, default=None) or self._html_search_meta( + 'description', webpage, 'description') timestamp = None duration = None if not is_live: @@ -157,7 +172,7 @@ 
class PlaytvakIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'description': self._og_search_description(webpage), + 'description': description, 'thumbnail': item.get('image'), 'duration': duration, 'timestamp': timestamp, From 7fc18d930917ad407c78bb9b0465dc4fae2fb335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Aug 2015 19:53:14 +0600 Subject: [PATCH 1656/2721] [screenwavemedia] Fix extraction (Closes #6575) --- youtube_dl/extractor/screenwavemedia.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 3bc84989e..78b068be2 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -1,6 +1,8 @@ # encoding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -35,15 +37,18 @@ class ScreenwaveMediaIE(InfoExtractor): sources = self._parse_json( js_to_json( - self._search_regex( - r"sources\s*:\s*(\[[^\]]+?\])", playerconfig, - 'sources', - ).replace( - "' + thisObj.options.videoserver + '", - videoserver - ).replace( - "' + playerVidId + '", - video_id + re.sub( + r'(?s)/\*.*?\*/', '', + self._search_regex( + r"sources\s*:\s*(\[[^\]]+?\])", playerconfig, + 'sources', + ).replace( + "' + thisObj.options.videoserver + '", + videoserver + ).replace( + "' + playerVidId + '", + video_id + ) ) ), video_id From 008687427725b8d857c44d75f358059c2533539a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Aug 2015 19:59:03 +0600 Subject: [PATCH 1657/2721] [playtvak] Use tuples --- youtube_dl/extractor/playtvak.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index 2b338966f..278fdc1aa 100644 --- a/youtube_dl/extractor/playtvak.py +++ 
b/youtube_dl/extractor/playtvak.py @@ -120,7 +120,7 @@ class PlaytvakIE(InfoExtractor): if not item: raise ExtractorError('No suitable stream found') - quality = qualities(['low', 'middle', 'high']) + quality = qualities(('low', 'middle', 'high')) formats = [] for fmt in item['video']: @@ -132,7 +132,7 @@ class PlaytvakIE(InfoExtractor): format_id = '%s_%s' % (format_, fmt['quality']) preference = None - if format_ in ['mp4', 'webm']: + if format_ in ('mp4', 'webm'): ext = format_ elif format_ == 'rtmp': ext = 'flv' From 8626b23e4ea091c4093c25626ca9fc12293b2830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Aug 2015 20:18:15 +0600 Subject: [PATCH 1658/2721] [screenwavemedia] Make more robust --- youtube_dl/extractor/screenwavemedia.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py index 78b068be2..220d39078 100644 --- a/youtube_dl/extractor/screenwavemedia.py +++ b/youtube_dl/extractor/screenwavemedia.py @@ -51,19 +51,38 @@ class ScreenwaveMediaIE(InfoExtractor): ) ) ), - video_id + video_id, fatal=False ) + # Fallback to hardcoded sources if JS changes again + if not sources: + sources = [{ + 'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id), + 'type': 'mp4', + 'label': format_label, + } for format_id, format_label in ( + ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))] + sources.append({ + 'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id), + 'type': 'hls', + }) + formats = [] for source in sources: if source['type'] == 'hls': formats.extend(self._extract_m3u8_formats(source['file'], video_id)) else: + file_ = source.get('file') + if not file_: + continue format_label = source.get('label') + format_id = self._search_regex( + r'_(.+?)\.[^.]+$', file_, 'format id', default=None) height = 
int_or_none(self._search_regex( r'^(\d+)[pP]', format_label, 'height', default=None)) formats.append({ 'url': source['file'], + 'format_id': format_id, 'format': format_label, 'ext': source.get('type'), 'height': height, From f74a7348f6ac52259ea66b74a40165b448fbd702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 16 Aug 2015 23:33:17 +0600 Subject: [PATCH 1659/2721] [youtube:search_url] Fix extraction (Closes #6578) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 887c46d95..4d1ca9298 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1762,7 +1762,7 @@ class YoutubeSearchURLIE(InfoExtractor): r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML') part_codes = re.findall( - r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) + r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code) entries = [] for part_code in part_codes: part_title = self._html_search_regex( From cbaed4bb5e5e90103a1164d9326043a3abd0bf83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 17 Aug 2015 02:04:13 +0600 Subject: [PATCH 1660/2721] [youtube] Expand _VALID_URL to support vid.plus --- youtube_dl/extractor/youtube.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4d1ca9298..8e2da46e3 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -202,7 +202,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): v= ) )) - |youtu\.be/ # just youtu.be/xxxx + |(?: + youtu\.be| # just youtu.be/xxxx + vid\.plus # or vid.plus/xxxx + )/ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) )? 
# all until now is optional -> you can pass the naked ID @@ -624,6 +627,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, + }, + { + 'url': 'http://vid.plus/FlRa-iH7PGw', + 'only_matching': True, } ] From c00c7c0af0fdcb380aef0ea9e072a61979d17816 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 16 Aug 2015 23:39:50 +0200 Subject: [PATCH 1661/2721] [sportdeutschland] Fix extraction --- youtube_dl/extractor/sportdeutschland.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index 1a57aebf1..7ec6c613f 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -38,10 +38,12 @@ class SportDeutschlandIE(InfoExtractor): 'upload_date': '20140825', 'description': 'md5:60a20536b57cee7d9a4ec005e8687504', 'timestamp': 1408976060, + 'duration': 2732, 'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. 
Keun Lee', 'thumbnail': 're:^https?://.*\.jpg$', 'view_count': int, 'categories': ['Li-Ning Badminton WM 2014'], + } }] @@ -50,7 +52,7 @@ class SportDeutschlandIE(InfoExtractor): video_id = mobj.group('id') sport_id = mobj.group('sport') - api_url = 'http://splink.tv/api/permalinks/%s/%s' % ( + api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % ( sport_id, video_id) req = compat_urllib_request.Request(api_url, headers={ 'Accept': 'application/vnd.vidibus.v2.html+json', @@ -58,12 +60,11 @@ class SportDeutschlandIE(InfoExtractor): }) data = self._download_json(req, video_id) - categories = list(data.get('section', {}).get('tags', {}).values()) asset = data['asset'] - assets_info = self._download_json(asset['url'], video_id) + categories = [data['section']['title']] formats = [] - smil_url = assets_info['video'] + smil_url = asset['video'] if '.smil' in smil_url: m3u8_url = smil_url.replace('.smil', '.m3u8') formats.extend( @@ -91,6 +92,7 @@ class SportDeutschlandIE(InfoExtractor): 'title': asset['title'], 'thumbnail': asset.get('image'), 'description': asset.get('teaser'), + 'duration': asset.get('duration'), 'categories': categories, 'view_count': asset.get('views'), 'rtmp_live': asset.get('live'), From 0fa5795b85f8d97bf67f10e39a79b49656be58db Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 16 Aug 2015 23:40:07 +0200 Subject: [PATCH 1662/2721] release 2015.08.16.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 689d6fca7..c090c6df7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.16' +__version__ = '2015.08.16.1' From 369c12e038c3183a0e725a929dd9bed4ec35fa11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 17 Aug 2015 20:16:43 +0600 Subject: [PATCH 
1663/2721] [twitch] Allow untitled videos (Closes #6585) --- youtube_dl/extractor/twitch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index a2b6a35aa..0521257e5 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -132,7 +132,7 @@ class TwitchItemBaseIE(TwitchBaseIE): def _extract_info(self, info): return { 'id': info['_id'], - 'title': info['title'], + 'title': info.get('title') or 'Untitled Broadcast', 'description': info['description'], 'duration': info['length'], 'thumbnail': info['preview'], From 7a6e8a1b17a6a821d9200531ebf65562ccc2d428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 17 Aug 2015 20:20:04 +0600 Subject: [PATCH 1664/2721] [twitch] Make more robust --- youtube_dl/extractor/twitch.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 0521257e5..8cba97bd4 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -15,6 +15,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + int_or_none, parse_duration, parse_iso8601, ) @@ -133,13 +134,13 @@ class TwitchItemBaseIE(TwitchBaseIE): return { 'id': info['_id'], 'title': info.get('title') or 'Untitled Broadcast', - 'description': info['description'], - 'duration': info['length'], - 'thumbnail': info['preview'], - 'uploader': info['channel']['display_name'], - 'uploader_id': info['channel']['name'], - 'timestamp': parse_iso8601(info['recorded_at']), - 'view_count': info['views'], + 'description': info.get('description'), + 'duration': int_or_none(info.get('length')), + 'thumbnail': info.get('preview'), + 'uploader': info.get('channel', {}).get('display_name'), + 'uploader_id': info.get('channel', {}).get('name'), + 'timestamp': parse_iso8601(info.get('recorded_at')), + 'view_count': 
int_or_none(info.get('views')), } def _real_extract(self, url): From 9c724a980210ec6a7659fe869cce401dde6e189d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 17 Aug 2015 20:23:52 +0600 Subject: [PATCH 1665/2721] [twitch:vod] Add test for #6585 --- youtube_dl/extractor/twitch.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 8cba97bd4..4f4eb6d72 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -188,7 +188,7 @@ class TwitchVodIE(TwitchItemBaseIE): _ITEM_TYPE = 'vod' _ITEM_SHORTCUT = 'v' - _TEST = { + _TESTS = [{ 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s', 'info_dict': { 'id': 'v6528877', @@ -207,7 +207,26 @@ class TwitchVodIE(TwitchItemBaseIE): # m3u8 download 'skip_download': True, }, - } + }, { + # Untitled broadcast (title is None) + 'url': 'http://www.twitch.tv/belkao_o/v/11230755', + 'info_dict': { + 'id': 'v11230755', + 'ext': 'mp4', + 'title': 'Untitled Broadcast', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 1638, + 'timestamp': 1439746708, + 'upload_date': '20150816', + 'uploader': 'BelkAO_o', + 'uploader_id': 'belkao_o', + 'view_count': int, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] def _real_extract(self, url): item_id = self._match_id(url) From 3b9b32f404ab09d9dc801dd8ec57d79711be5cb3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 18 Aug 2015 13:02:41 +0200 Subject: [PATCH 1666/2721] [libsyn] Strip options from player URL --- youtube_dl/extractor/libsyn.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py index 9ab1416f5..d375695f5 100644 --- a/youtube_dl/extractor/libsyn.py +++ b/youtube_dl/extractor/libsyn.py @@ -8,9 +8,9 @@ from ..utils import unified_strdate class 
LibsynIE(InfoExtractor): - _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)' + _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))' - _TEST = { + _TESTS = [{ 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/', 'md5': '443360ee1b58007bc3dcf09b41d093bb', 'info_dict': { @@ -19,12 +19,24 @@ class LibsynIE(InfoExtractor): 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart", 'description': 'md5:601cb790edd05908957dae8aaa866465', 'upload_date': '20150220', + 'thumbnail': 're:^https?://.*', }, - } + }, { + 'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/', + 'md5': '6c5cb21acd622d754d3b1a92b582ce42', + 'info_dict': { + 'id': '3727166', + 'ext': 'mp3', + 'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career', + 'upload_date': '20150818', + 'thumbnail': 're:^https?://.*', + } + }] def _real_extract(self, url): - video_id = self._match_id(url) - + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + url = m.group('mainurl') webpage = self._download_webpage(url, video_id) formats = [{ @@ -32,20 +44,18 @@ class LibsynIE(InfoExtractor): } for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))] podcast_title = self._search_regex( - r'<h2>([^<]+)</h2>', webpage, 'title') + r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None) episode_title = self._search_regex( - r'<h3>([^<]+)</h3>', webpage, 'title', default=None) + r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title') title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title description = self._html_search_regex( r'<div id="info_text_body">(.+?)</div>', webpage, - 'description', fatal=False) - + 'description', default=None) 
thumbnail = self._search_regex( r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"', webpage, 'thumbnail', fatal=False) - release_date = unified_strdate(self._search_regex( r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False)) From 34a4cd0a34bc9f07d865b02f6982fba60421ed0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 18 Aug 2015 20:02:56 +0600 Subject: [PATCH 1667/2721] [telecinco] Relax _VALID_URL (Closes #6601) --- youtube_dl/extractor/telecinco.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index a0c744fd1..ae94f055c 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -6,7 +6,7 @@ from .mitele import MiTeleIE class TelecincoIE(MiTeleIE): IE_NAME = 'telecinco.es' - _VALID_URL = r'https?://www\.telecinco\.es/[^/]+/[^/]+/(?:[^/]+/)?(?P<id>.*?)\.html' + _VALID_URL = r'https?://www\.telecinco\.es/(?:[^/]+/)+(?P<id>.+?)\.html' _TESTS = [{ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', @@ -23,4 +23,7 @@ class TelecincoIE(MiTeleIE): }, { 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', 'only_matching': True, + }, { + 'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html', + 'only_matching': True, }] From 03c635a4b57e6ea4b874029d9fe3738508f6fc7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 18 Aug 2015 20:26:45 +0600 Subject: [PATCH 1668/2721] [twitch] Fix login (Closes #6599) --- youtube_dl/extractor/twitch.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4f4eb6d72..023911c41 100644 --- 
a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -12,6 +12,7 @@ from ..compat import ( compat_urllib_parse, compat_urllib_parse_urlparse, compat_urllib_request, + compat_urlparse, ) from ..utils import ( ExtractorError, @@ -27,7 +28,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'http://usher.twitch.tv' _LOGIN_URL = 'https://secure.twitch.tv/login' - _LOGIN_POST_URL = 'https://passport.twitch.tv/authorize' + _LOGIN_POST_URL = 'https://passport.twitch.tv/authentications/new' _NETRC_MACHINE = 'twitch' def _handle_error(self, response): @@ -70,8 +71,15 @@ class TwitchBaseIE(InfoExtractor): 'password': password.encode('utf-8'), }) + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_POST_URL, group='url') + + if not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + request = compat_urllib_request.Request( - self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8')) request.add_header('Referer', self._LOGIN_URL) response = self._download_webpage( request, None, 'Logging in as %s' % username) From 559f4c550f215a657ce386cab572bfc212128595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 18 Aug 2015 20:27:58 +0600 Subject: [PATCH 1669/2721] [playtvak] PEP 8 --- youtube_dl/extractor/playtvak.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py index 278fdc1aa..e360404f7 100644 --- a/youtube_dl/extractor/playtvak.py +++ b/youtube_dl/extractor/playtvak.py @@ -106,7 +106,7 @@ class PlaytvakIE(InfoExtractor): }) info_url = compat_urlparse.urlunparse( - parsed_url._replace(query = compat_urllib_parse.urlencode(qs, True))) + parsed_url._replace(query=compat_urllib_parse.urlencode(qs, 
True))) json_info = self._download_json( info_url, video_id, From f3a65d9636908ee49ff3d50c24efb8067caa32c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 18 Aug 2015 21:10:52 +0600 Subject: [PATCH 1670/2721] [travis] Move to new infrastructure We don't use rtmpdump in tests anyway --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 511bee64c..e78a2fa76 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,9 +5,7 @@ python: - "3.2" - "3.3" - "3.4" -before_install: - - sudo apt-get update -qq - - sudo apt-get install -yqq rtmpdump +sudo: false script: nosetests test --verbose notifications: email: From a01da8bbf83dfd4f87e3fdd105b9f7c850e76cad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 18 Aug 2015 23:02:57 +0600 Subject: [PATCH 1671/2721] [crunchyroll] Workaround fplive.net rtmp URLs (Closes #5881) --- youtube_dl/extractor/crunchyroll.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index d1b6d7366..33a033a7f 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -14,11 +14,13 @@ from ..compat import ( compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_request, + compat_urlparse, ) from ..utils import ( ExtractorError, bytes_to_intlist, intlist_to_bytes, + remove_end, unified_strdate, urlencode_postdata, ) @@ -279,6 +281,20 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text stream_info = streamdata.find('./{default}preload/stream_info') video_url = stream_info.find('./host').text video_play_path = stream_info.find('./file').text + + if '.fplive.net/' in video_url: + video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) + parsed_video_url = compat_urlparse.urlparse(video_url) + direct_video_url = 
compat_urlparse.urlunparse(parsed_video_url._replace( + netloc='v.lvlt.crcdn.net', + path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1]))) + if self._is_valid_url(direct_video_url, video_id, video_format): + formats.append({ + 'url': direct_video_url, + 'format_id': video_format, + }) + continue + formats.append({ 'url': video_url, 'play_path': video_play_path, From ca681f7041838fa215f8ab5266cd7b442f3f9445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 19 Aug 2015 20:52:36 +0600 Subject: [PATCH 1672/2721] [videobam] Remove extractor videobam.com redirects to sendvid.com now --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/videobam.py | 81 -------------------------------- 2 files changed, 82 deletions(-) delete mode 100644 youtube_dl/extractor/videobam.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fa9acc923..c8c9f1855 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -696,7 +696,6 @@ from .vgtv import ( from .vh1 import VH1IE from .vice import ViceIE from .viddler import ViddlerIE -from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE diff --git a/youtube_dl/extractor/videobam.py b/youtube_dl/extractor/videobam.py deleted file mode 100644 index 0eb3d9414..000000000 --- a/youtube_dl/extractor/videobam.py +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import unicode_literals - -import re -import json - -from .common import InfoExtractor -from ..utils import int_or_none - - -class VideoBamIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)' - - _TESTS = [ - { - 'url': 'http://videobam.com/OiJQM', - 'md5': 'db471f27763a531f10416a0c58b5a1e0', - 'info_dict': { - 'id': 'OiJQM', - 'ext': 'mp4', - 'title': 'Is Alcohol Worse Than Ecstasy?', 
- 'description': 'md5:d25b96151515c91debc42bfbb3eb2683', - 'uploader': 'frihetsvinge', - }, - }, - { - 'url': 'http://videobam.com/pqLvq', - 'md5': 'd9a565b5379a99126ef94e1d7f9a383e', - 'note': 'HD video', - 'info_dict': { - 'id': 'pqLvq', - 'ext': 'mp4', - 'title': '_', - } - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - page = self._download_webpage('http://videobam.com/%s' % video_id, video_id, 'Downloading page') - - formats = [] - - for preference, format_id in enumerate(['low', 'high']): - mobj = re.search(r"%s: '(?P<url>[^']+)'" % format_id, page) - if not mobj: - continue - formats.append({ - 'url': mobj.group('url'), - 'ext': 'mp4', - 'format_id': format_id, - 'preference': preference, - }) - - if not formats: - player_config = json.loads(self._html_search_regex(r'var player_config = ({.+?});', page, 'player config')) - formats = [{ - 'url': item['url'], - 'ext': 'mp4', - } for item in player_config['playlist'] if 'autoPlay' in item] - - self._sort_formats(formats) - - title = self._og_search_title(page, default='_', fatal=False) - description = self._og_search_description(page, default=None) - thumbnail = self._og_search_thumbnail(page) - uploader = self._html_search_regex(r'Upload by ([^<]+)</a>', page, 'uploader', fatal=False, default=None) - view_count = int_or_none( - self._html_search_regex(r'<strong>Views:</strong> (\d+) ', page, 'view count', fatal=False)) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'view_count': view_count, - 'formats': formats, - 'age_limit': 18, - } From f877c6ae5a6e252d6904f90d597479451d2107aa Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Wed, 19 Aug 2015 23:11:25 +0800 Subject: [PATCH 1673/2721] [theplatform] Use InfoExtractor._parse_smil_formats() --- youtube_dl/extractor/common.py | 8 +++- youtube_dl/extractor/theplatform.py | 72 
++++++----------------------- 2 files changed, 20 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 65835d257..ac12be933 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1052,7 +1052,7 @@ class InfoExtractor(object): return self._search_regex( r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None): + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): base = smil_url for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): b = meta.get('base') or meta.get('httpBase') @@ -1091,6 +1091,12 @@ class InfoExtractor(object): 'width': width, 'height': height, }) + if transform_rtmp_url: + streamer, src = transform_rtmp_url(streamer, src) + formats[-1].update({ + 'url': streamer, + 'play_path': src, + }) continue src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 0643eccaf..29f938a76 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -9,9 +9,6 @@ import hashlib from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( determine_ext, ExtractorError, @@ -20,7 +17,8 @@ from ..utils import ( int_or_none, ) -_x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) +default_ns = 'http://www.w3.org/2005/SMIL21/Language' +_x = lambda p: xpath_with_ns(p, {'smil': default_ns}) class ThePlatformIE(InfoExtractor): @@ -145,63 +143,19 @@ class ThePlatformIE(InfoExtractor): 'url': src, }] - head = meta.find(_x('smil:head')) - body = meta.find(_x('smil:body')) - - f4m_node = body.find(_x('smil:seq//smil:video')) - if f4m_node is None: - f4m_node = body.find(_x('smil:seq/smil:video')) - if 
f4m_node is not None and '.f4m' in f4m_node.attrib['src']: - f4m_url = f4m_node.attrib['src'] - if 'manifest.f4m?' not in f4m_url: - f4m_url += '?' + formats = self._parse_smil_formats( + meta, smil_url, video_id, namespace=default_ns, # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com - f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = self._extract_f4m_formats(f4m_url, video_id) - else: - formats = [] - switch = body.find(_x('smil:switch')) - if switch is None: - switch = body.find(_x('smil:par//smil:switch')) - if switch is None: - switch = body.find(_x('smil:par/smil:switch')) - if switch is None: - switch = body.find(_x('smil:par')) - if switch is not None: - base_url = head.find(_x('smil:meta')).attrib['base'] - for f in switch.findall(_x('smil:video')): - attr = f.attrib - width = int_or_none(attr.get('width')) - height = int_or_none(attr.get('height')) - vbr = int_or_none(attr.get('system-bitrate'), 1000) - format_id = '%dx%d_%dk' % (width, height, vbr) - formats.append({ - 'format_id': format_id, - 'url': base_url, - 'play_path': 'mp4:' + attr['src'], - 'ext': 'flv', - 'width': width, - 'height': height, - 'vbr': vbr, - }) - else: - switch = body.find(_x('smil:seq//smil:switch')) - if switch is None: - switch = body.find(_x('smil:seq/smil:switch')) - for f in switch.findall(_x('smil:video')): - attr = f.attrib - vbr = int_or_none(attr.get('system-bitrate'), 1000) - ext = determine_ext(attr['src']) - if ext == 'once': - ext = 'mp4' - formats.append({ - 'format_id': compat_str(vbr), - 'url': attr['src'], - 'vbr': vbr, - 'ext': ext, - }) - self._sort_formats(formats) + f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'}, + transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src)) + + for _format in formats: + ext = determine_ext(_format['url']) + if ext == 'once': + _format['ext'] = 'mp4' + + self._sort_formats(formats) return { 'id': video_id, From 7900aede14d2e2a46c8fd4430e48cde41f354859 Mon Sep 17 
00:00:00 2001 From: ping <lipng.ong@gmail.com> Date: Wed, 22 Jul 2015 14:31:29 +0800 Subject: [PATCH 1674/2721] [mwave] New extractor for mwave.interest.me --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mwave.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/mwave.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c8c9f1855..006ef3922 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -345,6 +345,7 @@ from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE from .musicvault import MusicVaultIE from .muzu import MuzuTVIE +from .mwave import MwaveIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE from .myvi import MyviIE diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py new file mode 100644 index 000000000..7f91aa269 --- /dev/null +++ b/youtube_dl/extractor/mwave.py @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class MwaveIE(InfoExtractor): + IE_NAME = 'mwave' + _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', + 'info_dict': { + 'id': '168859', + 'ext': 'flv', + 'title': '[M COUNTDOWN] SISTAR - SHAKE IT', + 'creator': 'M COUNTDOWN', + } + }, { + 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168860', + 'info_dict': { + 'id': '168860', + 'ext': 'flv', + 'title': '[Full Ver.] M GIGS Ep. 
59 - IDIOTAPE Live Part 1', + 'creator': 'M-GIGS', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + stream_info = self._download_json( + 'http://mwave.interest.me/onair/vod_info.m?vodtype=CL§orid=&endinfo=Y&id=%s' % video_id, + 'Download stream info') + + formats = [] + for info in stream_info['cdn']: + f4m_stream = self._download_json(info['url'], video_id, 'Download f4m stream') + formats.extend( + self._extract_f4m_formats(f4m_stream['fileurl'] + '&g=PCROWKHLYUDY&hdcore=3.0.3', video_id)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': stream_info['title'], + 'creator': stream_info.get('program_title'), + 'formats': formats, + } From 22c83245c51faa53118a8f815b13b2e4c2df9923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 19 Aug 2015 23:07:41 +0600 Subject: [PATCH 1675/2721] [mwave] Improve --- youtube_dl/extractor/mwave.py | 50 ++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/mwave.py b/youtube_dl/extractor/mwave.py index 7f91aa269..66b523197 100644 --- a/youtube_dl/extractor/mwave.py +++ b/youtube_dl/extractor/mwave.py @@ -1,46 +1,58 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + parse_duration, +) class MwaveIE(InfoExtractor): - IE_NAME = 'mwave' _VALID_URL = r'https?://mwave\.interest\.me/mnettv/videodetail\.m\?searchVideoDetailVO\.clip_id=(?P<id>[0-9]+)' - _TESTS = [{ + _TEST = { 'url': 'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168859', + 'md5': 'c930e27b7720aaa3c9d0018dfc8ff6cc', 'info_dict': { 'id': '168859', 'ext': 'flv', 'title': '[M COUNTDOWN] SISTAR - SHAKE IT', - 'creator': 'M COUNTDOWN', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'M COUNTDOWN', + 'duration': 206, + 'view_count': int, } - }, { - 'url': 
'http://mwave.interest.me/mnettv/videodetail.m?searchVideoDetailVO.clip_id=168860', - 'info_dict': { - 'id': '168860', - 'ext': 'flv', - 'title': '[Full Ver.] M GIGS Ep. 59 - IDIOTAPE Live Part 1', - 'creator': 'M-GIGS', - } - }] + } def _real_extract(self, url): video_id = self._match_id(url) - stream_info = self._download_json( + vod_info = self._download_json( 'http://mwave.interest.me/onair/vod_info.m?vodtype=CL§orid=&endinfo=Y&id=%s' % video_id, - 'Download stream info') + video_id, 'Download vod JSON') formats = [] - for info in stream_info['cdn']: - f4m_stream = self._download_json(info['url'], video_id, 'Download f4m stream') + for num, cdn_info in enumerate(vod_info['cdn']): + stream_url = cdn_info.get('url') + if not stream_url: + continue + stream_name = cdn_info.get('name') or compat_str(num) + f4m_stream = self._download_json( + stream_url, video_id, + 'Download %s stream JSON' % stream_name) + f4m_url = f4m_stream.get('fileurl') + if not f4m_url: + continue formats.extend( - self._extract_f4m_formats(f4m_stream['fileurl'] + '&g=PCROWKHLYUDY&hdcore=3.0.3', video_id)) + self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name)) self._sort_formats(formats) return { 'id': video_id, - 'title': stream_info['title'], - 'creator': stream_info.get('program_title'), + 'title': vod_info['title'], + 'thumbnail': vod_info.get('cover'), + 'uploader': vod_info.get('program_title'), + 'duration': parse_duration(vod_info.get('time')), + 'view_count': int_or_none(vod_info.get('hit')), 'formats': formats, } From 26e1c3514f4af1ed60cd1114a653fe49e1fa8d11 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Aug 2015 01:24:32 +0800 Subject: [PATCH 1676/2721] [theplatform] Add ThePlatformFeedIE --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/theplatform.py | 162 +++++++++++++++++++++------- 2 files changed, 126 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/__init__.py 
b/youtube_dl/extractor/__init__.py index fa9acc923..b5f7ff9a9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -612,7 +612,10 @@ from .testurl import TestURLIE from .testtube import TestTubeIE from .tf1 import TF1IE from .theonion import TheOnionIE -from .theplatform import ThePlatformIE +from .theplatform import ( + ThePlatformIE, + ThePlatformFeedIE, +) from .thesixtyone import TheSixtyOneIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 29f938a76..f02e0f58d 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -15,13 +15,68 @@ from ..utils import ( xpath_with_ns, unsmuggle_url, int_or_none, + url_basename, + float_or_none, ) default_ns = 'http://www.w3.org/2005/SMIL21/Language' _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) -class ThePlatformIE(InfoExtractor): +class ThePlatformBaseIE(InfoExtractor): + def _extract_theplatform_smil_formats(self, smil_url, video_id, note='Downloading SMIL data'): + meta = self._download_xml(smil_url, video_id, note=note) + try: + error_msg = next( + n.attrib['abstract'] + for n in meta.findall(_x('.//smil:ref')) + if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired') + except StopIteration: + pass + else: + raise ExtractorError(error_msg, expected=True) + + formats = self._parse_smil_formats( + meta, smil_url, video_id, namespace=default_ns, + # the parameters are from syfy.com, other sites may use others, + # they also work for nbc.com + f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'}, + transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src)) + + for _format in formats: + ext = determine_ext(_format['url']) + if ext == 'once': + _format['ext'] = 'mp4' + + self._sort_formats(formats) + + return formats + + def get_metadata(self, path, video_id): + info_url = 
'http://link.theplatform.com/s/%s?format=preview' % path + info_json = self._download_webpage(info_url, video_id) + info = json.loads(info_json) + + subtitles = {} + captions = info.get('captions') + if isinstance(captions, list): + for caption in captions: + lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type') + subtitles[lang] = [{ + 'ext': 'srt' if mime == 'text/srt' else 'ttml', + 'url': src, + }] + + return { + 'title': info['title'], + 'subtitles': subtitles, + 'description': info['description'], + 'thumbnail': info['defaultThumbnailUrl'], + 'duration': int_or_none(info.get('duration'), 1000), + } + + +class ThePlatformIE(ThePlatformBaseIE): _VALID_URL = r'''(?x) (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? @@ -118,51 +173,78 @@ class ThePlatformIE(InfoExtractor): if sig: smil_url = self._sign_url(smil_url, sig['key'], sig['secret']) - meta = self._download_xml(smil_url, video_id) - try: - error_msg = next( - n.attrib['abstract'] - for n in meta.findall(_x('.//smil:ref')) - if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired') - except StopIteration: - pass - else: - raise ExtractorError(error_msg, expected=True) + formats = self._extract_theplatform_smil_formats(smil_url, video_id) - info_url = 'http://link.theplatform.com/s/%s?format=preview' % path - info_json = self._download_webpage(info_url, video_id) - info = json.loads(info_json) + ret = self.get_metadata(path, video_id) + ret.update({ + 'id': video_id, + 'formats': formats, + }) - subtitles = {} - captions = info.get('captions') - if isinstance(captions, list): - for caption in captions: - lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type') - subtitles[lang] = [{ - 'ext': 'srt' if mime == 'text/srt' else 'ttml', - 'url': src, - }] + return ret - formats = 
self._parse_smil_formats( - meta, smil_url, video_id, namespace=default_ns, - # the parameters are from syfy.com, other sites may use others, - # they also work for nbc.com - f4m_params={'g': 'UXWGVKRWHFSP', 'hdcore': '3.0.3'}, - transform_rtmp_url=lambda streamer, src: (streamer, 'mp4:' + src)) - for _format in formats: - ext = determine_ext(_format['url']) - if ext == 'once': - _format['ext'] = 'mp4' +class ThePlatformFeedIE(ThePlatformBaseIE): + _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s' + _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)' + _TEST = { + # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207 + 'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207', + 'md5': '22d2b84f058d3586efcd99e57d59d314', + 'info_dict': { + 'id': 'n_hardball_5biden_140207', + 'ext': 'mp4', + 'title': 'The Biden factor: will Joe run in 2016?', + 'description': 'Could Vice President Joe Biden be preparing a 2016 campaign? 
Mark Halperin and Sam Stein weigh in.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20140208', + 'timestamp': 1391824260, + 'duration': 467.0, + 'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'], + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('id') + provider_id = mobj.group('provider_id') + feed_id = mobj.group('feed_id') + + real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id) + feed = self._download_json(real_url, video_id) + entry = feed['entries'][0] + + formats = [] + first_video_id = None + duration = None + for item in entry['media$content']: + smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M' + cur_video_id = url_basename(smil_url) + if first_video_id is None: + first_video_id = cur_video_id + duration = float_or_none(item.get('plfile$duration')) + formats.extend(self._extract_theplatform_smil_formats(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)) self._sort_formats(formats) - return { + thumbnails = [{ + 'url': thumbnail['plfile$url'], + 'width': int_or_none(thumbnail.get('plfile$width')), + 'height': int_or_none(thumbnail.get('plfile$height')), + } for thumbnail in entry.get('media$thumbnails', [])] + + timestamp = int_or_none(entry.get('media$availableDate'), scale=1000) + categories = [item['media$name'] for item in entry.get('media$categories', [])] + + ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id) + ret.update({ 'id': video_id, - 'title': info['title'], - 'subtitles': subtitles, 'formats': formats, - 'description': info['description'], - 'thumbnail': info['defaultThumbnailUrl'], - 'duration': int_or_none(info.get('duration'), 1000), - } + 'thumbnails': thumbnails, + 'duration': duration, + 'timestamp': timestamp, + 'categories': categories, + }) + + return ret From 05fe2594e4589b4e714a423550172eeec3949a70 Mon Sep 17 
00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Aug 2015 01:38:39 +0800 Subject: [PATCH 1677/2721] [theplatform] Support URLs with 'guid=' --- youtube_dl/extractor/theplatform.py | 36 +++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index f02e0f58d..883bf491c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -9,6 +9,10 @@ import hashlib from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( determine_ext, ExtractorError, @@ -120,6 +124,20 @@ class ThePlatformIE(ThePlatformBaseIE): }, { 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', 'only_matching': True, + }, { + 'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701', + 'md5': '734f3790fb5fc4903da391beeebc4836', + 'info_dict': { + 'id': 'tdy_or_siri_150701', + 'ext': 'mp4', + 'title': 'iPhone Siri’s sassy response to a math question has people talking', + 'description': 'md5:a565d1deadd5086f3331d57298ec6333', + 'duration': 83.0, + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1435752600, + 'upload_date': '20150701', + 'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"], + }, }] @staticmethod @@ -154,6 +172,24 @@ class ThePlatformIE(ThePlatformBaseIE): path += '/media' path += '/' + video_id + qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + if 'guid' in qs_dict: + webpage = self._download_webpage(url, video_id) + scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage) + feed_id = None + # feed id usually locates in the last script. 
+ # Seems there's no pattern for the interested script filename, so + # I try one by one + for script in reversed(scripts): + feed_script = self._download_webpage(script, video_id, 'Downloading feed script') + feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None) + if feed_id is not None: + break + if feed_id is None: + raise ExtractorError('Unable to find feed id') + return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % ( + provider_id, feed_id, qs_dict['guid'][0])) + if smuggled_data.get('force_smil_url', False): smil_url = url elif mobj.group('config'): From dac14bf311fd1b3c6af6c57b3b03878a11ef5aae Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Aug 2015 01:41:18 +0800 Subject: [PATCH 1678/2721] [nbc] Add MSNBCIE --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nbc.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b5f7ff9a9..86ea0576a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -358,6 +358,7 @@ from .nbc import ( NBCNewsIE, NBCSportsIE, NBCSportsVPlayerIE, + MSNBCIE, ) from .ndr import ( NDRIE, diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index ccdbfb6c9..e683d24c4 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -236,3 +236,28 @@ class NBCNewsIE(InfoExtractor): 'url': info['videoAssets'][-1]['publicUrl'], 'ie_key': 'ThePlatform', } + + +class MSNBCIE(InfoExtractor): + # https URLs redirect to corresponding http ones + _VALID_URL = r'http://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', + 'md5': '6d236bf4f3dddc226633ce6e2c3f814d', + 'info_dict': { + 'id': 'n_hayes_Aimm_140801_272214', + 'ext': 'mp4', + 'title': 'The chaotic GOP 
immigration vote', + 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1406937606, + 'upload_date': '20140802', + 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + embed_url = self._html_search_meta('embedURL', webpage) + return self.url_result(embed_url) From aa6cd05ed82b14af0e3827b2ff43eed02087b574 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Aug 2015 01:47:55 +0800 Subject: [PATCH 1679/2721] [theplatform] Fix Python 2: declare coding --- youtube_dl/extractor/theplatform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 883bf491c..adaec3375 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals import re From ce00af87670d47f4dff6ad80e46a29e49cbdfe4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 20 Aug 2015 00:56:17 +0600 Subject: [PATCH 1680/2721] [extractor/common] Add default subtitles lang --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ac12be933..b1af45870 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1135,7 +1135,7 @@ class InfoExtractor(object): return formats - def _parse_smil_subtitles(self, smil, namespace=None): + def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): subtitles = {} for num, textstream in 
enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): src = textstream.get('src') @@ -1146,7 +1146,7 @@ class InfoExtractor(object): type_ = textstream.get('type') if type_ == 'text/srt': ext = 'srt' - lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') + lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or subtitles_lang subtitles.setdefault(lang, []).append({ 'url': src, 'ext': ext, From 5cdefc46257802708816e1d4ea7ff5cafe910ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 20 Aug 2015 01:02:50 +0600 Subject: [PATCH 1681/2721] [extractor/common] Add more subtitle mime types for guess when ext is missing --- youtube_dl/extractor/common.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b1af45870..ce2030d28 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1144,8 +1144,13 @@ class InfoExtractor(object): ext = textstream.get('ext') or determine_ext(src) if not ext: type_ = textstream.get('type') - if type_ == 'text/srt': - ext = 'srt' + SUBTITLES_TYPES = { + 'text/vtt': 'vtt', + 'text/srt': 'srt', + 'application/smptett+xml': 'tt', + } + if type_ in SUBTITLES_TYPES: + ext = SUBTITLES_TYPES[type_] lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or subtitles_lang subtitles.setdefault(lang, []).append({ 'url': src, From dd565ac1ad22fe48f8e358d95ea912b1768b1e5a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Aug 2015 03:07:04 +0800 Subject: [PATCH 1682/2721] [theplatform] Use _download_json --- youtube_dl/extractor/theplatform.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index adaec3375..ba05ada39 100644 --- a/youtube_dl/extractor/theplatform.py +++ 
b/youtube_dl/extractor/theplatform.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json import time import hmac import binascii @@ -59,8 +58,7 @@ class ThePlatformBaseIE(InfoExtractor): def get_metadata(self, path, video_id): info_url = 'http://link.theplatform.com/s/%s?format=preview' % path - info_json = self._download_webpage(info_url, video_id) - info = json.loads(info_json) + info = self._download_json(info_url, video_id) subtitles = {} captions = info.get('captions') From 061f62da54cb4184a039108e40dee8e9eb2611c1 Mon Sep 17 00:00:00 2001 From: ping <lipng.ong@gmail.com> Date: Thu, 20 Aug 2015 12:56:11 +0800 Subject: [PATCH 1683/2721] [vlive] New extractor for vlive.tv --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vlive.py | 94 ++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 youtube_dl/extractor/vlive.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1c53a5632..6bee5b63c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -735,6 +735,7 @@ from .vk import ( VKIE, VKUserVideosIE, ) +from .vlive import VLiveIE from .vodlocker import VodlockerIE from .voicerepublic import VoiceRepublicIE from .vporn import VpornIE diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py new file mode 100644 index 000000000..b3bbd80fb --- /dev/null +++ b/youtube_dl/extractor/vlive.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import hmac +from hashlib import sha1 +from base64 import b64encode +from time import time + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + determine_ext +) +from ..compat import compat_urllib_parse + + +class VLiveIE(InfoExtractor): + IE_NAME = 'vlive' + _VALID_URL = r'https?://(?:(www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://m.vlive.tv/video/1326', + 'md5': 'cc7314812855ce56de70a06a27314983', 
+ 'info_dict': { + 'id': '1326', + 'ext': 'mp4', + 'title': '[V] Girl\'s Day\'s Broadcast', + 'creator': 'Girl\'s Day', + 'upload_date': '20150817', + }, + } + _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://m.vlive.tv/video/%s' % video_id, + video_id, note='Download video page') + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + creator = self._html_search_regex( + r'<span class="name">([^<>]+)</span>', webpage, 'creator') + upload_date = self._html_search_regex( + r'<span class="time">(\d{4}\.\d{2}\.\d{2})</span>', webpage, + 'upload date', default=None, fatal=False) + if upload_date: + upload_date = upload_date.replace('.', '') + + url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' % video_id + msgpad = {'msgpad': '%.0f' % (time() * 1000)} + md = { + 'md': b64encode( + hmac.new(self._SECRET.encode('ascii'), + (url[:255] + msgpad['msgpad']).encode('ascii'), sha1).digest()) + } + url += '&' + compat_urllib_parse.urlencode(msgpad) + '&' + compat_urllib_parse.urlencode(md) + + playinfo = self._download_json(url, video_id, 'Downloading video json') + + if playinfo.get('message', '') != 'success': + raise ExtractorError(playinfo['message']) + + if not playinfo.get('result'): + raise ExtractorError('No videos found.') + + formats = [] + for vid in playinfo['result'].get('videos', {}).get('list', []): + formats.append({ + 'url': vid['source'], + 'ext': 'mp4', + 'abr': vid.get('bitrate', {}).get('audio'), + 'vbr': vid.get('bitrate', {}).get('video'), + 'format_id': vid['encodingOption']['name'], + 'height': vid.get('height'), + 'width': vid.get('width'), + }) + self._sort_formats(formats) + + subtitles = {} + for caption in playinfo['result'].get('captions', {}).get('list', []): + subtitles[caption['language']] = [ + {'ext': determine_ext(caption['source'], 
default_ext='vtt'), + 'url': caption['source']}] + + return { + 'id': video_id, + 'title': title, + 'creator': creator, + 'thumbnail': thumbnail, + 'formats': formats, + 'upload_date': upload_date, + 'subtitles': subtitles, + } From 03bc7237add1747de4c0c5d09e72e03639b4fd21 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 20 Aug 2015 23:18:58 +0800 Subject: [PATCH 1684/2721] [common] _parse_smil_subtitles: accept `lang` as the subtitle language --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index ce2030d28..999afc110 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1151,7 +1151,7 @@ class InfoExtractor(object): } if type_ in SUBTITLES_TYPES: ext = SUBTITLES_TYPES[type_] - lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or subtitles_lang + lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang subtitles.setdefault(lang, []).append({ 'url': src, 'ext': ext, From 912e0b7e46d795df3ec1866f9b0ff071cca8d550 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 01:37:07 +0800 Subject: [PATCH 1685/2721] [common] Add _merge_subtitles() --- youtube_dl/extractor/common.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 999afc110..b7437af5a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1279,6 +1279,26 @@ class InfoExtractor(object): def _get_subtitles(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") + @staticmethod + def _merge_subtitle_items(subtitle_list1, subtitle_list2): + """ Merge subtitle items for one language. Items with duplicated URLs + will be dropped. 
""" + list1_urls = set([item['url'] for item in subtitle_list1]) + ret = list(subtitle_list1) + ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls]) + return ret + + @classmethod + def _merge_subtitles(kls, subtitle_dict1, subtitle_dict2): + """ Merge two subtitle dictionaries, language by language. """ + print(subtitle_dict1) + print(subtitle_dict2) + ret = dict(subtitle_dict1) + for lang in subtitle_dict2: + ret[lang] = kls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang]) + print(ret) + return ret + def extract_automatic_captions(self, *args, **kwargs): if (self._downloader.params.get('writeautomaticsub', False) or self._downloader.params.get('listsubtitles')): From c687ac745b3c94b2fd246214e78c92a31bd9fc0f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 01:37:43 +0800 Subject: [PATCH 1686/2721] [theplatform] Use subtitles from SMIL, too --- youtube_dl/extractor/theplatform.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index ba05ada39..25edc3100 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -28,7 +28,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) class ThePlatformBaseIE(InfoExtractor): - def _extract_theplatform_smil_formats(self, smil_url, video_id, note='Downloading SMIL data'): + def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'): meta = self._download_xml(smil_url, video_id, note=note) try: error_msg = next( @@ -54,7 +54,9 @@ class ThePlatformBaseIE(InfoExtractor): self._sort_formats(formats) - return formats + subtitles = self._parse_smil_subtitles(meta, default_ns) + + return formats, subtitles def get_metadata(self, path, video_id): info_url = 'http://link.theplatform.com/s/%s?format=preview' % path @@ -208,12 +210,14 @@ class ThePlatformIE(ThePlatformBaseIE): 
if sig: smil_url = self._sign_url(smil_url, sig['key'], sig['secret']) - formats = self._extract_theplatform_smil_formats(smil_url, video_id) + formats, subtitles = self._extract_theplatform_smil(smil_url, video_id) ret = self.get_metadata(path, video_id) + combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles) ret.update({ 'id': video_id, 'formats': formats, + 'subtitles': combined_subtitles, }) return ret @@ -251,6 +255,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): entry = feed['entries'][0] formats = [] + subtitles = {} first_video_id = None duration = None for item in entry['media$content']: @@ -259,7 +264,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE): if first_video_id is None: first_video_id = cur_video_id duration = float_or_none(item.get('plfile$duration')) - formats.extend(self._extract_theplatform_smil_formats(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id)) + cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id) + formats.extend(cur_formats) + subtitles = self._merge_subtitles(subtitles, cur_subtitles) self._sort_formats(formats) @@ -273,9 +280,11 @@ class ThePlatformFeedIE(ThePlatformBaseIE): categories = [item['media$name'] for item in entry.get('media$categories', [])] ret = self.get_metadata('%s/%s' % (provider_id, first_video_id), video_id) + subtitles = self._merge_subtitles(subtitles, ret['subtitles']) ret.update({ 'id': video_id, 'formats': formats, + 'subtitles': subtitles, 'thumbnails': thumbnails, 'duration': duration, 'timestamp': timestamp, From f908b74fa38b2678e26aea128dbd934cd781a9b6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 01:38:57 +0800 Subject: [PATCH 1687/2721] [test/subtitles] Add test for ThePlatformFeedIE --- test/test_subtitles.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 
c4e3adb67..0343967d9 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -25,6 +25,7 @@ from youtube_dl.extractor import ( RaiIE, VikiIE, ThePlatformIE, + ThePlatformFeedIE, RTVEALaCartaIE, FunnyOrDieIE, ) @@ -307,6 +308,18 @@ class TestThePlatformSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') +class TestThePlatformFeedSubtitles(BaseTestSubtitles): + url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207' + IE = ThePlatformFeedIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade') + + class TestRtveSubtitles(BaseTestSubtitles): url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/' IE = RTVEALaCartaIE From f738dd7b7c7aefe4d26a65905dee9567a691d262 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 01:43:22 +0800 Subject: [PATCH 1688/2721] [common] Remove debugging codes --- youtube_dl/extractor/common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b7437af5a..f731703fb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1291,12 +1291,9 @@ class InfoExtractor(object): @classmethod def _merge_subtitles(kls, subtitle_dict1, subtitle_dict2): """ Merge two subtitle dictionaries, language by language. 
""" - print(subtitle_dict1) - print(subtitle_dict2) ret = dict(subtitle_dict1) for lang in subtitle_dict2: ret[lang] = kls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang]) - print(ret) return ret def extract_automatic_captions(self, *args, **kwargs): From dc95bd503e82d3eb04c347ac0cdbcbabd7e14552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 21 Aug 2015 08:54:28 +0600 Subject: [PATCH 1689/2721] [folketinget] Add skip_download for test --- youtube_dl/extractor/folketinget.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dl/extractor/folketinget.py index 0fb29de75..75399fa7d 100644 --- a/youtube_dl/extractor/folketinget.py +++ b/youtube_dl/extractor/folketinget.py @@ -30,6 +30,10 @@ class FolketingetIE(InfoExtractor): 'upload_date': '20141120', 'duration': 3960, }, + 'params': { + # rtmp download + 'skip_download': True, + }, } def _real_extract(self, url): From 5d003e29b188dd2f140fe1b9b93f1bb1ad8263a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 21 Aug 2015 08:56:05 +0600 Subject: [PATCH 1690/2721] [rtp] Add skip_download for test --- youtube_dl/extractor/rtp.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py index ecf4939cd..82b323cdd 100644 --- a/youtube_dl/extractor/rtp.py +++ b/youtube_dl/extractor/rtp.py @@ -18,6 +18,10 @@ class RTPIE(InfoExtractor): 'description': 'As paixões musicais de António Cartaxo e António Macedo', 'thumbnail': 're:^https?://.*\.jpg', }, + 'params': { + # rtmp download + 'skip_download': True, + }, }, { 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'only_matching': True, From 4932a817a0c2375df14d66c9ac86cfa28988327d Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 13:00:08 +0800 Subject: [PATCH 1691/2721] [rtl2] Add skip_download for test --- 
youtube_dl/extractor/rtl2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 72cd80498..e9589449e 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -15,6 +15,10 @@ class RTL2IE(InfoExtractor): 'title': 'GRIP sucht den Sommerkönig', 'description': 'Matthias, Det und Helge treten gegeneinander an.' }, + 'params': { + # rtmp download + 'skip_download': True, + }, }, { 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', 'md5': 'ffcd517d2805b57ce11a58a2980c2b02', From 9eb4ab6ad915a777b6f7d7b39d03d05d7d31cd24 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 13:04:25 +0800 Subject: [PATCH 1692/2721] [rtl2] Remove an unused line --- youtube_dl/extractor/rtl2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index e9589449e..9e0c6890e 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -43,7 +43,6 @@ class RTL2IE(InfoExtractor): vivi_id = self._html_search_regex( r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id - webpage = self._download_webpage(info_url, '') info = self._download_json(info_url, video_id) video_info = info['video'] From 5e1a5ac8de12391cb22d2fa0dfb2119527bd7fc2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 13:20:32 +0800 Subject: [PATCH 1693/2721] [rtl2] Fix extraction for test_RTL2_1 --- youtube_dl/extractor/rtl2.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 9e0c6890e..276612fc7 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -1,6 +1,7 @@ # encoding: utf-8 from __future__ import unicode_literals +import re from 
.common import InfoExtractor @@ -28,6 +29,10 @@ class RTL2IE(InfoExtractor): 'title': 'Anna erwischt Alex!', 'description': 'Anna ist Alex\' Tochter bei Köln 50667.' }, + 'params': { + # rtmp download + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -38,10 +43,17 @@ class RTL2IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - vico_id = self._html_search_regex( - r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') - vivi_id = self._html_search_regex( - r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') + mobj = re.search( + r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"', + webpage) + if mobj: + vico_id = mobj.group('vico_id') + vivi_id = mobj.group('vivi_id') + else: + vico_id = self._html_search_regex( + r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') + vivi_id = self._html_search_regex( + r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') info_url = 'http://www.rtl2.de/video/php/get_video.php?vico_id=' + vico_id + '&vivi_id=' + vivi_id info = self._download_json(info_url, video_id) From d7c16305707f2af5c47d91b67cf0850b4dcada3a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Fri, 21 Aug 2015 13:21:21 +0800 Subject: [PATCH 1694/2721] [rtl2] Remove MD5 checksums --- youtube_dl/extractor/rtl2.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 276612fc7..25f7faf76 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -9,7 +9,6 @@ class RTL2IE(InfoExtractor): _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))' _TESTS = [{ 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', - 'md5': 'bfcc179030535b08dc2b36b469b5adc7', 'info_dict': { 'id': 'folge-203-0', 'ext': 'f4v', @@ -22,7 +21,6 @@ class RTL2IE(InfoExtractor): }, }, { 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', - 
'md5': 'ffcd517d2805b57ce11a58a2980c2b02', 'info_dict': { 'id': '21040-anna-erwischt-alex', 'ext': 'mp4', From 8c97f81943de1c2bf8d2f524ba5ca09b29579dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 21 Aug 2015 11:35:51 +0200 Subject: [PATCH 1695/2721] [common] Follow convention of using 'cls' in classmethods --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f731703fb..5d24bcb6a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1289,11 +1289,11 @@ class InfoExtractor(object): return ret @classmethod - def _merge_subtitles(kls, subtitle_dict1, subtitle_dict2): + def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2): """ Merge two subtitle dictionaries, language by language. """ ret = dict(subtitle_dict1) for lang in subtitle_dict2: - ret[lang] = kls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang]) + ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang]) return ret def extract_automatic_captions(self, *args, **kwargs): From 66ce97024d0de7836777562a6eb60603796636d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 22 Aug 2015 06:30:00 +0600 Subject: [PATCH 1696/2721] [soundcloud:user] Update tests --- youtube_dl/extractor/soundcloud.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 6ce86cbcd..ed5dcc0d3 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -309,7 +309,7 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (All)', }, - 'playlist_mincount': 112, + 'playlist_mincount': 111, }, { 'url': 
'https://soundcloud.com/the-akashic-chronicler/tracks', 'info_dict': { @@ -330,14 +330,14 @@ class SoundcloudUserIE(SoundcloudIE): 'id': '114582580', 'title': 'The Akashic Chronicler (Reposts)', }, - 'playlist_mincount': 9, + 'playlist_mincount': 7, }, { 'url': 'https://soundcloud.com/the-akashic-chronicler/likes', 'info_dict': { 'id': '114582580', 'title': 'The Akashic Chronicler (Likes)', }, - 'playlist_mincount': 333, + 'playlist_mincount': 321, }, { 'url': 'https://soundcloud.com/grynpyret/spotlight', 'info_dict': { From 483fc223bb1509d11ac1843a5852f75c0aec3475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Aug 2015 10:42:34 +0600 Subject: [PATCH 1697/2721] [pluralsight] Add extractor (Closes #6090) --- youtube_dl/extractor/__init__.py | 4 + youtube_dl/extractor/pluralsight.py | 218 ++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) create mode 100644 youtube_dl/extractor/pluralsight.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1c53a5632..d59882598 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -454,6 +454,10 @@ from .playfm import PlayFMIE from .playtvak import PlaytvakIE from .playvid import PlayvidIE from .playwire import PlaywireIE +from .pluralsight import ( + PluralsightIE, + PluralsightCourseIE, +) from .podomatic import PodomaticIE from .porn91 import Porn91IE from .pornhd import PornHdIE diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py new file mode 100644 index 000000000..1bdcacbaa --- /dev/null +++ b/youtube_dl/extractor/pluralsight.py @@ -0,0 +1,218 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse, + compat_urllib_request, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + int_or_none, + parse_duration, +) + + +class 
PluralsightIE(InfoExtractor): + IE_NAME = 'pluralsight' + _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)' + _LOGIN_URL = 'https://www.pluralsight.com/id/' + _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' + _NETRC_MACHINE = 'pluralsight' + + _TEST = { + 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', + 'md5': '4d458cf5cf4c593788672419a8dd4cf8', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', + 'ext': 'mp4', + 'title': 'Management of SQL Server - Demo Monitoring', + 'duration': 338, + }, + 'skip': 'Requires pluralsight account credentials', + } + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + raise ExtractorError( + 'Pluralsight account is required, use --username and --password options to provide account credentials.', + expected=True) + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'Username': username.encode('utf-8'), + 'Password': password.encode('utf-8'), + }) + + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_URL, group='url') + + if not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + request = compat_urllib_request.Request( + post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8')) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + + response = self._download_webpage( + request, None, 'Logging in as %s' % username) + + error = 
self._search_regex( + r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + author = mobj.group('author') + name = mobj.group('name') + clip_id = mobj.group('clip') + course = mobj.group('course') + + display_id = '%s-%s' % (name, clip_id) + + webpage = self._download_webpage(url, display_id) + + collection = self._parse_json( + self._search_regex( + r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)', + webpage, 'modules'), + display_id) + + module, clip = None, None + + for module_ in collection: + if module_.get('moduleName') == name: + module = module_ + for clip_ in module_.get('clips', []): + clip_index = clip_.get('clipIndex') + if clip_index is None: + continue + if compat_str(clip_index) == clip_id: + clip = clip_ + break + + if not clip: + raise ExtractorError('Unable to resolve clip') + + QUALITIES = { + 'low': {'width': 640, 'height': 480}, + 'medium': {'width': 848, 'height': 640}, + 'high': {'width': 1024, 'height': 768}, + } + + ALLOWED_QUALITIES = ( + ('webm', ('high',)), + ('mp4', ('low', 'medium', 'high',)), + ) + + formats = [] + for ext, qualities in ALLOWED_QUALITIES: + for quality in qualities: + f = QUALITIES[quality].copy() + clip_post = { + 'a': author, + 'cap': 'false', + 'cn': clip_id, + 'course': course, + 'lc': 'en', + 'm': name, + 'mt': ext, + 'q': '%dx%d' % (f['width'], f['height']), + } + request = compat_urllib_request.Request( + 'http://www.pluralsight.com/training/Player/ViewClip', + json.dumps(clip_post).encode('utf-8')) + request.add_header('Content-Type', 'application/json;charset=utf-8') + format_id = '%s-%s' % (ext, quality) + clip_url = self._download_webpage( + request, display_id, 'Downloading %s URL' % format_id, fatal=False) + if not clip_url: + continue + f.update({ + 'url': clip_url, 
+ 'ext': ext, + 'format_id': format_id, + }) + formats.append(f) + self._sort_formats(formats) + + # TODO: captions + # http://www.pluralsight.com/training/Player/ViewClip + cap = true + # or + # http://www.pluralsight.com/training/Player/Captions + # { a = author, cn = clip_id, lc = end, m = name } + + return { + 'id': clip['clipName'], + 'title': '%s - %s' % (module['title'], clip['title']), + 'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')), + 'creator': author, + 'formats': formats + } + + +class PluralsightCourseIE(InfoExtractor): + IE_NAME = 'pluralsight:course' + _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)' + _TEST = { + # Free course from Pluralsight Starter Subscription for Microsoft TechNet + # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz + 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas', + 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals', + 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986', + }, + 'playlist_count': 31, + } + + def _real_extract(self, url): + course_id = self._match_id(url) + + course = self._download_json( + 'http://www.pluralsight.com/data/course/%s' % course_id, + course_id, 'Downloading course JSON') + + title = course['title'] + description = course.get('description') or course.get('shortDescription') + + course_data = self._download_json( + 'http://www.pluralsight.com/data/course/content/%s' % course_id, + course_id, 'Downloading course data JSON') + + may_not_view = 0 + + entries = [] + for module in course_data: + for clip in module.get('clips', []): + if clip.get('userMayViewClip') is False: + may_not_view += 1 + continue + player_parameters = clip.get('playerParameters') + if not player_parameters: + continue + entries.append(self.url_result( + 
'http://www.pluralsight.com/training/player?%s' % player_parameters, + 'Pluralsight')) + + if may_not_view > 0: + self._downloader.report_warning( + 'There are %d videos in this course that are not available for you. ' + 'Upgrade your account to get access to these videos.' % may_not_view) + + return self.playlist_result(entries, course_id, title, description) From 468083d2f5596314a0813859f3afe7d2fce3cac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Aug 2015 10:44:10 +0600 Subject: [PATCH 1698/2721] [pluralsight] Remove unused const --- youtube_dl/extractor/pluralsight.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 1bdcacbaa..7c7f762ff 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -21,7 +21,6 @@ class PluralsightIE(InfoExtractor): IE_NAME = 'pluralsight' _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)' _LOGIN_URL = 'https://www.pluralsight.com/id/' - _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' 
_NETRC_MACHINE = 'pluralsight' _TEST = { From 2b6bda1ed86e1b64242b33c032286dc315d541ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Aug 2015 11:21:56 +0600 Subject: [PATCH 1699/2721] [pluralsight] Do not yet rely on userMayViewClip --- youtube_dl/extractor/pluralsight.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 7c7f762ff..7ba396aef 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -183,6 +183,8 @@ class PluralsightCourseIE(InfoExtractor): def _real_extract(self, url): course_id = self._match_id(url) + # TODO: PSM cookie + course = self._download_json( 'http://www.pluralsight.com/data/course/%s' % course_id, course_id, 'Downloading course JSON') @@ -194,14 +196,9 @@ class PluralsightCourseIE(InfoExtractor): 'http://www.pluralsight.com/data/course/content/%s' % course_id, course_id, 'Downloading course data JSON') - may_not_view = 0 - entries = [] for module in course_data: for clip in module.get('clips', []): - if clip.get('userMayViewClip') is False: - may_not_view += 1 - continue player_parameters = clip.get('playerParameters') if not player_parameters: continue @@ -209,9 +206,4 @@ class PluralsightCourseIE(InfoExtractor): 'http://www.pluralsight.com/training/player?%s' % player_parameters, 'Pluralsight')) - if may_not_view > 0: - self._downloader.report_warning( - 'There are %d videos in this course that are not available for you. ' - 'Upgrade your account to get access to these videos.' 
% may_not_view) - return self.playlist_result(entries, course_id, title, description) From 2006a06eff606c5a996c315a3e597b9d2603db9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 23 Aug 2015 21:43:28 +0600 Subject: [PATCH 1700/2721] [kontrtube] Fix extraction (Closes #6644) --- youtube_dl/extractor/kontrtube.py | 40 +++++++++++++++---------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 720bc939b..a59c529f4 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + parse_duration, +) class KontrTubeIE(InfoExtractor): @@ -34,33 +37,28 @@ class KontrTubeIE(InfoExtractor): webpage = self._download_webpage( url, display_id, 'Downloading page') - video_url = self._html_search_regex( + video_url = self._search_regex( r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL') - thumbnail = self._html_search_regex( - r"preview_url\s*:\s*'(.+?)/?',", webpage, 'video thumbnail', fatal=False) + thumbnail = self._search_regex( + r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False) title = self._html_search_regex( - r'<title>(.+?)', webpage, 'video title') + r'(?s)

    (.+?)

    ', webpage, 'title') description = self._html_search_meta( - 'description', webpage, 'video description') + 'description', webpage, 'description') - mobj = re.search( - r'
    Длительность: (?P\d+)м:(?P\d+)с
    ', - webpage) - duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None + duration = self._search_regex( + r'Длительность: ([^<]+)', webpage, 'duration', fatal=False) + if duration: + duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec')) - view_count = self._html_search_regex( - r'
    Просмотров: (\d+)
    ', + view_count = self._search_regex( + r'Просмотров: ([^<]+)', webpage, 'view count', fatal=False) + if view_count: + view_count = int_or_none(view_count.replace(' ', '')) - comment_count = None - comment_str = self._html_search_regex( - r'Комментарии: ([^<]+)', webpage, 'comment count', fatal=False) - if comment_str.startswith('комментариев нет'): - comment_count = 0 - else: - mobj = re.search(r'\d+ из (?P\d+) комментариев', comment_str) - if mobj: - comment_count = mobj.group('total') + comment_count = int_or_none(self._search_regex( + r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False)) return { 'id': video_id, From 9990c960f2d944cfbecb7d613062b98fe99464a7 Mon Sep 17 00:00:00 2001 From: clauderains Date: Sun, 23 Aug 2015 02:46:29 -0700 Subject: [PATCH 1701/2721] [spankwire] Fixed uploader_id field extraction so that test case passes --- youtube_dl/extractor/spankwire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 5fa6faf18..0a35c2b3b 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -54,7 +54,7 @@ class SpankwireIE(InfoExtractor): r'by:\s*]*>(.+?)', webpage, 'uploader', fatal=False) uploader_id = self._html_search_regex( - r'by:\s* on (.+?) 
at \d+:\d+', From 59e6acc757a9df85ca78e519623e84072ffd9c01 Mon Sep 17 00:00:00 2001 From: clauderains Date: Sun, 23 Aug 2015 02:47:20 -0700 Subject: [PATCH 1702/2721] [spankwire] Support new cdn video url format --- youtube_dl/extractor/spankwire.py | 84 +++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 0a35c2b3b..0f2d8d0de 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -17,20 +17,34 @@ from ..aes import aes_decrypt_text class SpankwireIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?Pspankwire\.com/[^/]*/video(?P[0-9]+)/?)' - _TEST = { - 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', - 'md5': '8bbfde12b101204b39e4b9fe7eb67095', - 'info_dict': { - 'id': '103545', - 'ext': 'mp4', - 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', - 'description': 'Crazy Bitch X rated music video.', - 'uploader': 'oreusz', - 'uploader_id': '124697', - 'upload_date': '20070507', - 'age_limit': 18, - } - } + _TESTS = [{ + 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', + 'md5': '8bbfde12b101204b39e4b9fe7eb67095', + 'info_dict': { + 'id': '103545', + 'ext': 'mp4', + 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', + 'description': 'Crazy Bitch X rated music video.', + 'uploader': 'oreusz', + 'uploader_id': '124697', + 'upload_date': '20070507', + 'age_limit': 18, + } + }, + { + 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', + 'md5': '09b3c20833308b736ae8902db2f8d7e6', + 'info_dict': { + 'id': '1921551', + 'ext': 'mp4', + 'title': 'Titcums Compiloation I', + 'description': 'cum on tits', + 'uploader': 'dannyh78999', + 'uploader_id': '3056053', + 'upload_date': '20150822', + 'age_limit': 18, + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -82,18 +96,36 @@ class 
SpankwireIE(InfoExtractor): for video_url in video_urls: path = compat_urllib_parse_urlparse(video_url).path format = path.split('/')[4].split('_')[:2] - resolution, bitrate_str = format - format = "-".join(format) - height = int(resolution.rstrip('Pp')) - tbr = int(bitrate_str.rstrip('Kk')) - formats.append({ - 'url': video_url, - 'resolution': resolution, - 'format': format, - 'tbr': tbr, - 'height': height, - 'format_id': format, - }) + if format[0] == 'mp4': + format_id, quality = format + format = "-".join(format) + if quality == 'normal': + height = 180 + elif quality == 'high': + height = 240 + elif quality == 'ultra': + height = 480 + elif quality == '720p': + height = 720 + formats.append({ + 'url': video_url, + 'format': format, + 'height': height, + 'format_id': format, + }) + else: + resolution, bitrate_str = format + format = "-".join(format) + height = int(resolution.rstrip('Pp')) + tbr = int(bitrate_str.rstrip('Kk')) + formats.append({ + 'url': video_url, + 'resolution': resolution, + 'format': format, + 'tbr': tbr, + 'height': height, + 'format_id': format, + }) self._sort_formats(formats) age_limit = self._rta_search(webpage) From 551c7837ace81190ce9141551dceec24dfdae1bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Aug 2015 22:32:20 +0600 Subject: [PATCH 1703/2721] [spankwire] Simplify and properly format --- youtube_dl/extractor/spankwire.py | 105 +++++++++++++----------------- 1 file changed, 45 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 0f2d8d0de..0a47441b1 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -18,33 +18,34 @@ from ..aes import aes_decrypt_text class SpankwireIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?Pspankwire\.com/[^/]*/video(?P[0-9]+)/?)' _TESTS = [{ - 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', - 'md5': 
'8bbfde12b101204b39e4b9fe7eb67095', - 'info_dict': { - 'id': '103545', - 'ext': 'mp4', - 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', - 'description': 'Crazy Bitch X rated music video.', - 'uploader': 'oreusz', - 'uploader_id': '124697', - 'upload_date': '20070507', - 'age_limit': 18, - } - }, - { - 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', - 'md5': '09b3c20833308b736ae8902db2f8d7e6', - 'info_dict': { - 'id': '1921551', - 'ext': 'mp4', - 'title': 'Titcums Compiloation I', - 'description': 'cum on tits', - 'uploader': 'dannyh78999', - 'uploader_id': '3056053', - 'upload_date': '20150822', - 'age_limit': 18, - } - }] + # download URL pattern: */P_K_.mp4 + 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', + 'md5': '8bbfde12b101204b39e4b9fe7eb67095', + 'info_dict': { + 'id': '103545', + 'ext': 'mp4', + 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', + 'description': 'Crazy Bitch X rated music video.', + 'uploader': 'oreusz', + 'uploader_id': '124697', + 'upload_date': '20070507', + 'age_limit': 18, + } + }, { + # download URL pattern: */mp4__.mp4 + 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', + 'md5': '09b3c20833308b736ae8902db2f8d7e6', + 'info_dict': { + 'id': '1921551', + 'ext': 'mp4', + 'title': 'Titcums Compiloation I', + 'description': 'cum on tits', + 'uploader': 'dannyh78999', + 'uploader_id': '3056053', + 'upload_date': '20150822', + 'age_limit': 18, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -81,9 +82,10 @@ class SpankwireIE(InfoExtractor): r']*>([\d,\.]+)', webpage, 'comment count', fatal=False)) - video_urls = list(map( - compat_urllib_parse_unquote, - re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage))) + videos = re.findall( + r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage) + heights = [int(video[0]) 
for video in videos] + video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos])) if webpage.find('flashvars\.encrypted = "true"') != -1: password = self._search_regex( r'flashvars\.video_title = "([^"]+)', @@ -93,39 +95,22 @@ class SpankwireIE(InfoExtractor): video_urls)) formats = [] - for video_url in video_urls: + for height, video_url in zip(heights, video_urls): path = compat_urllib_parse_urlparse(video_url).path - format = path.split('/')[4].split('_')[:2] - if format[0] == 'mp4': - format_id, quality = format - format = "-".join(format) - if quality == 'normal': - height = 180 - elif quality == 'high': - height = 240 - elif quality == 'ultra': - height = 480 - elif quality == '720p': - height = 720 - formats.append({ - 'url': video_url, - 'format': format, - 'height': height, - 'format_id': format, + _, quality = path.split('/')[4].split('_')[:2] + f = { + 'url': video_url, + 'height': height, + } + tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None) + if tbr: + f.update({ + 'tbr': int(tbr), + 'format_id': '%dp' % height, }) else: - resolution, bitrate_str = format - format = "-".join(format) - height = int(resolution.rstrip('Pp')) - tbr = int(bitrate_str.rstrip('Kk')) - formats.append({ - 'url': video_url, - 'resolution': resolution, - 'format': format, - 'tbr': tbr, - 'height': height, - 'format_id': format, - }) + f['format_id'] = quality + formats.append(f) self._sort_formats(formats) age_limit = self._rta_search(webpage) From 28b83495d898530e72d242874576f4d2d6d8ab3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Aug 2015 22:32:44 +0600 Subject: [PATCH 1704/2721] [spankwire] Simplify --- youtube_dl/extractor/spankwire.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 0a47441b1..609f78294 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ 
-16,7 +16,7 @@ from ..aes import aes_decrypt_text class SpankwireIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pspankwire\.com/[^/]*/video(?P[0-9]+)/?)' + _VALID_URL = r'https?://(?:www\.)?(?Pspankwire\.com/[^/]*/video(?P[0-9]+)/?)' _TESTS = [{ # download URL pattern: */P_K_.mp4 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', @@ -49,10 +49,9 @@ class SpankwireIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - url = 'http://www.' + mobj.group('url') + video_id = mobj.group('id') - req = compat_urllib_request.Request(url) + req = compat_urllib_request.Request('http://www.' + mobj.group('url')) req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) From 90076b6172f25a36ca2a00c1b85cda169f2133c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 23 Aug 2015 22:33:26 +0600 Subject: [PATCH 1705/2721] [spankwire] Preserve old uploader pattern --- youtube_dl/extractor/spankwire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index 609f78294..9e8fb35b2 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -68,7 +68,7 @@ class SpankwireIE(InfoExtractor): r'by:\s*]*>(.+?)', webpage, 'uploader', fatal=False) uploader_id = self._html_search_regex( - r'by:\s* on (.+?) 
at \d+:\d+', From e7c14660d3aef3a5a303a82dee7e11dfe063048d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Aug 2015 00:36:24 +0600 Subject: [PATCH 1706/2721] [yandexmusic] Defer link resolve till actual download time to prevent link expiry (Closes #6650) --- youtube_dl/extractor/yandexmusic.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index f4c0f5702..85c495c11 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -64,7 +64,15 @@ class YandexMusicTrackIE(YandexMusicBaseIE): return self._get_track_info(track) -class YandexMusicAlbumIE(YandexMusicBaseIE): +class YandexMusicPlaylistBaseIE(InfoExtractor): + def _build_playlist(self, tracks): + return [ + self.url_result( + 'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id'])) + for track in tracks] + + +class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): IE_NAME = 'yandexmusic:album' IE_DESC = 'Яндекс.Музыка - Альбом' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/?(\?|$)' @@ -85,7 +93,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE): 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, album_id, 'Downloading album JSON') - entries = [self._get_track_info(track) for track in album['volumes'][0]] + entries = self._build_playlist(album['volumes'][0]) title = '%s - %s' % (album['artists'][0]['name'], album['title']) year = album.get('year') @@ -95,7 +103,7 @@ class YandexMusicAlbumIE(YandexMusicBaseIE): return self.playlist_result(entries, compat_str(album['id']), title) -class YandexMusicPlaylistIE(YandexMusicBaseIE): +class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): IE_NAME = 'yandexmusic:playlist' IE_DESC = 'Яндекс.Музыка - Плейлист' _VALID_URL = 
r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P\d+)' @@ -120,8 +128,7 @@ class YandexMusicPlaylistIE(YandexMusicBaseIE): r'var\s+Mu\s*=\s*({.+?});\s*', webpage, 'player'), playlist_id)['pageData']['playlist'] - entries = [self._get_track_info(track) for track in playlist['tracks']] - return self.playlist_result( - entries, compat_str(playlist_id), + self._build_playlist(playlist['tracks']), + compat_str(playlist_id), playlist['title'], playlist.get('description')) From e4df2f98ccbe2e24785dd6883d7fd495193fd8e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 24 Aug 2015 00:36:54 +0600 Subject: [PATCH 1707/2721] [yandexmusic:track] Eliminate base class --- youtube_dl/extractor/yandexmusic.py | 36 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 85c495c11..91829be1c 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -12,7 +12,23 @@ from ..utils import ( ) -class YandexMusicBaseIE(InfoExtractor): +class YandexMusicTrackIE(InfoExtractor): + IE_NAME = 'yandexmusic:track' + IE_DESC = 'Яндекс.Музыка - Трек' + _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/track/(?P\d+)' + + _TEST = { + 'url': 'http://music.yandex.ru/album/540508/track/4878838', + 'md5': 'f496818aa2f60b6c0062980d2e00dc20', + 'info_dict': { + 'id': '4878838', + 'ext': 'mp3', + 'title': 'Carlo Ambrosio - Gypsy Eyes 1', + 'filesize': 4628061, + 'duration': 193.04, + } + } + def _get_track_url(self, storage_dir, track_id): data = self._download_json( 'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s' @@ -35,24 +51,6 @@ class YandexMusicBaseIE(InfoExtractor): 'duration': float_or_none(track.get('durationMs'), 1000), } - -class YandexMusicTrackIE(YandexMusicBaseIE): - IE_NAME = 'yandexmusic:track' - IE_DESC = 'Яндекс.Музыка - Трек' - 
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P\d+)/track/(?P\d+)' - - _TEST = { - 'url': 'http://music.yandex.ru/album/540508/track/4878838', - 'md5': 'f496818aa2f60b6c0062980d2e00dc20', - 'info_dict': { - 'id': '4878838', - 'ext': 'mp3', - 'title': 'Carlo Ambrosio - Gypsy Eyes 1', - 'filesize': 4628061, - 'duration': 193.04, - } - } - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) album_id, track_id = mobj.group('album_id'), mobj.group('id') From 11addc50ffa9ce65ac3bef7af6b1c38d7eae1af6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 23 Aug 2015 23:52:47 +0200 Subject: [PATCH 1708/2721] release 2015.08.23 --- docs/supportedsites.md | 6 +++++- youtube_dl/version.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9099e2da4..8d9db53a6 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -301,6 +301,7 @@ - **Moviezine** - **movshare**: MovShare - **MPORA** + - **MSNBC** - **MTV** - **mtviggy.com** - **mtvservices:embedded** @@ -308,6 +309,7 @@ - **MusicPlayOn** - **MusicVault** - **muzu.tv** + - **Mwave** - **MySpace** - **MySpace:album** - **MySpass** @@ -392,6 +394,8 @@ - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **Playvid** - **Playwire** + - **pluralsight** + - **pluralsight:course** - **plus.google**: Google Plus - **pluzz.francetv.fr** - **podomatic** @@ -534,6 +538,7 @@ - **TF1** - **TheOnion** - **ThePlatform** + - **ThePlatformFeed** - **TheSixtyOne** - **ThisAmericanLife** - **ThisAV** @@ -599,7 +604,6 @@ - **Viddler** - **video.google:search**: Google Video search - **video.mit.edu** - - **VideoBam** - **VideoDetective** - **videofy.me** - **videolectures.net** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c090c6df7..394951ca7 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.16.1' +__version__ = 
'2015.08.23' From eba470f2f22389ab32164e4eb39067ceecf900f5 Mon Sep 17 00:00:00 2001 From: ping Date: Mon, 24 Aug 2015 16:30:00 +0800 Subject: [PATCH 1709/2721] [vlive] Remove upload_date extraction & cleanup --- youtube_dl/extractor/vlive.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index b3bbd80fb..6a403cc64 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -25,7 +25,6 @@ class VLiveIE(InfoExtractor): 'ext': 'mp4', 'title': '[V] Girl\'s Day\'s Broadcast', 'creator': 'Girl\'s Day', - 'upload_date': '20150817', }, } _SECRET = 'rFkwZet6pqk1vQt6SxxUkAHX7YL3lmqzUMrU4IDusTo4jEBdtOhNfT4BYYAdArwH' @@ -41,21 +40,14 @@ class VLiveIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) creator = self._html_search_regex( r'([^<>]+)', webpage, 'creator') - upload_date = self._html_search_regex( - r'(\d{4}\.\d{2}\.\d{2})', webpage, - 'upload date', default=None, fatal=False) - if upload_date: - upload_date = upload_date.replace('.', '') - + url = 'http://global.apis.naver.com/globalV/globalV/vod/%s/playinfo?' 
% video_id - msgpad = {'msgpad': '%.0f' % (time() * 1000)} - md = { - 'md': b64encode( - hmac.new(self._SECRET.encode('ascii'), - (url[:255] + msgpad['msgpad']).encode('ascii'), sha1).digest()) - } - url += '&' + compat_urllib_parse.urlencode(msgpad) + '&' + compat_urllib_parse.urlencode(md) - + msgpad = '%.0f' % (time() * 1000) + md = b64encode( + hmac.new(self._SECRET.encode('ascii'), + (url[:255] + msgpad).encode('ascii'), sha1).digest() + ) + url += '&' + compat_urllib_parse.urlencode({'msgpad': msgpad, 'md': md}) playinfo = self._download_json(url, video_id, 'Downloading video json') if playinfo.get('message', '') != 'success': @@ -89,6 +81,5 @@ class VLiveIE(InfoExtractor): 'creator': creator, 'thumbnail': thumbnail, 'formats': formats, - 'upload_date': upload_date, 'subtitles': subtitles, } From 95e431e9ec2477694d368a050222d6381a6f88ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Aug 2015 21:08:38 +0600 Subject: [PATCH 1710/2721] [mailru] Skip tests --- youtube_dl/extractor/mailru.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 54a14cb94..ab1300185 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -25,6 +25,7 @@ class MailRuIE(InfoExtractor): 'uploader_id': 'sonypicturesrus@mail.ru', 'duration': 184, }, + 'skip': 'Not accessible from Travis CI server', }, { 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', @@ -39,6 +40,7 @@ class MailRuIE(InfoExtractor): 'uploader_id': 'hitech@corp.mail.ru', 'duration': 245, }, + 'skip': 'Not accessible from Travis CI server', }, ] From ebbf078c7df575903ceb1be53e53533508c79dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Aug 2015 21:19:21 +0600 Subject: [PATCH 1711/2721] [krasview] Skip download for test --- youtube_dl/extractor/krasview.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/krasview.py 
b/youtube_dl/extractor/krasview.py index 96f95979a..0ae8ebd68 100644 --- a/youtube_dl/extractor/krasview.py +++ b/youtube_dl/extractor/krasview.py @@ -25,6 +25,9 @@ class KrasViewIE(InfoExtractor): 'duration': 27, 'thumbnail': 're:^https?://.*\.jpg', }, + 'params': { + 'skip_download': 'Not accessible from Travis CI server', + }, } def _real_extract(self, url): From 6d53cdd6ce441dd7bc1d93bf1445f0594cfdffef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 25 Aug 2015 23:29:02 +0600 Subject: [PATCH 1712/2721] [yandexmusic] Skip removed tracks (#6666) --- youtube_dl/extractor/yandexmusic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 91829be1c..166cbf344 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -67,7 +67,7 @@ class YandexMusicPlaylistBaseIE(InfoExtractor): return [ self.url_result( 'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id'])) - for track in tracks] + for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)] class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): From baf510bf8cb296d2ed2a2f742ec9387d094623e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 00:11:15 +0600 Subject: [PATCH 1713/2721] [yandexmusic:playlist] Handle playlists with more than 150 tracks (Closes #6666) --- youtube_dl/extractor/yandexmusic.py | 51 +++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py index 166cbf344..4098e4629 100644 --- a/youtube_dl/extractor/yandexmusic.py +++ b/youtube_dl/extractor/yandexmusic.py @@ -5,7 +5,11 @@ import re import hashlib from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urllib_parse, + compat_urllib_request, 
+) from ..utils import ( int_or_none, float_or_none, @@ -106,7 +110,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): IE_DESC = 'Яндекс.Музыка - Плейлист' _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', 'info_dict': { 'id': '1245', @@ -114,19 +118,54 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9', }, 'playlist_count': 6, - } + }, { + # playlist exceeding the limit of 150 tracks shipped with webpage (see + # https://github.com/rg3/youtube-dl/issues/6666) + 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036', + 'info_dict': { + 'id': '1036', + 'title': 'Музыка 90-х', + }, + 'playlist_count': 310, + }] def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - playlist = self._parse_json( + mu = self._parse_json( self._search_regex( r'var\s+Mu\s*=\s*({.+?});\s*', webpage, 'player'), - playlist_id)['pageData']['playlist'] + playlist_id) + + playlist = mu['pageData']['playlist'] + tracks, track_ids = playlist['tracks'], playlist['trackIds'] + + # tracks dictionary shipped with webpage is limited to 150 tracks, + # missing tracks should be retrieved manually. 
+ if len(tracks) < len(track_ids): + present_track_ids = set([compat_str(track['id']) for track in tracks if track.get('id')]) + missing_track_ids = set(map(compat_str, track_ids)) - set(present_track_ids) + request = compat_urllib_request.Request( + 'https://music.yandex.ru/handlers/track-entries.jsx', + compat_urllib_parse.urlencode({ + 'entries': ','.join(missing_track_ids), + 'lang': mu.get('settings', {}).get('lang', 'en'), + 'external-domain': 'music.yandex.ru', + 'overembed': 'false', + 'sign': mu.get('authData', {}).get('user', {}).get('sign'), + 'strict': 'true', + }).encode('utf-8')) + request.add_header('Referer', url) + request.add_header('X-Requested-With', 'XMLHttpRequest') + + missing_tracks = self._download_json( + request, playlist_id, 'Downloading missing tracks JSON', fatal=False) + if missing_tracks: + tracks.extend(missing_tracks) return self.playlist_result( - self._build_playlist(playlist['tracks']), + self._build_playlist(tracks), compat_str(playlist_id), playlist['title'], playlist.get('description')) From 4bc8eec4ebf5ffcca3b2e17c864be08df5215f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Aug 2015 15:21:55 +0200 Subject: [PATCH 1714/2721] [youtube] Adapt player version regex to handle urls ending in '/html5player-new.js' It was always extracting 'new' as the version, breaking the cache system. 
--- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8e2da46e3..ab6754154 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -660,7 +660,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_signature_function(self, video_id, player_url, example_sig): id_m = re.match( - r'.*?-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P[a-z]+)$', + r'.*?-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P[a-z]+)$', player_url) if not id_m: raise ExtractorError('Cannot identify player %r' % player_url) @@ -1289,7 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( - r'html5player-([^/]+?)(?:/html5player)?\.js', + r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', player_url, 'html5 player', fatal=False) player_desc = 'html5 player %s' % player_version From 2f72e83bbd915054cac0e8f70df0c2cab4b9c116 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 20:47:57 +0600 Subject: [PATCH 1715/2721] [crunchyroll] Detect required login (#6677) --- youtube_dl/extractor/crunchyroll.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 33a033a7f..98d1881ae 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -237,7 +237,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text webpage_url = 'http://www.' + mobj.group('url') webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage') - note_m = self._html_search_regex(r'
    (.+?)
    ', webpage, 'trailer-notice', default='') + note_m = self._html_search_regex( + r'
    (.+?)
    ', + webpage, 'trailer-notice', default='') if note_m: raise ExtractorError(note_m) @@ -247,6 +249,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text if msg.get('type') == 'error': raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) + if 'To view this, please log in to verify you are 18 or older.' in webpage: + raise ExtractorError( + 'This video is only available for registered users, ' + 'use --username and --password options to provide account credentials.', + expected=True) + video_title = self._html_search_regex(r']*>(.+?)

    ', webpage, 'video_title', flags=re.DOTALL) video_title = re.sub(r' {2,}', ' ', video_title) video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') From 43e7d3c9453338ae29552311b1447fe95be05db5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:24:47 +0600 Subject: [PATCH 1716/2721] [extractor/common] Add raise_login_required --- youtube_dl/extractor/common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5d24bcb6a..39cef9c5b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -510,6 +510,12 @@ class InfoExtractor(object): """Report attempt to log in.""" self.to_screen('Logging in') + @staticmethod + def raise_login_required(msg='This video is only available for registered users'): + raise ExtractorError( + '%s. Use --username and --password or --netrc to provide account credentials.' % msg, + expected=True) + # Methods for following #608 @staticmethod def url_result(url, ie=None, video_id=None, video_title=None): From bbb43a39fd11c2fdf28ae593eaa994f22ce663bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:25:04 +0600 Subject: [PATCH 1717/2721] [crunchyroll] Use raise_login_required --- youtube_dl/extractor/crunchyroll.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 98d1881ae..801b9b48e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -250,10 +250,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) if 'To view this, please log in to verify you are 18 or older.' 
in webpage: - raise ExtractorError( - 'This video is only available for registered users, ' - 'use --username and --password options to provide account credentials.', - expected=True) + self.raise_login_required(video_id) video_title = self._html_search_regex(r']*>(.+?)

    ', webpage, 'video_title', flags=re.DOTALL) video_title = re.sub(r' {2,}', ' ', video_title) From 3c53455d15035a94bcd2bc915f565420e1a4279f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:25:37 +0600 Subject: [PATCH 1718/2721] [eroprofile] Use raise_login_required --- youtube_dl/extractor/eroprofile.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py index 316033cf1..7fcd0151d 100644 --- a/youtube_dl/extractor/eroprofile.py +++ b/youtube_dl/extractor/eroprofile.py @@ -71,8 +71,7 @@ class EroProfileIE(InfoExtractor): m = re.search(r'You must be logged in to view this video\.', webpage) if m: - raise ExtractorError( - 'This video requires login. Please specify a username and password and try again.', expected=True) + self.raise_login_required('This video requires login') video_id = self._search_regex( [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], From 62984e4584c2962e622514c7d6a475636a8c21d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:25:53 +0600 Subject: [PATCH 1719/2721] [lynda] Use raise_login_required --- youtube_dl/extractor/lynda.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 5b9157ed4..378117270 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -118,9 +118,7 @@ class LyndaIE(LyndaBaseIE): 'lynda returned error: %s' % video_json['Message'], expected=True) if video_json['HasAccess'] is False: - raise ExtractorError( - 'Video %s is only available for members. 
' - % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True) + self.raise_login_required('Video %s is only available for members' % video_id) video_id = compat_str(video_json['ID']) duration = video_json['DurationInSeconds'] From e7ddaef5bd209dd8d24b0025631cde1f5969e71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:26:09 +0600 Subject: [PATCH 1720/2721] [pluralsight] Use raise_login_required --- youtube_dl/extractor/pluralsight.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 7ba396aef..fd32836cc 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -41,9 +41,7 @@ class PluralsightIE(InfoExtractor): def _login(self): (username, password) = self._get_login_info() if username is None: - raise ExtractorError( - 'Pluralsight account is required, use --username and --password options to provide account credentials.', - expected=True) + self.raise_login_required('Pluralsight account is required') login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') From e269d3ae7dbebb22d5b51bd5e6d477a69ae4f3b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:26:24 +0600 Subject: [PATCH 1721/2721] [safari] Use raise_login_required --- youtube_dl/extractor/safari.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index f3c80708c..a602af692 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -20,7 +20,6 @@ from ..utils import ( class SafariBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/' _SUCCESSFUL_LOGIN_REGEX = r']*>Sign Out' - _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com' _NETRC_MACHINE = 'safari' _API_BASE 
= 'https://www.safaribooksonline.com/api/v1/book' @@ -37,9 +36,7 @@ class SafariBaseIE(InfoExtractor): def _login(self): (username, password) = self._get_login_info() if username is None: - raise ExtractorError( - self._ACCOUNT_CREDENTIALS_HINT, - expected=True) + self.raise_login_required('safaribooksonline.com account is required') headers = std_headers if 'Referer' not in headers: From 42e7373bd3c819ee7cebf5898e4bdd33730dde6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:26:35 +0600 Subject: [PATCH 1722/2721] [smotri] Use raise_login_required --- youtube_dl/extractor/smotri.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py index 93a7cfe15..35a81ee87 100644 --- a/youtube_dl/extractor/smotri.py +++ b/youtube_dl/extractor/smotri.py @@ -330,10 +330,7 @@ class SmotriBroadcastIE(InfoExtractor): (username, password) = self._get_login_info() if username is None: - raise ExtractorError( - 'Erotic broadcasts allowed only for registered users, ' - 'use --username and --password options to provide account credentials.', - expected=True) + self.raise_login_required('Erotic broadcasts allowed only for registered users') login_form = { 'login-hint53': '1', From 61a7ff16222accdb259f771d0a6f0adb229b34dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:26:47 +0600 Subject: [PATCH 1723/2721] [tubitv] Use raise_login_required --- youtube_dl/extractor/tubitv.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dl/extractor/tubitv.py index 2c4b21807..4f86b3ee9 100644 --- a/youtube_dl/extractor/tubitv.py +++ b/youtube_dl/extractor/tubitv.py @@ -60,9 +60,7 @@ class TubiTvIE(InfoExtractor): webpage = self._download_webpage(url, video_id) if re.search(r"<(?:DIV|div) class='login-required-screen'>", webpage): - raise ExtractorError( - 'This video requires login, 
use --username and --password ' - 'options to provide account credentials.', expected=True) + self.raise_login_required('This video requires login') title = self._og_search_title(webpage) description = self._og_search_description(webpage) From a882c5f4747c527bb50d87828ea4cceae6d12533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:27:07 +0600 Subject: [PATCH 1724/2721] [udemy] Use raise_login_required --- youtube_dl/extractor/udemy.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 4a0eaf65f..365d8b4bf 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -70,9 +70,7 @@ class UdemyIE(InfoExtractor): def _login(self): (username, password) = self._get_login_info() if username is None: - raise ExtractorError( - 'Udemy account is required, use --username and --password options to provide account credentials.', - expected=True) + self.raise_login_required('Udemy account is required') login_popup = self._download_webpage( self._LOGIN_URL, None, 'Downloading login popup') From 39affb5aa427a3a1830e97523470d11bfdbd067e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 21:27:57 +0600 Subject: [PATCH 1725/2721] [crunchyroll] Fix typo --- youtube_dl/extractor/crunchyroll.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 801b9b48e..c2162aa68 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -250,7 +250,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) if 'To view this, please log in to verify you are 18 or older.' 
in webpage: - self.raise_login_required(video_id) + self.raise_login_required() video_title = self._html_search_regex(r']*>(.+?)', webpage, 'video_title', flags=re.DOTALL) video_title = re.sub(r' {2,}', ' ', video_title) From 3d8132f5e20b7cbdaa8f69aca482553b2c02bed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 22:03:58 +0600 Subject: [PATCH 1726/2721] [shared] Extend _VALID_URL to support vivo.sx (Closes #6681) --- youtube_dl/extractor/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index a07677686..000ef1a07 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -14,7 +14,7 @@ from ..utils import ( class SharedIE(InfoExtractor): - _VALID_URL = r'http://shared\.sx/(?P[\da-z]{10})' + _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P[\da-z]{10})' _TEST = { 'url': 'http://shared.sx/0060718775', From 70113c38c9e551d7d9ab2a4d1f7e76c81b68ae76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 22:04:39 +0600 Subject: [PATCH 1727/2721] [shared] Clarify IE_DESC --- youtube_dl/extractor/shared.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 000ef1a07..cf0a3bfef 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -14,6 +14,7 @@ from ..utils import ( class SharedIE(InfoExtractor): + IE_DESC = 'shared.sx and vivo.sx' _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P[\da-z]{10})' _TEST = { From f62e02c24f1f0e0488b40df178ddb9bb5fdf9fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 22:05:45 +0600 Subject: [PATCH 1728/2721] [shared] Add test for vivo --- youtube_dl/extractor/shared.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 
cf0a3bfef..4fa991dff 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -17,7 +17,7 @@ class SharedIE(InfoExtractor): IE_DESC = 'shared.sx and vivo.sx' _VALID_URL = r'http://(?:shared|vivo)\.sx/(?P[\da-z]{10})' - _TEST = { + _TESTS = [{ 'url': 'http://shared.sx/0060718775', 'md5': '106fefed92a8a2adb8c98e6a0652f49b', 'info_dict': { @@ -25,7 +25,16 @@ class SharedIE(InfoExtractor): 'ext': 'mp4', 'title': 'Bmp4', }, - } + }, { + 'url': 'http://vivo.sx/d7ddda0e78', + 'md5': '15b3af41be0b4fe01f4df075c2678b2c', + 'info_dict': { + 'id': 'd7ddda0e78', + 'ext': 'mp4', + 'title': 'Chicken', + 'filesize': 528031, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) From f11c316347bea41d9148d1c8d5d7738a594a06d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Aug 2015 22:06:10 +0600 Subject: [PATCH 1729/2721] [shared] Add filesize to test --- youtube_dl/extractor/shared.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 4fa991dff..c5636e8e9 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -24,6 +24,7 @@ class SharedIE(InfoExtractor): 'id': '0060718775', 'ext': 'mp4', 'title': 'Bmp4', + 'filesize': 1720110, }, }, { 'url': 'http://vivo.sx/d7ddda0e78', From d7e8264517d29156697f82b7761dc99d13994c21 Mon Sep 17 00:00:00 2001 From: nmrugg Date: Thu, 27 Aug 2015 23:24:13 +0800 Subject: [PATCH 1730/2721] Make FoxBusiness work. 
--- youtube_dl/extractor/foxnews.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 917f76b1e..7de88ab66 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, @@ -8,7 +10,7 @@ from ..utils import ( class FoxNewsIE(InfoExtractor): - _VALID_URL = r'https?://video\.foxnews\.com/v/(?:video-embed\.html\?video_id=)?(?P\d+)' + _VALID_URL = r'https?://video\.fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ { 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', @@ -47,8 +49,10 @@ class FoxNewsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + m = re.match(r'^https?://video\.fox(news|business)', url) + video = self._download_json( - 'http://video.foxnews.com/v/feed/video/%s.js?template=fox' % video_id, video_id) + 'http://video.fox' + m.group(1) + '.com/v/feed/video/%s.js?template=fox' % video_id, video_id) item = video['channel']['item'] title = item['title'] From 8df8c278b6d5e2b5a350446690873dc9f5f48aff Mon Sep 17 00:00:00 2001 From: nmrugg Date: Thu, 27 Aug 2015 23:24:28 +0800 Subject: [PATCH 1731/2721] Added matching test for FoxBusiness. 
--- youtube_dl/extractor/foxnews.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 7de88ab66..a8902c960 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -44,6 +44,10 @@ class FoxNewsIE(InfoExtractor): 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', 'only_matching': True, }, + { + 'url': 'http://video.foxbusiness.com/v/4442309889001', + 'only_matching': True, + }, ] def _real_extract(self, url): From 1b660cce120c733f2bb195ef1cfe2ff2421b439f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Aug 2015 21:48:03 +0600 Subject: [PATCH 1732/2721] [foxnews] Simplify (Closes #6694) --- youtube_dl/extractor/foxnews.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index a8902c960..244c75f0b 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -10,7 +10,7 @@ from ..utils import ( class FoxNewsIE(InfoExtractor): - _VALID_URL = r'https?://video\.fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P\d+)' + _VALID_URL = r'https?://(?Pvideo\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ { 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', @@ -51,12 +51,12 @@ class FoxNewsIE(InfoExtractor): ] def _real_extract(self, url): - video_id = self._match_id(url) - - m = re.match(r'^https?://video\.fox(news|business)', url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + host = mobj.group('host') video = self._download_json( - 'http://video.fox' + m.group(1) + '.com/v/feed/video/%s.js?template=fox' % video_id, video_id) + 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id) item = video['channel']['item'] title = item['title'] From 
5307c332329d6a1f3eec240b66a4f11905889f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 27 Aug 2015 21:48:47 +0600 Subject: [PATCH 1733/2721] [foxnews] Clarify IE_DESC --- youtube_dl/extractor/foxnews.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 244c75f0b..3a4a59135 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -10,6 +10,7 @@ from ..utils import ( class FoxNewsIE(InfoExtractor): + IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ { From a4962b80d668de704fc347d5e76587be0e95dfef Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 28 Aug 2015 05:04:39 +0200 Subject: [PATCH 1734/2721] release 2015.08.28 --- docs/supportedsites.md | 4 ++-- youtube_dl/version.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8d9db53a6..328a819b3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -166,7 +166,7 @@ - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** - **Foxgay** - - **FoxNews** + - **FoxNews**: Fox News and Fox Business Video - **FoxSports** - **france2.fr:generation-quoi** - **FranceCulture** @@ -465,7 +465,7 @@ - **Sexu** - **SexyKarma**: Sexy Karma and Watch Indian Porn - **Shahid** - - **Shared** + - **Shared**: shared.sx and vivo.sx - **ShareSix** - **Sina** - **Slideshare** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 394951ca7..a07bc9233 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.08.23' +__version__ = '2015.08.28' From 071c10137b6b17b79ecfc8676736d5cc243022f6 Mon Sep 17 00:00:00 2001 From: Paul Hartmann Date: Wed, 26 Aug 2015 00:06:44 +0200 Subject: [PATCH 1735/2721] [MTV] move 
German mtv site to new class --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mtv.py | 37 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d59882598..66422b005 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -340,6 +340,7 @@ from .mtv import ( MTVIE, MTVServicesEmbeddedIE, MTVIggyIE, + MTVDEIE, ) from .muenchentv import MuenchenTVIE from .musicplayon import MusicPlayOnIE diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index b48fac5e3..15df62649 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -288,3 +288,40 @@ class MTVIggyIE(MTVServicesInfoExtractor): } } _FEED_URL = 'http://all.mtvworldverticals.com/feed-xml/' + +class MTVDEIE(MTVServicesInfoExtractor): + IE_NAME = 'mtv.de' + _VALID_URL = r'''(?x)^https?://(?:www\.)?mtv\.de(?P/artists/.*)''' + _TESTS = [ + { + 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum', + 'info_dict': { + 'id': 'a50bc5f0b3aa4b3190aa', + 'ext': 'mp4', + 'title': 'cro-traum', + 'description': 'Cro - Traum', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + return self._get_videos_info(url, mobj.group('video_path')) + + def _get_videos_info(self, url, video_path): + webpage = self._download_webpage(url, video_path) + playlist_js = self._search_regex(r'|$)', + webpage, 'videoplayer applet', default=None) + if config_json: + config = self._parse_json(config_json, display_id, fatal=False) + if config: + sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi') + if sapi: + return self._extract_info(display_id, sapi, webpage) + items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, default=None) @@ -190,22 +217,10 @@ class YahooIE(InfoExtractor): video_id = info['id'] return self._get_info(video_id, display_id, webpage) 
- def _get_info(self, video_id, display_id, webpage): - region = self._search_regex( - r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', - webpage, 'region', fatal=False, default='US') - data = compat_urllib_parse.urlencode({ - 'protocol': 'http', - 'region': region, - }) - query_url = ( - 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' - '{id}?{data}'.format(id=video_id, data=data)) - query_result = self._download_json( - query_url, display_id, 'Downloading video info') - - info = query_result['query']['results']['mediaObj'][0] + def _extract_info(self, display_id, query, webpage): + info = query['query']['results']['mediaObj'][0] meta = info.get('meta') + video_id = info.get('id') if not meta: msg = info['status'].get('msg') @@ -231,6 +246,9 @@ class YahooIE(InfoExtractor): 'ext': 'flv', }) else: + if s.get('format') == 'm3u8_playlist': + format_info['protocol'] = 'm3u8_native' + format_info['ext'] = 'mp4' format_url = compat_urlparse.urljoin(host, path) format_info['url'] = format_url formats.append(format_info) @@ -264,6 +282,21 @@ class YahooIE(InfoExtractor): 'subtitles': subtitles, } + def _get_info(self, video_id, display_id, webpage): + region = self._search_regex( + r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"', + webpage, 'region', fatal=False, default='US') + data = compat_urllib_parse.urlencode({ + 'protocol': 'http', + 'region': region, + }) + query_url = ( + 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + '{id}?{data}'.format(id=video_id, data=data)) + query_result = self._download_json( + query_url, display_id, 'Downloading video info') + return self._extract_info(display_id, query_result, webpage) + class YahooSearchIE(SearchInfoExtractor): IE_DESC = 'Yahoo screen search' From 1721fef28b89ac4264db978ab7fee3b4dd154056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 10 Sep 2015 02:58:40 +0600 Subject: [PATCH 1871/2721] [yahoo] Fix test --- youtube_dl/extractor/yahoo.py | 5 +---- 1 file changed, 1 insertion(+), 4 
deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 1d9b98750..fca5ddc69 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -147,6 +147,7 @@ class YahooIE(InfoExtractor): }, { # Query result is embedded in webpage, but explicit request to video API fails with geo restriction 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html', + 'md5': '4fbafb9c9b6f07aa8f870629f6671b35', 'info_dict': { 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504', 'ext': 'mp4', @@ -154,10 +155,6 @@ class YahooIE(InfoExtractor): 'description': 'md5:8fc39608213295748e1e289807838c97', 'duration': 1646, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, } ] From 689fb748ee1ba8e61f99d21a3bcb1bc83b708649 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 11 Sep 2015 04:44:17 +0100 Subject: [PATCH 1872/2721] [utlis] add extract_attributes for extracting html tags attributes --- youtube_dl/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 206dd56bc..bcebf9cc5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -248,6 +248,14 @@ def get_element_by_attribute(attribute, value, html): return unescapeHTML(res) +def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'): + attributes = re.findall(attributes_regex, attributes_str) + attributes_dict = {} + if attributes: + attributes_dict = {attribute_name: attribute_value for (attribute_name, attribute_value) in attributes} + return attributes_dict + + def clean_html(html): """Clean an HTML snippet into a readable string""" From ed1269000f24a6ddc683a295ff402ef3ded5c4fb Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 11 Sep 2015 04:46:21 +0100 Subject: [PATCH 1873/2721] [brightcove] add support for brightcove in page embed(fixes #6824) --- youtube_dl/extractor/__init__.py | 5 +- 
youtube_dl/extractor/brightcove.py | 92 ++++++++++++++++++++++++++++++ youtube_dl/extractor/generic.py | 21 ++++++- 3 files changed, 116 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 57f55b479..fcd9edec3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -59,7 +59,10 @@ from .bloomberg import BloombergIE from .bpb import BpbIE from .br import BRIE from .breakcom import BreakIE -from .brightcove import BrightcoveIE +from .brightcove import ( + BrightcoveIE, + BrightcoveInPageEmbedIE, +) from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 4721c2293..a07c0888f 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -22,6 +22,10 @@ from ..utils import ( fix_xml_ampersands, unescapeHTML, unsmuggle_url, + js_to_json, + int_or_none, + parse_iso8601, + extract_attributes, ) @@ -346,3 +350,91 @@ class BrightcoveIE(InfoExtractor): if 'url' not in info and not info.get('formats'): raise ExtractorError('Unable to extract video url for %s' % info['id']) return info + + +class BrightcoveInPageEmbedIE(InfoExtractor): + _VALID_URL = r'https?://players\.brightcove\.net/(?P\d+)/([a-z0-9-]+)_([a-z]+)/index.html?.*videoId=(?P\d+)' + TEST = { + 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', + 'info_dict': { + 'id': '4463358922001', + 'ext': 'flv', + 'title': 'Meet the man behind Popcorn Time', + 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', + 'duration': 165768, + } + } + + @staticmethod + def _extract_url(webpage): + video_attributes = re.search(r'(?s)]*)>.*?', webpage) + if video_attributes: + video_attributes = extract_attributes(video_attributes.group(), 
r'(?s)\s*data-(account|video-id|playlist-id|policy-key|player|embed)\s*=\s*["\']([^"\']+)["\']') + account_id = video_attributes.get('account') + player_id = video_attributes.get('player') + embed = video_attributes.get('embed') + video_id = video_attributes.get('video-id') + if account_id and player_id and embed and video_id: + return 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (account_id, player_id, embed, video_id) + return None + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + account_id, player_id, embed, video_id = mobj.groups() + + webpage = self._download_webpage('http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed), video_id) + + catalog = self._parse_json( + js_to_json( + self._search_regex( + r'catalog\(({[^}]+})\);', + webpage, + 'catalog' + ) + ), + video_id + ) + policy_key = catalog['policyKey'] + + req = compat_urllib_request.Request( + 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id), + headers={'Accept': 'application/json;pk=%s' % policy_key}) + json_data = self._download_json(req, video_id) + + title = json_data['name'] + description = json_data.get('description') + thumbnail = json_data.get('name') + timestamp = parse_iso8601(json_data.get('published_at')) + duration = int_or_none(json_data.get('duration')) + + formats = [] + for source in json_data.get('sources'): + source_type = source.get('type') + if source_type == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats(source.get('src'), video_id)) + else: + src = source.get('src') + if src: + formats.append({ + 'url': src, + 'abr': source.get('avg_bitrate'), + 'width': int_or_none(source.get('width')), + 'height': int_or_none(source.get('height')), + 'filesize': source.get('size'), + 'container': source.get('container'), + 'vcodec': source.get('container'), + }) + else: + formats.extend(self._extract_f4m_formats(source.get('streaming_src'), video_id)) + 
+ self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ec748ed9f..7a3a7f66b 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -29,7 +29,10 @@ from ..utils import ( url_basename, xpath_text, ) -from .brightcove import BrightcoveIE +from .brightcove import ( + BrightcoveIE, + BrightcoveInPageEmbedIE, +) from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE @@ -1012,6 +1015,17 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'cinemasnob', }, + }, + # BrightcoveInPageEmbed embed + { + 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', + 'info_dict': { + 'id': '4238694884001', + 'ext': 'flv', + 'title': 'Tabletop: Dread, Last Thoughts', + 'description': 'Tabletop: Dread, Last Thoughts', + 'duration': 51690, + }, } ] @@ -1288,6 +1302,11 @@ class GenericIE(InfoExtractor): 'entries': entries, } + # Look for Brightcove In Page Embed: + brightcove_in_page_embed_url = BrightcoveInPageEmbedIE._extract_url(webpage) + if brightcove_in_page_embed_url: + return self.url_result(brightcove_in_page_embed_url, 'BrightcoveInPageEmbed') + # Look for embedded rtl.nl player matches = re.findall( r']+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', From 73eb13dfc74132b8f0e5c1ac1ea75f66e0aca6bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Sep 2015 20:43:05 +0600 Subject: [PATCH 1874/2721] [extractor/common] Case insensitive inputs extraction --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5eeeda08d..835f6f368 100644 --- 
a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -732,7 +732,7 @@ class InfoExtractor(object): @staticmethod def _hidden_inputs(html): hidden_inputs = {} - for input in re.findall(r']+)>', html): + for input in re.findall(r'(?i)]+)>', html): if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): continue name = re.search(r'name=(["\'])(?P.+?)\1', input) @@ -746,7 +746,7 @@ class InfoExtractor(object): def _form_hidden_inputs(self, form_id, html): form = self._search_regex( - r'(?s)]+?id=(["\'])%s\1[^>]*>(?P
    .+?)
    ' % form_id, + r'(?is)]+?id=(["\'])%s\1[^>]*>(?P
    .+?)
    ' % form_id, html, '%s form' % form_id, group='form') return self._hidden_inputs(form) From 586f1cc532d167c28e733779cbf132b94d8f76e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Sep 2015 21:07:32 +0600 Subject: [PATCH 1875/2721] [extractor/common] Skip html comment tags (Closes #6822) --- youtube_dl/extractor/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 835f6f368..d694e818e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -731,6 +731,7 @@ class InfoExtractor(object): @staticmethod def _hidden_inputs(html): + html = re.sub(r'', '', html) hidden_inputs = {} for input in re.findall(r'(?i)]+)>', html): if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): From 60ed60353b9ca57e8181f0b14d525ce487e673ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Sep 2015 20:34:48 +0600 Subject: [PATCH 1876/2721] [openfilm] Remove extractor OpenFilm has been shut down --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/openfilm.py | 70 -------------------------------- 2 files changed, 71 deletions(-) delete mode 100644 youtube_dl/extractor/openfilm.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 57f55b479..2e7272931 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -432,7 +432,6 @@ from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) -from .openfilm import OpenFilmIE from .orf import ( ORFTVthekIE, ORFOE1IE, diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py deleted file mode 100644 index d2ceedd01..000000000 --- a/youtube_dl/extractor/openfilm.py +++ /dev/null @@ -1,70 +0,0 @@ -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote_plus -from ..utils import ( - parse_iso8601, - 
parse_age_limit, - int_or_none, -) - - -class OpenFilmIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)openfilm\.com/videos/(?P.+)' - _TEST = { - 'url': 'http://www.openfilm.com/videos/human-resources-remastered', - 'md5': '42bcd88c2f3ec13b65edf0f8ad1cac37', - 'info_dict': { - 'id': '32736', - 'display_id': 'human-resources-remastered', - 'ext': 'mp4', - 'title': 'Human Resources (Remastered)', - 'description': 'Social Engineering in the 20th Century.', - 'thumbnail': 're:^https?://.*\.jpg$', - 'duration': 7164, - 'timestamp': 1334756988, - 'upload_date': '20120418', - 'uploader_id': '41117', - 'view_count': int, - 'age_limit': 0, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - player = compat_urllib_parse_unquote_plus( - self._og_search_video_url(webpage)) - - video = json.loads(self._search_regex( - r'\bp=({.+?})(?:&|$)', player, 'video JSON')) - - video_url = '%s1.mp4' % video['location'] - video_id = video.get('video_id') - display_id = video.get('alias') or display_id - title = video.get('title') - description = video.get('description') - thumbnail = video.get('main_thumb') - duration = int_or_none(video.get('duration')) - timestamp = parse_iso8601(video.get('dt_published'), ' ') - uploader_id = video.get('user_id') - view_count = int_or_none(video.get('views_count')) - age_limit = parse_age_limit(video.get('age_limit')) - - return { - 'id': video_id, - 'display_id': display_id, - 'url': video_url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'uploader_id': uploader_id, - 'view_count': view_count, - 'age_limit': age_limit, - } From 41ebd6530b124b9265a3df9d7d09aef02041b088 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 12 Sep 2015 22:42:57 +0800 Subject: [PATCH 1877/2721] [tudou] Add the test case (#6273) --- youtube_dl/extractor/tudou.py | 3 +++ 1 file changed, 3 insertions(+) diff 
--git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index e800477e2..950c42afb 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -27,6 +27,9 @@ class TudouIE(InfoExtractor): 'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', 'thumbnail': 're:^https?://.*\.jpg$', } + }, { + 'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html', + 'only_matching': True, }] _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' From 94e507aea798dac6974237cc44257dda45d5fa5a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 12 Sep 2015 22:45:09 +0800 Subject: [PATCH 1878/2721] [tudou] A more comprehensive _VALID_URL --- youtube_dl/extractor/tudou.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 950c42afb..68712cb4a 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class TudouIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/?.*/(?P[^/?#]+?)(?:\.html)?/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P[^/?#]+?)(?:\.html)?/?(?:$|[?#])' _TESTS = [{ 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 'md5': '140a49ed444bd22f93330985d8475fcb', From 141ba36996f77a420df69903a59792f6f93ae314 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 12 Sep 2015 22:51:49 +0800 Subject: [PATCH 1879/2721] [tudou] Modernize --- youtube_dl/extractor/tudou.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 68712cb4a..c9d80a7ef 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -2,9 +2,6 @@ from __future__ import unicode_literals -import re -import json - from .common 
import InfoExtractor @@ -46,13 +43,10 @@ class TudouIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - m = re.search(r'vcode:\s*[\'"](.+?)[\'"]', webpage) - if m and m.group(1): - return { - '_type': 'url', - 'url': 'youku:' + m.group(1), - 'ie_key': 'Youku' - } + youku_vcode = self._search_regex( + r'vcode:\s*[\'"](.+?)[\'"]', webpage, 'youku vcode', default=None) + if youku_vcode: + return self.url_result('youku:' + youku_vcode, ie='Youku') title = self._search_regex( r",kw:\s*['\"](.+?)[\"']", webpage, 'title') @@ -63,8 +57,8 @@ class TudouIE(InfoExtractor): r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']", webpage, 'player URL', default=self._PLAYER_URL) - segs_json = self._search_regex(r'segs: \'(.*)\'', webpage, 'segments') - segments = json.loads(segs_json) + segments = self._parse_json(self._search_regex( + r'segs: \'(.*)\'', webpage, 'segments'), video_id) # It looks like the keys are the arguments that have to be passed as # the hd field in the request url, we pick the higher # Also, filter non-number qualities (see issue #3643). 
From aab135516b288f24c55178b024976fd3e130c7b8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 12 Sep 2015 22:52:51 +0800 Subject: [PATCH 1880/2721] [tudou] Avoid shadowing builtin names --- youtube_dl/extractor/tudou.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index c9d80a7ef..6116b209d 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -31,11 +31,11 @@ class TudouIE(InfoExtractor): _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' - def _url_for_id(self, id, quality=None): - info_url = "http://v2.tudou.com/f?id=" + str(id) + def _url_for_id(self, video_id, quality=None): + info_url = "http://v2.tudou.com/f?id=" + str(video_id) if quality: info_url += '&hd' + quality - webpage = self._download_webpage(info_url, id, "Opening the info webpage") + webpage = self._download_webpage(info_url, video_id, "Opening the info webpage") final_url = self._html_search_regex('>(.+?)', webpage, 'video url') return final_url From 87813a857009dc3c3dfcc421679e5e806d363863 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 13 Sep 2015 02:36:51 +0800 Subject: [PATCH 1881/2721] [tudou] Use _download_xml --- youtube_dl/extractor/tudou.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 6116b209d..3b993192c 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -35,8 +35,8 @@ class TudouIE(InfoExtractor): info_url = "http://v2.tudou.com/f?id=" + str(video_id) if quality: info_url += '&hd' + quality - webpage = self._download_webpage(info_url, video_id, "Opening the info webpage") - final_url = self._html_search_regex('>(.+?)', webpage, 'video url') + xml_data = self._download_xml(info_url, video_id, "Opening the info XML page") + final_url = xml_data.text return final_url def _real_extract(self, url): From 
349b3a2ea0d6c264facacd92508516e8530108b2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 13 Sep 2015 02:51:20 +0800 Subject: [PATCH 1882/2721] [tudou] Improve regexs --- youtube_dl/extractor/tudou.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 3b993192c..53ba8511f 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -44,21 +44,21 @@ class TudouIE(InfoExtractor): webpage = self._download_webpage(url, video_id) youku_vcode = self._search_regex( - r'vcode:\s*[\'"](.+?)[\'"]', webpage, 'youku vcode', default=None) + r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None) if youku_vcode: return self.url_result('youku:' + youku_vcode, ie='Youku') title = self._search_regex( - r",kw:\s*['\"](.+?)[\"']", webpage, 'title') + r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title') thumbnail_url = self._search_regex( - r",pic:\s*[\"'](.+?)[\"']", webpage, 'thumbnail URL', fatal=False) + r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False) player_url = self._search_regex( - r"playerUrl\s*:\s*['\"](.+?\.swf)[\"']", + r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]', webpage, 'player URL', default=self._PLAYER_URL) segments = self._parse_json(self._search_regex( - r'segs: \'(.*)\'', webpage, 'segments'), video_id) + r'segs: \'([^\']+)\'', webpage, 'segments'), video_id) # It looks like the keys are the arguments that have to be passed as # the hd field in the request url, we pick the higher # Also, filter non-number qualities (see issue #3643). 
From b264c2130221912adfc7cc35d73c2a88d79eafeb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 13 Sep 2015 02:57:14 +0800 Subject: [PATCH 1883/2721] [tudou] Use single quotes and compat_str --- youtube_dl/extractor/tudou.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 53ba8511f..5f7ac4b35 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_str class TudouIE(InfoExtractor): @@ -32,7 +33,7 @@ class TudouIE(InfoExtractor): _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' def _url_for_id(self, video_id, quality=None): - info_url = "http://v2.tudou.com/f?id=" + str(video_id) + info_url = 'http://v2.tudou.com/f?id=' + compat_str(video_id) if quality: info_url += '&hd' + quality xml_data = self._download_xml(info_url, video_id, "Opening the info XML page") From 2ffe3bc14b5e65c902fe5ddd610143c791edaa52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Sep 2015 04:15:49 +0600 Subject: [PATCH 1884/2721] [ndr] Rework and cover with tests --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ndr.py | 469 ++++++++++++++++++++++--------- 2 files changed, 334 insertions(+), 136 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 44ab7ce3c..fadba905d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -367,6 +367,7 @@ from .nbc import ( from .ndr import ( NDRIE, NJoyIE, + NDREmbedBaseIE, NDREmbedIE, NJoyEmbedIE, ) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 87f3edbbe..e3cc6fde8 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -1,183 +1,380 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals +import re + from 
.common import InfoExtractor from ..utils import ( - ExtractorError, + determine_ext, int_or_none, + parse_iso8601, qualities, ) -preference = qualities(['xs', 's', 'm','l', 'xl']) - - class NDRBaseIE(InfoExtractor): - - def extract_video_info(self, playlist, video_id): - formats = [] - streamType = playlist.get('config').get('streamType') - if streamType == 'httpVideo': - for key, f in playlist.items(): - if key != 'config': - src = f['src'] - if '.f4m' in src: - formats.extend(self._extract_f4m_formats(src, video_id)) - elif '.m3u8' in src: - formats.extend(self._extract_m3u8_formats(src, video_id, fatal=False)) - else: - quality = f.get('quality') - formats.append({ - 'url': src, - 'format_id': quality, - 'preference': preference(quality), - }) - elif streamType == 'httpAudio': - for key, f in playlist.items(): - if key != 'config': - formats.append({ - 'url': f['src'], - 'format_id': 'mp3', - 'vcodec': 'none', - }) - else: - raise ExtractorError('No media links available for %s' % video_id) - - self._sort_formats(formats) - - config = playlist.get('config') - - title = config['title'] - duration = int_or_none(config.get('duration')) - thumbnails = [{ - 'id': thumbnail.get('quality'), - 'url': thumbnail.get('src'), - 'preference': preference(thumbnail.get('quality')) - } for thumbnail in config.get('poster').values()] - - return { - 'id': video_id, - 'title': title, - 'thumbnails': thumbnails, - 'duration': duration, - 'formats': formats, - } - def _real_extract(self, url): - video_id = self._match_id(url) - - json_data = self._download_json('http://www.ndr.de/%s-ppjson.json' % video_id, video_id, fatal=False) - - if not json_data: - webpage = self._download_webpage(url, video_id) - embed_url = self._html_search_regex(r']+id="pp_\w+"[^>]+src="(/.*)"', webpage, 'embed url', None, False) - if not embed_url: - embed_url = self._html_search_meta('embedURL', webpage, fatal=False) - if embed_url: - if embed_url.startswith('/'): - return 
self.url_result('http://www.ndr.de%s' % embed_url, 'NDREmbed') - else: - return self.url_result(embed_url, 'NDREmbed') - raise ExtractorError('No media links available for %s' % video_id) - - return self.extract_video_info(json_data['playlist'], video_id) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + return self._extract_embed(webpage, display_id) class NDRIE(NDRBaseIE): IE_NAME = 'ndr' - IE_DESC = 'NDR.de - Mediathek' - _VALID_URL = r'https?://www\.ndr\.de/.+?,(?P\w+)\.html' + IE_DESC = 'NDR.de - Norddeutscher Rundfunk' + _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P[^/?#]+),[\da-z]+\.html' + _TESTS = [{ + # httpVideo, same content id + 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', + 'md5': '6515bc255dc5c5f8c85bbc38e035a659', + 'info_dict': { + 'id': 'hafengeburtstag988', + 'display_id': 'Party-Poette-und-Parade', + 'ext': 'mp4', + 'title': 'Party, Pötte und Parade', + 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c', + 'uploader': 'ndrtv', + 'timestamp': 1431108900, + 'upload_date': '20150510', + 'duration': 3498, + }, + 'params': { + 'skip_download': True, + }, + }, { + # httpVideo, different content id + 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html', + 'md5': '1043ff203eab307f0c51702ec49e9a71', + 'info_dict': { + 'id': 'osna272', + 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch', + 'ext': 'mp4', + 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights', + 'description': 'md5:32e9b800b3d2d4008103752682d5dc01', + 'uploader': 'ndrtv', + 'timestamp': 1442059200, + 'upload_date': '20150912', + 'duration': 510, + }, + 'params': { + 'skip_download': True, + }, + }, { + # httpAudio, same content id + 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html', + 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', + 'info_dict': { + 'id': 'audio51535', + 'display_id': 'La-Valette-entgeht-der-Hinrichtung', 
+ 'ext': 'mp3', + 'title': 'La Valette entgeht der Hinrichtung', + 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', + 'uploader': 'ndrinfo', + 'timestamp': 1290626100, + 'upload_date': '20140729', + 'duration': 884, + }, + 'params': { + 'skip_download': True, + }, + }] - _TESTS = [ - { - 'url': 'http://www.ndr.de/fernsehen/sendungen/nordmagazin/Kartoffeltage-in-der-Lewitz,nordmagazin25866.html', - 'md5': '5bc5f5b92c82c0f8b26cddca34f8bb2c', - 'note': 'Video file', - 'info_dict': { - 'id': 'nordmagazin25866', - 'ext': 'mp4', - 'title': 'Kartoffeltage in der Lewitz', - 'duration': 166, - }, - 'skip': '404 Not found', - }, - { - 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', - 'md5': 'dadc003c55ae12a5d2f6bd436cd73f59', - 'info_dict': { - 'id': 'hafengeburtstag988', - 'ext': 'mp4', - 'title': 'Party, Pötte und Parade', - 'duration': 3498, - }, - }, - { - 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html', - 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', - 'note': 'Audio file', - 'info_dict': { - 'id': 'audio51535', - 'ext': 'mp3', - 'title': 'La Valette entgeht der Hinrichtung', - 'duration': 884, - } + def _extract_embed(self, webpage, display_id): + embed_url = self._html_search_meta( + 'embedURL', webpage, 'embed URL', fatal=True) + description = self._search_regex( + r']+itemprop="description">([^<]+)

    ', + webpage, 'description', fatal=False) + timestamp = parse_iso8601( + self._search_regex( + r'