From 44e5eded77da32847b70f8ace8beb6df5426c61b Mon Sep 17 00:00:00 2001 From: Mark Lee Date: Tue, 25 Mar 2014 19:49:17 -0700 Subject: [PATCH 0001/1105] Add video joiner class to postprocessor module --- youtube_dl/postprocessor/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py index 7f19f717f..2ccbd5d48 100644 --- a/youtube_dl/postprocessor/__init__.py +++ b/youtube_dl/postprocessor/__init__.py @@ -5,6 +5,7 @@ from .ffmpeg import ( FFmpegVideoConvertor, FFmpegExtractAudioPP, FFmpegEmbedSubtitlePP, + FFmpegJoinVideosPP, ) from .xattrpp import XAttrMetadataPP @@ -14,5 +15,6 @@ __all__ = [ 'FFmpegVideoConvertor', 'FFmpegExtractAudioPP', 'FFmpegEmbedSubtitlePP', + 'FFmpegJoinVideosPP', 'XAttrMetadataPP', ] From 25ca480a513f53c9159ef277937e8b53e3834fa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Mar 2014 12:01:08 +0100 Subject: [PATCH 0002/1105] Makefile: include the docs in the tarball --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c6d09932b..f7d917d09 100644 --- a/Makefile +++ b/Makefile @@ -72,8 +72,9 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude '__pycache' \ --exclude '.git' \ --exclude 'testdata' \ + --exclude 'docs/_build' \ -- \ - bin devscripts test youtube_dl \ + bin devscripts test youtube_dl docs \ CHANGELOG LICENSE README.md README.txt \ Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \ youtube-dl From f1eb03211a7b666b14043ff7fba46f0ee6c864dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 26 Mar 2014 19:51:54 +0700 Subject: [PATCH 0003/1105] [cspan] Roll back unfinished rtmp support --- youtube_dl/extractor/cspan.py | 57 ++++------------------------------- 1 file changed, 6 insertions(+), 51 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 795ccd926..2a8eda9ef 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -56,61 +56,16 @@ class CSpanIE(InfoExtractor): url = unescapeHTML(data['video']['files'][0]['path']['#text']) - doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id + '&version=2014-01-23', + doc = self._download_xml('http://www.c-span.org/common/services/flashXml.php?programid=' + video_id, video_id) - formats = [ - { - 'url': url, - } - ] - - def find_string(node, s): - return find_xpath_attr(node, './/string', 'name', s).text - - def find_number(node, s): - return int(find_xpath_attr(node, './/number', 'name', s).text) - - def find_array(node, s): - return find_xpath_attr(node, './/array', 'name', s) - - def process_files(files, url, formats): - for file in files: - path = find_string(file, 'path') - #duration = find_number(file, './number', 'name', 'length') - hd = find_number(file, 'hd') - formats.append({ - 'url': url, - 'play_path': path, - 'ext': 'flv', - 'quality': hd, - }) - - def process_node(node, formats): - url = find_xpath_attr(node, './string', 'name', 'url') - if url is None: - url = find_xpath_attr(node, './string', 'name', 'URL') - if url is None: - return - url = url.text.replace('$(protocol)', 'rtmp').replace('$(port)', '1935') - files = find_array(node, 'files') - if files is None: - return - process_files(files, url, formats) - - process_node(doc.find('./media-link'), formats) - - streams = find_array(doc, 'streams') - if streams is not None: - for stream in streams: - if find_string(stream, 'name') != 'vod': - continue - process_node(stream, formats) + def find_string(s): + return find_xpath_attr(doc, './/string', 'name', s).text return { 'id': video_id, - 'title': find_string(doc, 'title'), + 'title': find_string('title'), + 'url': url, 'description': description, - 'thumbnail': find_string(doc, 'poster'), - 'formats': formats, + 'thumbnail': find_string('poster'), } From 87a26277000b425972ec427f86b68fef2097c1ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Mar 2014 15:03:34 +0100 Subject: [PATCH 0004/1105] [vice] Remove extractor The generic ooyala detection works fine. --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/vice.py | 38 -------------------------------- 2 files changed, 39 deletions(-) delete mode 100644 youtube_dl/extractor/vice.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 56b382aed..d4b89fd5c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -263,7 +263,6 @@ from .veehd import VeeHDIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import VevoIE -from .vice import ViceIE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py deleted file mode 100644 index 87812d6af..000000000 --- a/youtube_dl/extractor/vice.py +++ /dev/null @@ -1,38 +0,0 @@ -import re - -from .common import InfoExtractor -from .ooyala import OoyalaIE -from ..utils import ExtractorError - - -class ViceIE(InfoExtractor): - _VALID_URL = r'http://www\.vice\.com/.*?/(?P.+)' - - _TEST = { - u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1', - u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4', - u'info_dict': { - u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', - }, - u'params': { - # Requires ffmpeg (m3u8 manifest) - u'skip_download': True, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - name = mobj.group('name') - webpage = self._download_webpage(url, name) - try: - ooyala_url = self._og_search_video_url(webpage) - except ExtractorError: - try: - embed_code = self._search_regex( - r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage, - u'ooyala embed code') - ooyala_url = OoyalaIE._url_for_embed_code(embed_code) - except ExtractorError: - raise ExtractorError(u'The page doesn\'t contain a video', expected=True) - return self.url_result(ooyala_url, ie='Ooyala') - From 44f373f3caf55c9ddc2e01bd605a7984fe9c4837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Mar 2014 15:09:14 +0100 Subject: [PATCH 0005/1105] [slashdot] Remove extractor The generic ooyala detection works fine. --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/slashdot.py | 24 ------------------------ 2 files changed, 25 deletions(-) delete mode 100644 youtube_dl/extractor/slashdot.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d4b89fd5c..685fc749d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -208,7 +208,6 @@ from .rutv import RUTVIE from .savefrom import SaveFromIE from .servingsys import ServingSysIE from .sina import SinaIE -from .slashdot import SlashdotIE from .slideshare import SlideshareIE from .smotri import ( SmotriIE, diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py deleted file mode 100644 index d68646d24..000000000 --- a/youtube_dl/extractor/slashdot.py +++ /dev/null @@ -1,24 +0,0 @@ -import re - -from .common import InfoExtractor - - -class SlashdotIE(InfoExtractor): - _VALID_URL = r'https?://tv\.slashdot\.org/video/\?embed=(?P.*?)(&|$)' - - _TEST = { - u'add_ie': ['Ooyala'], - u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', - u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', - u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', - u'info_dict': { - u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator', - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - ooyala_url = self._search_regex(r'', - webpage, 'full data json')) + r'\nwindow.app = (?P.+?);\n', + webpage, 'full data json'))['videoData'] video_id = full_data['activeVideo']['video'] video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id] From ff43d2365ff5569c98df8e01250d34706e266c44 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Dec 2015 07:58:33 +0100 Subject: [PATCH 0135/1105] [soompi] remove extractor http://tv.soompi.com now redirect to viki.com because Viki has acquired Soompi http://www.soompi.com/2015/08/19/we-got-married-soompi-joins-viki/ --- youtube_dl/extractor/__init__.py | 4 - youtube_dl/extractor/soompi.py | 146 ------------------------------- 2 files changed, 150 deletions(-) delete mode 100644 youtube_dl/extractor/soompi.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 760b65441..702cbc6e2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -591,10 +591,6 @@ from .snagfilms import ( ) from .snotr import SnotrIE from .sohu import SohuIE -from .soompi import ( - SoompiIE, - SoompiShowIE, -) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py deleted file mode 100644 index 5da66ca9e..000000000 --- a/youtube_dl/extractor/soompi.py +++ /dev/null @@ -1,146 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .crunchyroll import CrunchyrollIE - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - remove_start, - xpath_text, -) - - -class SoompiBaseIE(InfoExtractor): - def _get_episodes(self, webpage, episode_filter=None): - episodes = self._parse_json( - self._search_regex( - r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'), - None) - return list(filter(episode_filter, episodes)) - - -class SoompiIE(SoompiBaseIE, CrunchyrollIE): - IE_NAME = 'soompi' - _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P[0-9]+)' - _TESTS = [{ - 'url': 'http://tv.soompi.com/en/watch/29235', - 'info_dict': { - 'id': '29235', - 'ext': 'mp4', - 'title': 'Episode 1096', - 'description': '2015-05-20' - }, - 'params': { - 'skip_download': True, - }, - }] - - def _get_episode(self, webpage, video_id): - return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0] - - def _get_subtitles(self, config, video_id): - sub_langs = {} - for subtitle in config.findall('./{default}preload/subtitles/subtitle'): - sub_langs[subtitle.attrib['id']] = subtitle.attrib['title'] - - subtitles = {} - for s in config.findall('./{default}preload/subtitle'): - lang_code = sub_langs.get(s.attrib['id']) - if not lang_code: - continue - sub_id = s.get('id') - data = xpath_text(s, './data', 'data') - iv = xpath_text(s, './iv', 'iv') - if not id or not iv or not data: - continue - subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8') - subtitles[lang_code] = self._extract_subtitles(subtitle) - return subtitles - - def _real_extract(self, url): - video_id = self._match_id(url) - - try: - webpage = self._download_webpage( - url, video_id, 'Downloading episode page') - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: - webpage = ee.cause.read() - block_message = self._html_search_regex( - r'(?s)
(.+?)
', webpage, - 'block message', default=None) - if block_message: - raise ExtractorError(block_message, expected=True) - raise - - formats = [] - config = None - for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage): - config = self._download_xml( - 'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id), - video_id, 'Downloading %s XML' % format_id) - m3u8_url = xpath_text( - config, './{default}preload/stream_info/file', - '%s m3u8 URL' % format_id) - if not m3u8_url: - continue - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', m3u8_id=format_id)) - self._sort_formats(formats) - - episode = self._get_episode(webpage, video_id) - - title = episode['name'] - description = episode.get('description') - duration = int_or_none(episode.get('duration')) - - thumbnails = [{ - 'id': thumbnail_id, - 'url': thumbnail_url, - } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()] - - subtitles = self.extract_subtitles(config, video_id) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles - } - - -class SoompiShowIE(SoompiBaseIE): - IE_NAME = 'soompi:show' - _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P[0-9a-zA-Z\-_]+)' - _TESTS = [{ - 'url': 'http://tv.soompi.com/en/shows/liar-game', - 'info_dict': { - 'id': 'liar-game', - 'title': 'Liar Game', - 'description': 'md5:52c02bce0c1a622a95823591d0589b66', - }, - 'playlist_count': 14, - }] - - def _real_extract(self, url): - show_id = self._match_id(url) - - webpage = self._download_webpage( - url, show_id, 'Downloading show page') - - title = remove_start(self._og_search_title(webpage), 'SoompiTV | ') - description = self._og_search_description(webpage) - - entries = [ - self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi') - for episode in self._get_episodes(webpage)] - - return self.playlist_result(entries, show_id, title, description) From dc016bf5216d4c0d5b5fb2cd707e1d08fa4b0517 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Dec 2015 09:55:25 +0100 Subject: [PATCH 0136/1105] [viki] detect errors and fix formats extraction --- youtube_dl/extractor/viki.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index a63c23617..ca3f20a3d 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -30,6 +30,12 @@ class VikiBaseIE(InfoExtractor): _token = None + _ERRORS = { + 'geo': 'Sorry, this content is not available in your region.', + 'upcoming': 'Sorry, this content is not yet available.', + # 'paywall': 'paywall', + } + def _prepare_call(self, path, timestamp=None, post_data=None): path += '?' if '?' not in path else '&' if not timestamp: @@ -67,6 +73,12 @@ class VikiBaseIE(InfoExtractor): '%s returned error: %s' % (self.IE_NAME, error), expected=True) + def _check_errors(self, data): + for reason, status in data.get('blocking', {}).items(): + if status and reason in self._ERRORS: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, self._ERRORS[reason]), expected=True) + def _real_initialize(self): self._login() @@ -193,6 +205,7 @@ class VikiIE(VikiBaseIE): 'timestamp': 1321985454, 'description': 'md5:44b1e46619df3a072294645c770cef36', 'title': 'Love In Magic', + 'age_limit': 13, }, }] @@ -202,6 +215,8 @@ class VikiIE(VikiBaseIE): video = self._call_api( 'videos/%s.json' % video_id, video_id, 'Downloading video JSON') + self._check_errors(video) + title = self.dict_selection(video.get('titles', {}), 'en') if not title: title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id @@ -262,8 +277,11 @@ class VikiIE(VikiBaseIE): r'^(\d+)[pP]$', format_id, 'height', default=None)) for protocol, format_dict in stream_dict.items(): if format_id == 'm3u8': - formats = self._extract_m3u8_formats( - format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol) + m3u8_formats = self._extract_m3u8_formats( + format_dict['url'], video_id, 'mp4', 'm3u8_native', + m3u8_id='m3u8-%s' % protocol, fatal=None) + if m3u8_formats: + formats.extend(m3u8_formats) else: formats.append({ 'url': format_dict['url'], @@ -315,6 +333,8 @@ class VikiChannelIE(VikiBaseIE): 'containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON') + self._check_errors(channel) + title = self.dict_selection(channel['titles'], 'en') description = self.dict_selection(channel['descriptions'], 'en') From 48a6c984b806141dc6d3da0a96df2e553bb815e0 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Dec 2015 10:14:57 +0100 Subject: [PATCH 0137/1105] [bleacherreport] update test --- youtube_dl/extractor/bleacherreport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py index bd2a6340b..38bda3af5 100644 --- a/youtube_dl/extractor/bleacherreport.py +++ b/youtube_dl/extractor/bleacherreport.py @@ -90,7 +90,7 @@ class BleacherReportCMSIE(AMPIE): _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P[0-9a-f-]{36})' _TESTS = [{ 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', - 'md5': 'f0ca220af012d4df857b54f792c586bb', + 'md5': '8c2c12e3af7805152675446c905d159b', 'info_dict': { 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', 'ext': 'flv', From 220bc3f0e3777b89de335cdbc58a7d105584f06b Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Dec 2015 11:27:18 +0100 Subject: [PATCH 0138/1105] [franceinter] fix title extraction --- youtube_dl/extractor/franceinter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 6613ee17a..90a17815d 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -33,7 +33,7 @@ class FranceInterIE(InfoExtractor): video_url = 'http://www.franceinter.fr/' + path title = self._html_search_regex( - r'(.+?)', webpage, 'title') + r'(.+?)', webpage, 'title') description = self._html_search_regex( r'(.*?)', webpage, 'description', fatal=False) From 2db5806991145ee293c964ecd85623c093d2e429 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Dec 2015 11:30:35 +0100 Subject: [PATCH 0139/1105] [franceinter] use _match_id --- youtube_dl/extractor/franceinter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 90a17815d..fdc51f44f 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import int_or_none @@ -23,8 +21,7 @@ class FranceInterIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) From 2be689b7e27df89648d1d98fa74c297f0e06cbc1 Mon Sep 17 00:00:00 2001 From: j Date: Mon, 21 Dec 2015 02:26:37 +0100 Subject: [PATCH 0140/1105] [theintercept] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/theintercept.py | 68 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/theintercept.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index eac50eda5..042b1e921 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -657,6 +657,7 @@ from .tenplay import TenPlayIE from .testurl import TestURLIE from .testtube import TestTubeIE from .tf1 import TF1IE +from .theintercept import TheInterceptIE from .theonion import TheOnionIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py new file mode 100644 index 000000000..b096a28de --- /dev/null +++ b/youtube_dl/extractor/theintercept.py @@ -0,0 +1,68 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + ExtractorError, +) + +class TheInterceptIE(InfoExtractor): + _VALID_URL = r'https://theintercept.com/fieldofvision/(?P.+?)/' + _TESTS = [{ + 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/', + 'info_dict': { + 'id': 'thisisacoup-episode-four-surrender-or-die', + 'ext': 'mp4', + 'title': '#ThisIsACoup – Episode Four: Surrender or Die', + 'upload_date': '20151218', + 'description': 'md5:74dd27f0e2fbd50817829f97eaa33140', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + mobj = re.search(r'initialStoreTree =(?P.+})', webpage) + if mobj is None: + raise ExtractorError('Unable to extract initialStoreTree') + json_data = self._parse_json(mobj.group('json_data'), display_id) + + info = None + for post in json_data['resources']['posts'].values(): + if post['slug'] == display_id: + info = post + break + if info is None: + raise ExtractorError('Unable to find info for %s'%display_id) + + title = info['title'] + description = info['excerpt'] + upload_date = info['date'][:10].replace('-', '') + video_id = info['fov_videoid'] + creator = ','.join([a['display_name'] for a in info['authors']]) + thumbnail = self._og_search_property('image', webpage) + content_id = thumbnail.split('/')[-1].split('.')[0] + content_url = 'https://content.jwplatform.com/jw6/{content_id}.xml'.format(content_id=content_id) + content = self._download_xml(content_url, video_id) + + formats = [] + for source in content.findall('.//{http://rss.jwpcdn.com/}source'): + if source.attrib['file'].endswith('.m3u8'): + formats.extend(self._extract_m3u8_formats( + source.attrib['file'], video_id, 'mp4', preference=1, m3u8_id='hls')) + + return { + 'creator': creator, + 'description': description, + 'display_id': display_id, + 'formats': formats, + 'id': video_id, + 'id': video_id, + 'thumbnail': thumbnail, + 'title': title, + 'upload_date': upload_date, + } From 3b68efdc6ae109a840ff5f15f0e28910c2463b3f Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Dec 2015 15:54:51 +0100 Subject: [PATCH 0141/1105] [vgtv] update tests and correct format sorting --- youtube_dl/extractor/vgtv.py | 31 +++++++++++++++++++++---------- youtube_dl/extractor/xstream.py | 2 +- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 347410a78..811ee197d 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -104,10 +104,10 @@ class VGTVIE(XstreamIE): }, { 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more', - 'md5': '7fbc265a3ca4933a423c7a66aa879a67', + 'md5': 'fd828cd29774a729bf4d4425fe192972', 'info_dict': { 'id': '21039', - 'ext': 'mp4', + 'ext': 'mov', 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more', 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238', 'duration': 66, @@ -174,16 +174,15 @@ class VGTVIE(XstreamIE): for mp4_url in mp4_urls: format_info = { 'url': mp4_url, - 'preference': 1, } mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url) if mobj: - vbr = int(mobj.group(3)) + tbr = int(mobj.group(3)) format_info.update({ 'width': int(mobj.group(1)), 'height': int(mobj.group(2)), - 'vbr': vbr, - 'format_id': 'mp4-%s' % vbr, + 'tbr': tbr, + 'format_id': 'mp4-%s' % tbr, }) formats.append(format_info) @@ -210,7 +209,7 @@ class BTArticleIE(InfoExtractor): _VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P[^/]+)-\d+\.html' _TEST = { 'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html', - 'md5': 'd055e8ee918ef2844745fcfd1a4175fb', + 'md5': '2acbe8ad129b3469d5ae51b1158878df', 'info_dict': { 'id': '23199', 'ext': 'mp4', @@ -227,7 +226,7 @@ class BTArticleIE(InfoExtractor): def _real_extract(self, url): webpage = self._download_webpage(url, self._match_id(url)) video_id = self._search_regex( - r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id') + r']+data-id="(\d+)"', webpage, 'video id') return self.url_result('bttv:%s' % video_id, 'VGTV') @@ -235,7 +234,7 @@ class BTVestlendingenIE(InfoExtractor): IE_NAME = 'bt:vestlendingen' IE_DESC = 'Bergens Tidende - Vestlendingen' _VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588', 'md5': 'd7d17e3337dc80de6d3a540aefbe441b', 'info_dict': { @@ -246,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor): 'timestamp': 1430473209, 'upload_date': '20150501', }, - } + 'skip': '404 Error', + }, { + 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255', + 'md5': 'a2893f8632e96389f4bdf36aa9463ceb', + 'info_dict': { + 'id': '86255', + 'ext': 'mov', + 'title': 'Du må tåle å fryse og være sulten', + 'description': 'md5:b8046f4d022d5830ddab04865791d063', + 'upload_date': '20150321', + 'timestamp': 1426942023, + }, + }] def _real_extract(self, url): return self.url_result('bttv:%s' % self._match_id(url), 'VGTV') diff --git a/youtube_dl/extractor/xstream.py b/youtube_dl/extractor/xstream.py index 436f8978b..76c91bd92 100644 --- a/youtube_dl/extractor/xstream.py +++ b/youtube_dl/extractor/xstream.py @@ -93,7 +93,7 @@ class XstreamIE(InfoExtractor): formats.append({ 'url': link.get('href'), 'format_id': link.get('rel'), - 'preference': 2, + 'preference': 1, }) thumbnails = [{ From dbee18b5521edbfa1642c683ad2d317ba06e9d5b Mon Sep 17 00:00:00 2001 From: Abhishek Kedia Date: Mon, 21 Dec 2015 01:50:07 +0100 Subject: [PATCH 0142/1105] Improve extraction (Closes #7918) remove outer parentheses in if Conflicts: youtube_dl/extractor/imgur.py checked code with flake8 not returning list in case of single images. using the fact that id with length 5 are albums and more are single videos. Also for single videos ie ImgurIE both urls - http://imgur.com/gallery/oWeAMW2 and http://imgur.com/oWeAMW2 are equally fine. Change regex to allow thuis. For albums urls - http://imgur.com/gallery/Q95ko and http://imgur.com/Q95ko are ok. Change regex to allow this also. update description in ImgurIE Tests. Also move single video test 'https://imgur.com/gallery/YcAQlkx' from ImgurAlbumIE to ImgurIE. --- youtube_dl/extractor/imgur.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 70c8ca64e..88423f179 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -13,7 +13,7 @@ from ..utils import ( class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(gallery/)?(?P[a-zA-Z0-9]{6,})' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -21,7 +21,7 @@ class ImgurIE(InfoExtractor): 'id': 'A61SaA1', 'ext': 'mp4', 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', - 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', + 'description': 'Imgur: The most awesome images on the Internet.', }, }, { 'url': 'https://imgur.com/A61SaA1', @@ -29,8 +29,17 @@ class ImgurIE(InfoExtractor): 'id': 'A61SaA1', 'ext': 'mp4', 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', - 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', + 'description': 'Imgur: The most awesome images on the Internet.', }, + }, { + 'url': 'https://imgur.com/gallery/YcAQlkx', + 'info_dict': { + 'id': 'YcAQlkx', + 'ext': 'mp4', + 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', + 'description': 'Imgur: The most awesome images on the Internet.' + + } }] def _real_extract(self, url): @@ -100,7 +109,7 @@ class ImgurIE(InfoExtractor): class ImgurAlbumIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(gallery/)?(?P[a-zA-Z0-9]{5})(?![a-zA-Z0-9])' _TEST = { 'url': 'http://imgur.com/gallery/Q95ko', @@ -113,12 +122,15 @@ class ImgurAlbumIE(InfoExtractor): def _real_extract(self, url): album_id = self._match_id(url) - album_images = self._download_json( - 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, - album_id)['data']['images'] + album_img_data = self._download_json( + 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, album_id)['data'] - entries = [ - self.url_result('http://imgur.com/%s' % image['hash']) - for image in album_images if image.get('hash')] + if len(album_img_data) == 0: + return self.url_result('http://imgur.com/%s' % album_id) + else: + album_images = album_img_data['images'] + entries = [ + self.url_result('http://imgur.com/%s' % image['hash']) + for image in album_images if image.get('hash')] return self.playlist_result(entries, album_id) From 774ce35571c08a1532fe4079224239adfdb80e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Dec 2015 21:48:48 +0600 Subject: [PATCH 0143/1105] [imgur] Improve (Closes #7928) --- youtube_dl/extractor/imgur.py | 41 +++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py index 88423f179..85e9344aa 100644 --- a/youtube_dl/extractor/imgur.py +++ b/youtube_dl/extractor/imgur.py @@ -13,7 +13,7 @@ from ..utils import ( class ImgurIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(gallery/)?(?P[a-zA-Z0-9]{6,})' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', @@ -40,6 +40,9 @@ class ImgurIE(InfoExtractor): 'description': 'Imgur: The most awesome images on the Internet.' } + }, { + 'url': 'http://imgur.com/topic/Funny/N8rOudd', + 'only_matching': True, }] def _real_extract(self, url): @@ -109,28 +112,38 @@ class ImgurIE(InfoExtractor): class ImgurAlbumIE(InfoExtractor): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(gallery/)?(?P[a-zA-Z0-9]{5})(?![a-zA-Z0-9])' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P[a-zA-Z0-9]{5})(?:[/?#&]+)?$' - _TEST = { + _TESTS = [{ 'url': 'http://imgur.com/gallery/Q95ko', 'info_dict': { 'id': 'Q95ko', }, 'playlist_count': 25, - } + }, { + 'url': 'http://imgur.com/a/j6Orj', + 'only_matching': True, + }, { + 'url': 'http://imgur.com/topic/Aww/ll5Vk', + 'only_matching': True, + }] def _real_extract(self, url): album_id = self._match_id(url) - album_img_data = self._download_json( - 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, album_id)['data'] + album_images = self._download_json( + 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id, + album_id, fatal=False) - if len(album_img_data) == 0: - return self.url_result('http://imgur.com/%s' % album_id) - else: - album_images = album_img_data['images'] - entries = [ - self.url_result('http://imgur.com/%s' % image['hash']) - for image in album_images if image.get('hash')] + if album_images: + data = album_images.get('data') + if data and isinstance(data, dict): + images = data.get('images') + if images and isinstance(images, list): + entries = [ + self.url_result('http://imgur.com/%s' % image['hash']) + for image in images if image.get('hash')] + return self.playlist_result(entries, album_id) - return self.playlist_result(entries, album_id) + # Fallback to single video + return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key()) From 4c24ed94640b148882f1ceb400127b3b3afcafd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 01:10:31 +0600 Subject: [PATCH 0144/1105] [comcarcoff] Improve json data regex and modernize --- youtube_dl/extractor/comcarcoff.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index 4391b7ce4..edf5b29a0 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -31,9 +31,10 @@ class ComCarCoffIE(InfoExtractor): display_id = 'comediansincarsgettingcoffee.com' webpage = self._download_webpage(url, display_id) - full_data = json.loads(self._search_regex( - r'\nwindow.app = (?P.+?);\n', - webpage, 'full data json'))['videoData'] + full_data = self._parse_json( + self._search_regex( + r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'), + display_id)['videoData'] video_id = full_data['activeVideo']['video'] video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id] From cfe9e5aa6c5b14016ae454649b1a9df9c7c18b3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 01:18:14 +0600 Subject: [PATCH 0145/1105] [comcarcoff] Extract duration --- youtube_dl/extractor/comcarcoff.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py index edf5b29a0..2efa200b5 100644 --- a/youtube_dl/extractor/comcarcoff.py +++ b/youtube_dl/extractor/comcarcoff.py @@ -1,10 +1,12 @@ # encoding: utf-8 from __future__ import unicode_literals -import json - from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, +) class ComCarCoffIE(InfoExtractor): @@ -16,6 +18,7 @@ class ComCarCoffIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20141127', 'timestamp': 1417107600, + 'duration': 1232, 'title': 'Happy Thanksgiving Miranda', 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.', 'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg', @@ -46,12 +49,18 @@ class ComCarCoffIE(InfoExtractor): formats = self._extract_m3u8_formats( video_data['mediaUrl'], video_id, ext='mp4') + timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601( + video_data.get('pubDate')) + duration = int_or_none(video_data.get('durationSeconds')) or parse_duration( + video_data.get('duration')) + return { 'id': video_id, 'display_id': display_id, 'title': video_data['title'], 'description': video_data.get('description'), - 'timestamp': parse_iso8601(video_data.get('pubDate')), + 'timestamp': timestamp, + 'duration': duration, 'thumbnails': thumbnails, 'formats': formats, 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), From 89abf7bf4d5dfc8c161924067f4430b7d81a8b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 02:09:50 +0600 Subject: [PATCH 0146/1105] [periscope] Fix token based extraction (Closes #7943) --- youtube_dl/extractor/periscope.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 63cc764bb..514e9b433 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -31,9 +31,8 @@ class PeriscopeIE(InfoExtractor): }] def _call_api(self, method, value): - attribute = 'token' if len(value) > 13 else 'broadcast_id' return self._download_json( - 'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value) + 'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value) def _real_extract(self, url): token = self._match_id(url) From 3a70ed9ebeac782b922cc3cb3b74cd999e60845a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 02:54:32 +0600 Subject: [PATCH 0147/1105] [daum] Fix extraction (Closes #7949) --- youtube_dl/extractor/daum.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 934da765e..e3fc639b0 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -37,9 +37,11 @@ class DaumIE(InfoExtractor): video_id = mobj.group('id') canonical_url = 'http://tvpot.daum.net/v/%s' % video_id webpage = self._download_webpage(canonical_url, video_id) + og_url = self._og_search_url(webpage, default=None) or self._search_regex( + r']+rel=(["\'])canonical\1[^>]+href=(["\'])(?P.+?)\2', + webpage, 'canonical url', group='url') full_id = self._search_regex( - r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']', - webpage, 'full id') + r'tvpot\.daum\.net/v/([^/]+)', og_url, 'full id') query = compat_urllib_parse.urlencode({'vid': full_id}) info = self._download_xml( 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, From 178b47e6af73521da50e9eec04af7fdceb236e39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 02:59:49 +0600 Subject: [PATCH 0148/1105] [daum] Add test for #7949 --- youtube_dl/extractor/daum.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index e3fc639b0..9a94cf361 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -24,6 +24,18 @@ class DaumIE(InfoExtractor): 'upload_date': '20130831', 'duration': 3868, }, + }, { + # Test for https://github.com/rg3/youtube-dl/issues/7949 + 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290', + 'md5': 'c92d78bcee4424451f1667f275c1dc97', + 'info_dict': { + 'id': '73147290', + 'ext': 'mp4', + 'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218', + 'description': '싸이 - 나팔바지', + 'upload_date': '20151219', + 'duration': 232, + }, }, { 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz', 'only_matching': True, From 60427f63d129919d5c98e1176ea8136d0eedd0f4 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 23 Dec 2015 10:40:45 +0100 Subject: [PATCH 0149/1105] [appletrailers] Add support for AppleTrailers Section --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/appletrailers.py | 74 +++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fede5ff0d..3b541a538 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -25,7 +25,10 @@ from .aol import AolIE from .allocine import AllocineIE from .aparat import AparatIE from .appleconnect import AppleConnectIE -from .appletrailers import AppleTrailersIE +from .appletrailers import ( + AppleTrailersIE, + AppleTrailersSectionIE, +) from .archiveorg import ArchiveOrgIE from .ard import ( ARDIE, diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index f68dc3236..ca9a70924 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -11,6 +11,7 @@ from ..utils import ( class AppleTrailersIE(InfoExtractor): + IE_NAME = 'appletrailers' _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P[^/]+)/(?P[^/]+)' _TESTS = [{ 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', @@ -144,3 +145,76 @@ class AppleTrailersIE(InfoExtractor): 'id': movie, 'entries': playlist, } + + +class AppleTrailersSectionIE(InfoExtractor): + IE_NAME = 'appletrailers:section' + _SECTIONS = { + 'justadded': { + 'feed_path': 'just_added', + 'title': 'Just Added', + }, + 'exclusive': { + 'feed_path': 'exclusive', + 'title': 'Exclusive', + }, + 'justhd': { + 'feed_path': 'just_hd', + 'title': 'Just HD', + }, + 'mostpopular': { + 'feed_path': 'most_pop', + 'title': 'Most Popular', + }, + 'moviestudios': { + 'feed_path': 'studios', + 'title': 'Movie Studios', + }, + } + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P%s)' % '|'.join(_SECTIONS) + _TESTS = [{ + 'url': 'http://trailers.apple.com/#section=justadded', + 'info_dict': { + 'title': 'Just Added', + 'id': 'justadded', + }, + 'playlist_mincount': 80, + }, { + 'url': 'http://trailers.apple.com/#section=exclusive', + 'info_dict': { + 'title': 'Exclusive', + 'id': 'exclusive', + }, + 'playlist_mincount': 80, + }, { + 'url': 'http://trailers.apple.com/#section=justhd', + 'info_dict': { + 'title': 'Just HD', + 'id': 'justhd', + }, + 'playlist_mincount': 80, + }, { + 'url': 'http://trailers.apple.com/#section=mostpopular', + 'info_dict': { + 'title': 'Most Popular', + 'id': 'mostpopular', + }, + 'playlist_mincount': 80, + }, { + 'url': 'http://trailers.apple.com/#section=moviestudios', + 'info_dict': { + 'title': 'Movie Studios', + 'id': 'moviestudios', + }, + 'playlist_mincount': 80, + }] + + def _real_extract(self, url): + section = self._match_id(url) + section_data = self._download_json( + 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], + section) + entries = [ + self.url_result('http://trailers.apple.com' + e['location']) + for e in section_data] + return self.playlist_result(entries, section, self._SECTIONS[section]['title']) From f10c27b8cb35c72b5f9633956f71f96da72ada31 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Dec 2015 14:05:06 +0100 Subject: [PATCH 0150/1105] release 2015.12.23 --- docs/supportedsites.md | 8 +++----- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 299bc5e72..1a5c7cde9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -23,7 +23,6 @@ - **AdobeTVShow** - **AdobeTVVideo** - **AdultSwim** - - **Aftenposten** - **Aftonbladet** - **AirMozilla** - **AlJazeera** @@ -34,7 +33,8 @@ - **Aparat** - **AppleConnect** - **AppleDaily**: 臺灣蘋果日報 - - **AppleTrailers** + - **appletrailers** + - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** - **ARD:mediathek** @@ -502,8 +502,6 @@ - **SnagFilmsEmbed** - **Snotr** - **Sohu** - - **soompi** - - **soompi:show** - **soundcloud** - **soundcloud:playlist** - **soundcloud:search**: Soundcloud search @@ -627,7 +625,7 @@ - **Vessel** - **Vesti**: Вести.Ru - **Vevo** - - **VGTV**: VGTV and BTTV + - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **Vice** - **Viddler** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7095033c5..255d64269 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.12.21' +__version__ = '2015.12.23' From 7fe37d8a05609229332d5a156cb9b7cf4bba2790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 23 Dec 2015 14:48:40 +0100 Subject: [PATCH 0151/1105] [appletrailers] Improve regex for fixing '' tags (#7953) --- youtube_dl/extractor/appletrailers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index ca9a70924..82beed2ce 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -80,7 +80,7 @@ class AppleTrailersIE(InfoExtractor): def fix_html(s): s = re.sub(r'(?s).*?', '', s) - s = re.sub(r'', r'', s) + s = re.sub(r'', r'', s) # The ' in the onClick attributes are not escaped, it couldn't be parsed # like: http://trailers.apple.com/trailers/wb/gravity/ From 747b028412828c66080c7f165b461a7ea490fead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 20:42:36 +0600 Subject: [PATCH 0152/1105] [24video] Fix extraction (Closes #7956) --- youtube_dl/extractor/twentyfourvideo.py | 35 +++++++++---------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index c1ee1decc..cb9e5f1b5 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -64,33 +64,22 @@ class TwentyFourVideoIE(InfoExtractor): r'
(\d+) комментари', webpage, 'comment count', fatal=False)) - formats = [] + # Sets some cookies + self._download_xml( + r'http://www.24video.net/video/xml/%s?mode=init' % video_id, + video_id, 'Downloading init XML') - pc_video = self._download_xml( + video = self._download_xml( 'http://www.24video.net/video/xml/%s?mode=play' % video_id, - video_id, 'Downloading PC video URL').find('.//video') + video_id, 'Downloading video XML').find('.//video') - formats.append({ - 'url': pc_video.attrib['url'], - 'format_id': 'pc', - 'quality': 1, - }) + formats = [{ + 'url': video.attrib['url'], + }] - like_count = int_or_none(pc_video.get('ratingPlus')) - dislike_count = int_or_none(pc_video.get('ratingMinus')) - age_limit = 18 if pc_video.get('adult') == 'true' else 0 - - mobile_video = self._download_xml( - 'http://www.24video.net/video/xml/%s' % video_id, - video_id, 'Downloading mobile video URL').find('.//video') - - formats.append({ - 'url': mobile_video.attrib['url'], - 'format_id': 'mobile', - 'quality': 0, - }) - - self._sort_formats(formats) + like_count = int_or_none(video.get('ratingPlus')) + dislike_count = int_or_none(video.get('ratingMinus')) + age_limit = 18 if video.get('adult') == 'true' else 0 return { 'id': video_id, From 128eb31d90583113083ba1fe329eb4cf42c2989f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 20:49:41 +0600 Subject: [PATCH 0153/1105] [24video] Fix extraction on python 2.6 --- youtube_dl/extractor/twentyfourvideo.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index cb9e5f1b5..68e2277a4 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -5,6 +5,8 @@ from .common import InfoExtractor from ..utils import ( parse_iso8601, int_or_none, + xpath_attr, + xpath_element, ) @@ -69,12 +71,14 @@ class TwentyFourVideoIE(InfoExtractor): r'http://www.24video.net/video/xml/%s?mode=init' % video_id, video_id, 'Downloading init XML') - video = self._download_xml( + video_xml = self._download_xml( 'http://www.24video.net/video/xml/%s?mode=play' % video_id, - video_id, 'Downloading video XML').find('.//video') + video_id, 'Downloading video XML') + + video = xpath_element(video_xml, './/video', 'video', fatal=True) formats = [{ - 'url': video.attrib['url'], + 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True), }] like_count = int_or_none(video.get('ratingPlus')) From be514c856cf9f95fe3e0d45b1df0319a0872b911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 20:49:52 +0600 Subject: [PATCH 0154/1105] [24video] Fix test --- youtube_dl/extractor/twentyfourvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py index 68e2277a4..e03e2dbaa 100644 --- a/youtube_dl/extractor/twentyfourvideo.py +++ b/youtube_dl/extractor/twentyfourvideo.py @@ -17,7 +17,7 @@ class TwentyFourVideoIE(InfoExtractor): _TESTS = [ { 'url': 'http://www.24video.net/video/view/1044982', - 'md5': 'd041af8b5b4246ea466226a0d6693345', + 'md5': 'e09fc0901d9eaeedac872f154931deeb', 'info_dict': { 'id': '1044982', 'ext': 'mp4', From dcdc352371115007028632da6ae377d0ad39b62a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 21:13:31 +0600 Subject: [PATCH 0155/1105] [instagram:user] Improve _VALID_URL (Closes #7955) --- youtube_dl/extractor/instagram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index c158f2064..e5e16ca3b 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -47,7 +47,7 @@ class InstagramIE(InfoExtractor): class InstagramUserIE(InfoExtractor): - _VALID_URL = r'https://instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' _TEST = { From 261b4c23c70c7c5dc4fe9fd22cf9e867b7456c40 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 23 Dec 2015 17:48:37 +0100 Subject: [PATCH 0156/1105] [appletrailers] skip clips with empty url --- youtube_dl/extractor/appletrailers.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 82beed2ce..62ed0c918 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -64,6 +64,12 @@ class AppleTrailersIE(InfoExtractor): }, }, ] + }, { + 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', + 'info_dict': { + 'id': 'blackthorn', + }, + 'playlist_mincount': 2, }, { 'url': 'http://trailers.apple.com/ca/metropole/autrui/', 'only_matching': True, @@ -97,6 +103,9 @@ class AppleTrailersIE(InfoExtractor): trailer_info_json = self._search_regex(self._JSON_RE, on_click, 'trailer info') trailer_info = json.loads(trailer_info_json) + first_url = trailer_info.get('url') + if not first_url: + continue title = trailer_info['title'] video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() thumbnail = li.find('.//img').attrib['src'] @@ -108,7 +117,6 @@ class AppleTrailersIE(InfoExtractor): if m: duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) - first_url = trailer_info['url'] trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') From a8f1d167f6741485b7cbec2cb355315c3774d5bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 23 Dec 2015 17:55:58 +0100 Subject: [PATCH 0157/1105] [arte] Prefer json URLs that contain the video id from the 'vid' parameter in the URL (fixes #7920) --- youtube_dl/extractor/arte.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 2a00da3ee..10301a8ea 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -68,9 +68,13 @@ class ArteTVPlus7IE(InfoExtractor): def _extract_url_info(cls, url): mobj = re.match(cls._VALID_URL, url) lang = mobj.group('lang') - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + if 'vid' in query: + video_id = query['vid'][0] + else: + # This is not a real id, it can be for example AJT for the news + # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal + video_id = mobj.group('id') return video_id, lang def _real_extract(self, url): @@ -79,9 +83,15 @@ class ArteTVPlus7IE(InfoExtractor): return self._extract_from_webpage(webpage, video_id, lang) def _extract_from_webpage(self, webpage, video_id, lang): + patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') + ids = (video_id, '') + # some pages contain multiple videos (like + # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), + # so we first try to look for json URLs that contain the video id from + # the 'vid' parameter. + patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] json_url = self._html_search_regex( - [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'], - webpage, 'json vp url', default=None) + patterns, webpage, 'json vp url', default=None) if not json_url: iframe_url = self._html_search_regex( r']+src=(["\'])(?P.+\bjson_url=.+?)\1', From 2c566d02febb0cf137a8dce8646957beb1415770 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Dec 2015 23:22:47 +0600 Subject: [PATCH 0158/1105] [pbs] Extend PBS station regex (Closes #7964) --- youtube_dl/extractor/pbs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py index 744e4a09a..97e8ffc97 100644 --- a/youtube_dl/extractor/pbs.py +++ b/youtube_dl/extractor/pbs.py @@ -16,7 +16,7 @@ from ..utils import ( class PBSIE(InfoExtractor): _STATIONS = ( - (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/ + (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/ (r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/ (r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/ (r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org From fc383f199e73358f88ecf24b7e804dda7400afae Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Wed, 23 Dec 2015 17:35:10 +0100 Subject: [PATCH 0159/1105] Fix typos --- CONTRIBUTING.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f3fe0d432..d15267d7e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -28,7 +28,7 @@ So please elaborate on what feature you are requesting, or what bug you want to - How it could be fixed - How your proposed solution would look like -If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. +If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. diff --git a/README.md b/README.md index 7002f45e0..3a4707227 100644 --- a/README.md +++ b/README.md @@ -830,7 +830,7 @@ So please elaborate on what feature you are requesting, or what bug you want to - How it could be fixed - How your proposed solution would look like -If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. +If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. From 96db61ffb83de9d912003a4778e9ce7c4d46e848 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 23 Dec 2015 22:36:53 +0100 Subject: [PATCH 0160/1105] [theintercept] improve extraction --- youtube_dl/extractor/theintercept.py | 67 ++++++++++------------------ 1 file changed, 24 insertions(+), 43 deletions(-) diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index b096a28de..8cb3c3669 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -1,24 +1,28 @@ # encoding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor - +from ..compat import compat_str from ..utils import ( + parse_iso8601, + int_or_none, ExtractorError, ) + class TheInterceptIE(InfoExtractor): - _VALID_URL = r'https://theintercept.com/fieldofvision/(?P.+?)/' + _VALID_URL = r'https://theintercept.com/fieldofvision/(?P[^/?#]+)' _TESTS = [{ 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/', + 'md5': '145f28b41d44aab2f87c0a4ac8ec95bd', 'info_dict': { - 'id': 'thisisacoup-episode-four-surrender-or-die', + 'id': '46214', 'ext': 'mp4', 'title': '#ThisIsACoup – Episode Four: Surrender or Die', - 'upload_date': '20151218', 'description': 'md5:74dd27f0e2fbd50817829f97eaa33140', + 'timestamp': 1450429239, + 'upload_date': '20151218', + 'comment_count': int, } }] @@ -26,43 +30,20 @@ class TheInterceptIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - mobj = re.search(r'initialStoreTree =(?P.+})', webpage) - if mobj is None: - raise ExtractorError('Unable to extract initialStoreTree') - json_data = self._parse_json(mobj.group('json_data'), display_id) + json_data = self._parse_json(self._search_regex( + r'initialStoreTree\s*=\s*(?P{.+})', webpage, + 'initialStoreTree'), display_id) - info = None for post in json_data['resources']['posts'].values(): if post['slug'] == display_id: - info = post - break - if info is None: - raise ExtractorError('Unable to find info for %s'%display_id) - - title = info['title'] - description = info['excerpt'] - upload_date = info['date'][:10].replace('-', '') - video_id = info['fov_videoid'] - creator = ','.join([a['display_name'] for a in info['authors']]) - thumbnail = self._og_search_property('image', webpage) - content_id = thumbnail.split('/')[-1].split('.')[0] - content_url = 'https://content.jwplatform.com/jw6/{content_id}.xml'.format(content_id=content_id) - content = self._download_xml(content_url, video_id) - - formats = [] - for source in content.findall('.//{http://rss.jwpcdn.com/}source'): - if source.attrib['file'].endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats( - source.attrib['file'], video_id, 'mp4', preference=1, m3u8_id='hls')) - - return { - 'creator': creator, - 'description': description, - 'display_id': display_id, - 'formats': formats, - 'id': video_id, - 'id': video_id, - 'thumbnail': thumbnail, - 'title': title, - 'upload_date': upload_date, - } + return { + '_type': 'url_transparent', + 'url': 'jwplatform:%s' % post['fov_videoid'], + 'id': compat_str(post['ID']), + 'display_id': display_id, + 'title': post['title'], + 'description': post.get('excerpt'), + 'timestamp': parse_iso8601(post.get('date')), + 'comment_count': int_or_none(post.get('comments_number')), + } + raise ExtractorError('Unable to find the current post') From 8a609c32fdf8d99f7c868c74d64a05d9a936044d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Dec 2015 20:09:48 +0600 Subject: [PATCH 0161/1105] [chaturbate] Improve error extraction (Closes #7989) --- youtube_dl/extractor/chaturbate.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py index 0b67ba67d..242fba311 100644 --- a/youtube_dl/extractor/chaturbate.py +++ b/youtube_dl/extractor/chaturbate.py @@ -23,6 +23,8 @@ class ChaturbateIE(InfoExtractor): 'only_matching': True, }] + _ROOM_OFFLINE = 'Room is currently offline' + def _real_extract(self, url): video_id = self._match_id(url) @@ -34,9 +36,16 @@ class ChaturbateIE(InfoExtractor): if not m3u8_url: error = self._search_regex( - r']+class=(["\'])desc_span\1[^>]*>(?P[^<]+)', - webpage, 'error', group='error') - raise ExtractorError(error, expected=True) + [r']+class=(["\'])desc_span\1[^>]*>(?P[^<]+)', + r']+id=(["\'])defchat\1[^>]*>\s*

(?P[^<]+)<'], + webpage, 'error', group='error', default=None) + if not error: + if any(p not in webpage for p in ( + self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): + error = self._ROOM_OFFLINE + if error: + raise ExtractorError(error, expected=True) + raise ExtractorError('Unable to find stream URL') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') From e41604227ce14ded0b87385a17490bfa65d35fde Mon Sep 17 00:00:00 2001 From: Boris Wachtmeister Date: Thu, 24 Dec 2015 08:27:25 +0100 Subject: [PATCH 0162/1105] [zdf] expand valid-url pattern for channels The webpage also creates URLs which include additional text that defines the sorting order on the page like "aktuellste" (most current) and "meist-gesehen" (most seen), e.g.: http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332 http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332 --- youtube_dl/extractor/zdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index a795f56b3..b581813d6 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -131,7 +131,7 @@ class ZDFIE(InfoExtractor): class ZDFChannelIE(InfoExtractor): - _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/)(?P[0-9]+)' + _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:(?:aktuellste|meist-gesehen)/)?)(?P[0-9]+)' _TEST = { 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', 'info_dict': { From 67ba388efb5a6fa925ba3472697484553f6ddcc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Dec 2015 20:42:29 +0600 Subject: [PATCH 0163/1105] [zdf:channel] Relax _VALID_URL --- youtube_dl/extractor/zdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index b581813d6..d852ffd07 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -131,7 +131,7 @@ class ZDFIE(InfoExtractor): class ZDFChannelIE(InfoExtractor): - _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:(?:aktuellste|meist-gesehen)/)?)(?P[0-9]+)' + _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P[0-9]+)' _TEST = { 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', 'info_dict': { From c24044635b6e7f67274899e7c5e7a5efcce947af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Dec 2015 20:44:49 +0600 Subject: [PATCH 0164/1105] [zdf:channel] Add more tests --- youtube_dl/extractor/zdf.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index d852ffd07..9a3331a69 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -132,13 +132,22 @@ class ZDFIE(InfoExtractor): class ZDFChannelIE(InfoExtractor): _VALID_URL = r'(?:zdf:topic:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/.*kanaluebersicht/(?:[^/]+/)?)(?P[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.zdf.de/ZDFmediathek#/kanaluebersicht/1586442/sendung/Titanic', 'info_dict': { 'id': '1586442', }, 'playlist_count': 3, - } + }, { + 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/aktuellste/332', + 'only_matching': True, + }, { + 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/meist-gesehen/332', + 'only_matching': True, + }, { + 'url': 'http://www.zdf.de/ZDFmediathek/kanaluebersicht/_/1798716?bc=nrt;nrm?flash=off', + 'only_matching': True, + }] _PAGE_SIZE = 50 def _fetch_page(self, channel_id, page): From fb8e402ad252bebc54c36c5acca1b7ef05416c9b Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 25 Dec 2015 01:59:56 +0100 Subject: [PATCH 0165/1105] [hotstar] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/hotstar.py | 79 ++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/hotstar.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 365c0b86f..dd7e23d80 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -253,6 +253,7 @@ from .history import HistoryIE from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE +from .hotstar import HotStarIE from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .huffpost import HuffPostIE diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py new file mode 100644 index 000000000..05d27e75d --- /dev/null +++ b/youtube_dl/extractor/hotstar.py @@ -0,0 +1,79 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + determine_ext, + int_or_none, +) + + +class HotStarIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P\d{10})' + _TEST = { + 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', + 'info_dict': { + 'id': '1000076273', + 'ext': 'mp4', + 'title': 'On Air With AIB - English', + 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', + 'timestamp': 1447227000, + 'upload_date': '20151111', + 'duration': 381, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } + + _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s' + _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s' + + def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True): + json_data = super(HotStarIE, self)._download_json(url_or_request, video_id, note, fatal=fatal) + if json_data['resultCode'] != 'OK': + if fatal: + raise ExtractorError(json_data['errorDescription']) + return None + return json_data['resultObj'] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + self._GET_CONTENT_TEMPLATE % video_id, + video_id)['contentInfo'][0] + + formats = [] + # PCTV for extracting f4m manifest + for f in ('TABLET',): + format_data = self._download_json( + self._GET_CDN_TEMPLATE % (f, video_id, 'VOD'), + video_id, 'Downloading %s JSON metadata' % f, fatal=False) + if format_data: + format_url = format_data['src'] + ext = determine_ext(format_url) + if ext == 'm3u8': + m3u8_formats = self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + elif ext == 'f4m': + # produce broken files + continue + else: + formats.append({ + 'url': format_url, + 'width': int_or_none(format_data.get('width')), + 'height': int_or_none(format_data.get('height')), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_data['episodeTitle'], + 'description': video_data.get('description'), + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': int_or_none(video_data.get('broadcastDate')), + 'formats': formats, + } From 06d5556dface3901a86419b6b125ef377116448f Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 25 Dec 2015 15:38:12 +0100 Subject: [PATCH 0166/1105] [rai] improve extraction --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/rai.py | 203 ++++++++++++++++++------------- 2 files changed, 121 insertions(+), 87 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dd7e23d80..4784db70b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -528,7 +528,10 @@ from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE -from .rai import RaiIE +from .rai import ( + RaiTVIE, + RaiIE, +) from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redtube import RedTubeIE diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 7ff1d06c4..14f1ccbb4 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -8,20 +8,24 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + ExtractorError, + determine_ext, parse_duration, unified_strdate, + int_or_none, + xpath_text, ) -class RaiIE(InfoExtractor): - _VALID_URL = r'(?P(?Phttp://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' +class RaiTVIE(InfoExtractor): + _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/(?:[^/]+/)+media/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' _TESTS = [ { 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', - 'md5': 'c064c0b2d09c278fb293116ef5d0a32d', + 'md5': '96382709b61dd64a6b88e0f791e6df4c', 'info_dict': { 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Report del 07/04/2014', 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 'upload_date': '20140407', @@ -30,16 +34,14 @@ class RaiIE(InfoExtractor): }, { 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', - 'md5': '8bb9c151924ce241b74dd52ef29ceafa', + 'md5': 'd9751b78eac9710d62c2447b224dea39', 'info_dict': { 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'TG PRIMO TEMPO', - 'description': '', 'upload_date': '20140612', 'duration': 1758, }, - 'skip': 'Error 404', }, { 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', @@ -55,110 +57,106 @@ class RaiIE(InfoExtractor): }, { 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', - 'md5': '35694f062977fe6619943f08ed935730', 'info_dict': { 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', 'ext': 'mp4', 'title': 'Alluvione in Sardegna e dissesto idrogeologico', 'description': 'Edizione delle ore 20:30 ', - } + }, + 'skip': 'invalid urls', }, { 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', - 'md5': '02b64456f7cc09f96ff14e7dd489017e', + 'md5': '496ab63e420574447f70d02578333437', 'info_dict': { 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', 'ext': 'flv', 'title': 'Il Candidato - Primo episodio: "Le Primarie"', - 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!', - 'uploader': 'RaiTre', + 'description': 'md5:364b604f7db50594678f483353164fb8', + 'upload_date': '20140923', + 'duration': 386, } }, - { - 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', - 'md5': '037104d2c14132887e5e4cf114569214', - 'info_dict': { - 'id': '0c7a664b-d0f4-4b2c-8835-3f82e46f433e', - 'ext': 'flv', - 'title': 'Il pacco', - 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', - 'uploader': 'RaiTre', - 'upload_date': '20141221', - }, - } ] - def _extract_relinker_url(self, webpage): - return self._proto_relative_url(self._search_regex( - [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'], - webpage, 'relinker url', default=None)) - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - host = mobj.group('host') + video_id = self._match_id(url) + media = self._download_json( + 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % video_id, + video_id, 'Downloading video JSON') - webpage = self._download_webpage(url, video_id) + thumbnails = [] + for image_type in ('image', 'image_medium', 'image_300'): + thumbnail_url = media.get(image_type) + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + }) - relinker_url = self._extract_relinker_url(webpage) + subtitles = [] + formats = [] + media_type = media['type'] + if 'Audio' in media_type: + formats.append({ + 'format_id': media.get('formatoAudio'), + 'url': media['audioUrl'], + 'ext': media.get('formatoAudio'), + }) + elif 'Video' in media_type: + def fix_xml(xml): + return xml.replace(' tag elementi', '').replace('>/', ']+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', - r'drawMediaRaiTV\(["\'](.+?)["\']'], - webpage, 'iframe') - if not iframe_url.startswith('http'): - iframe_url = compat_urlparse.urljoin(url, iframe_url) - webpage = self._download_webpage( - iframe_url, video_id) - relinker_url = self._extract_relinker_url(webpage) + relinker = self._download_xml( + media['mediaUri'] + '&output=43', video_id, transform_source=fix_xml) - relinker = self._download_json( - '%s&output=47' % relinker_url, video_id) + has_subtitle = False - media_url = relinker['video'][0] - ct = relinker.get('ct') - if ct == 'f4m': - formats = self._extract_f4m_formats( - media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id) + for element in relinker.findall('element'): + media_url = xpath_text(element, 'url') + ext = determine_ext(media_url) + content_type = xpath_text(element, 'content-type') + if ext == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', + fatal=None) + if m3u8_formats: + formats.extend(m3u8_formats) + elif ext == 'f4m': + f4m_formats = self._extract_f4m_formats( + media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', + video_id, f4m_id='hds', fatal=None) + if f4m_formats: + formats.extend(f4m_formats) + elif ext == 'stl': + has_subtitle = True + elif content_type.startswith('video/'): + bitrate = int_or_none(xpath_text(element, 'bitrate')) + formats.append({ + 'url': media_url, + 'tbr': bitrate if bitrate > 0 else None, + 'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http', + }) + elif content_type.startswith('image/'): + thumbnails.append({ + 'url': media_url, + }) + + self._sort_formats(formats) + + if has_subtitle: + webpage = self._download_webpage(url, video_id) + subtitles = self._get_subtitles(video_id, webpage) else: - formats = [{ - 'url': media_url, - 'format_id': ct, - }] - - json_link = self._html_search_meta( - 'jsonlink', webpage, 'JSON link', default=None) - if json_link: - media = self._download_json( - host + json_link, video_id, 'Downloading video JSON') - title = media.get('name') - description = media.get('desc') - thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') - duration = parse_duration(media.get('length')) - uploader = media.get('author') - upload_date = unified_strdate(media.get('date')) - else: - title = (self._search_regex( - r'var\s+videoTitolo\s*=\s*"(.+?)";', - webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"') - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) - duration = None - uploader = self._html_search_meta('Editore', webpage, 'uploader') - upload_date = unified_strdate(self._html_search_meta( - 'item-date', webpage, 'upload date', default=None)) - - subtitles = self.extract_subtitles(video_id, webpage) + raise ExtractorError('not a media file') return { 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'upload_date': upload_date, - 'duration': duration, + 'title': media['name'], + 'description': media.get('desc'), + 'thumbnails': thumbnails, + 'uploader': media.get('author'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(media.get('length')), 'formats': formats, 'subtitles': subtitles, } @@ -177,3 +175,36 @@ class RaiIE(InfoExtractor): 'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions), }] return subtitles + + +class RaiIE(InfoExtractor): + _VALID_URL = r'http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html' + _TESTS = [ + { + 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', + 'md5': 'e0e7a8a131e249d1aa0ebf270d1d8db7', + 'info_dict': { + 'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', + 'ext': 'flv', + 'title': 'Il pacco', + 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', + 'upload_date': '20141221', + }, + } + ] + + @classmethod + def suitable(cls, url): + return False if RaiTVIE.suitable(url) else super(RaiIE, cls).suitable(url) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + iframe_url = self._search_regex( + [r']+src="([^"]*/dl/[^"]+\?iframe\b[^"]*)"', + r'drawMediaRaiTV\(["\'](.+?)["\']'], + webpage, 'iframe') + if not iframe_url.startswith('http'): + iframe_url = compat_urlparse.urljoin(url, iframe_url) + return self.url_result(iframe_url) From 6418b2439b81ceec77b50879b4d9d395893d8eba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Dec 2015 21:14:00 +0600 Subject: [PATCH 0167/1105] [rutv] Fix extraction (Closes #8004) --- youtube_dl/extractor/rutv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index d9df06861..f7fe1fece 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -131,7 +131,7 @@ class RUTVIE(InfoExtractor): is_live = video_type == 'live' json_data = self._download_json( - 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if is_live else '', video_id), + 'http://player.rutv.ru/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id), video_id, 'Downloading JSON') if json_data['errors']: From 1fc0b47fdf9367aa71e6b81076666f137e68f637 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 25 Dec 2015 17:37:50 +0100 Subject: [PATCH 0168/1105] [srmediathek] improve extraction --- youtube_dl/extractor/ard.py | 14 ++++++--- youtube_dl/extractor/srmediathek.py | 49 ++++++++++++++++++----------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 73be6d204..687eb9f82 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -110,13 +110,19 @@ class ARDMediathekIE(InfoExtractor): server = stream.get('_server') for stream_url in stream_urls: ext = determine_ext(stream_url) + if quality != 'auto' and ext in ('f4m', 'm3u8'): + continue if ext == 'f4m': - formats.extend(self._extract_f4m_formats( + f4m_formats = self._extract_f4m_formats( stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', - video_id, preference=-1, f4m_id='hds')) + video_id, preference=-1, f4m_id='hds', fatal=False) + if f4m_formats: + formats.extend(f4m_formats) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - stream_url, video_id, 'mp4', preference=1, m3u8_id='hls')) + m3u8_formats = self._extract_m3u8_formats( + stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) else: if server and server.startswith('rtmp'): f = { diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py index 5d583c720..74d01183f 100644 --- a/youtube_dl/extractor/srmediathek.py +++ b/youtube_dl/extractor/srmediathek.py @@ -1,17 +1,18 @@ # encoding: utf-8 from __future__ import unicode_literals -import json - -from .common import InfoExtractor -from ..utils import js_to_json +from .ard import ARDMediathekIE +from ..utils import ( + ExtractorError, + get_element_by_attribute, +) -class SRMediathekIE(InfoExtractor): +class SRMediathekIE(ARDMediathekIE): IE_DESC = 'Saarländischer Rundfunk' _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455', 'info_dict': { 'id': '28455', @@ -20,24 +21,36 @@ class SRMediathekIE(InfoExtractor): 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ', 'thumbnail': 're:^https?://.*\.jpg$', }, - } + 'skip': 'no longer available', + }, { + 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682', + 'info_dict': { + 'id': '37682', + 'ext': 'mp4', + 'title': 'Love, Cakes and Rock\'n\'Roll', + 'description': 'md5:18bf9763631c7d326c22603681e1123d', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'] + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - murls = json.loads(js_to_json(self._search_regex( - r'var mediaURLs\s*=\s*(.*?);\n', webpage, 'video URLs'))) - formats = [{'url': murl} for murl in murls] - self._sort_formats(formats) + if '>Der gewünschte Beitrag ist leider nicht mehr verfügbar.<' in webpage: + raise ExtractorError('Video %s is no longer available' % video_id, expected=True) - title = json.loads(js_to_json(self._search_regex( - r'var mediaTitles\s*=\s*(.*?);\n', webpage, 'title')))[0] - - return { + media_collection_url = self._search_regex( + r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url') + info = self._extract_media_info(media_collection_url, webpage, video_id) + info.update({ 'id': video_id, - 'title': title, - 'formats': formats, + 'title': get_element_by_attribute('class', 'ardplayer-title', webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), - } + }) + return info From 607d65fbceebe7df6d45bb02d6eab378896f099b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Dec 2015 03:17:56 +0600 Subject: [PATCH 0169/1105] [ign] flake8 --- youtube_dl/extractor/ign.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 6160f03d1..a2e18c8a7 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -109,7 +109,7 @@ class IGNIE(InfoExtractor): if page_type != 'video': multiple_urls = re.findall( r']*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', - webpage) + webpage) if multiple_urls: entries = [self.url_result(u, ie='IGN') for u in multiple_urls] return { @@ -209,7 +209,7 @@ class PCMagIE(IGNIE): 'upload_date': '20150106', 'uploader_id': 'cozzipix@gmail.com', } - },{ + }, { 'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp', 'md5': '94130c1ca07ba0adb6088350681f16c1', 'info_dict': { From 85367c3a478241a9d47be76ff5418b512cd40eed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Sat, 28 Nov 2015 23:14:38 +0200 Subject: [PATCH 0170/1105] [lrt] fix duration parsing --- youtube_dl/extractor/lrt.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index e3236f7b5..24a9a4c91 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -37,8 +37,7 @@ class LRTIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) duration = parse_duration(self._search_regex( - r"'duration':\s*'([^']+)',", webpage, - 'duration', fatal=False, default=None)) + r"var record_len = '([0-9]+:[0-9]+:[0-9]+)';", webpage, 'record_len', fatal=False, default=None)) formats = [] for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): From 339b1944e7c8c28d1fc7cc8721af73fd23937a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Sun, 29 Nov 2015 02:58:52 +0200 Subject: [PATCH 0171/1105] [lrt] fix the rest of extractor Closes #7690. --- youtube_dl/extractor/lrt.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 24a9a4c91..42e768ce7 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -1,12 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( - determine_ext, - js_to_json, parse_duration, remove_end, ) @@ -25,7 +21,7 @@ class LRTIE(InfoExtractor): 'duration': 1783, }, 'params': { - 'skip_download': True, # HLS download + 'skip_download': True, # m3u8 download }, } @@ -39,23 +35,8 @@ class LRTIE(InfoExtractor): duration = parse_duration(self._search_regex( r"var record_len = '([0-9]+:[0-9]+:[0-9]+)';", webpage, 'record_len', fatal=False, default=None)) - formats = [] - for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage): - data = self._parse_json(js, video_id, transform_source=js_to_json) - if 'provider' not in data: - continue - if data['provider'] == 'rtmp': - formats.append({ - 'format_id': 'rtmp', - 'ext': determine_ext(data['file']), - 'url': data['streamer'], - 'play_path': 'mp4:%s' % data['file'], - 'preference': -1, - 'rtmp_real_time': True, - }) - else: - formats.extend( - self._extract_m3u8_formats(data['file'], video_id, 'mp4')) + link = self._search_regex(r'file: "(.*)" \+ location\.hash\.substring\(1\)', webpage, 'link to m3u8') + formats = self._extract_m3u8_formats(link, video_id, "mp4") return { 'id': video_id, From f7e1d82d407ccb0be23999ba8af27ae2549d39aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 12:16:55 +0600 Subject: [PATCH 0172/1105] [lrt] Improve --- youtube_dl/extractor/lrt.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 42e768ce7..f8d27598a 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -30,13 +30,16 @@ class LRTIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = remove_end(self._og_search_title(webpage), ' - LRT') + m3u8_url = self._search_regex( + r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', + webpage, 'm3u8 url', group='url') + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) duration = parse_duration(self._search_regex( - r"var record_len = '([0-9]+:[0-9]+:[0-9]+)';", webpage, 'record_len', fatal=False, default=None)) - - link = self._search_regex(r'file: "(.*)" \+ location\.hash\.substring\(1\)', webpage, 'link to m3u8') - formats = self._extract_m3u8_formats(link, video_id, "mp4") + r'var\s+record_len\s*=\s*(["\'])(?P[0-9]+:[0-9]+:[0-9]+)\1', + webpage, 'duration', default=None, group='duration')) return { 'id': video_id, From 15aad84dc5ea9d60ffd4a1fd5d67f90afd11b4c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 12:26:48 +0600 Subject: [PATCH 0173/1105] [lrt] Extract counters --- youtube_dl/extractor/lrt.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index f8d27598a..863efd896 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + int_or_none, parse_duration, remove_end, ) @@ -19,6 +20,8 @@ class LRTIE(InfoExtractor): 'title': 'Septynios Kauno dienos', 'description': 'md5:24d84534c7dc76581e59f5689462411a', 'duration': 1783, + 'view_count': int, + 'like_count': int, }, 'params': { 'skip_download': True, # m3u8 download @@ -41,6 +44,13 @@ class LRTIE(InfoExtractor): r'var\s+record_len\s*=\s*(["\'])(?P[0-9]+:[0-9]+:[0-9]+)\1', webpage, 'duration', default=None, group='duration')) + view_count = int_or_none(self._html_search_regex( + r']+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P.+?)

', + webpage, 'view count', fatal=False, group='count')) + like_count = int_or_none(self._search_regex( + r']+id=(["\'])flikesCount.*?\1>(?P\d+)<', + webpage, 'like count', fatal=False, group='count')) + return { 'id': video_id, 'title': title, @@ -48,4 +58,6 @@ class LRTIE(InfoExtractor): 'thumbnail': thumbnail, 'description': description, 'duration': duration, + 'view_count': view_count, + 'like_count': like_count, } From 6afe044b51e99a3a0e638492e118634a2ca3cdad Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Dec 2015 09:56:15 +0100 Subject: [PATCH 0174/1105] [dcn] improve extraction --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/dcn.py | 148 ++++++++++++++----------------- 2 files changed, 67 insertions(+), 83 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 677c75564..abf36caf3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -119,7 +119,7 @@ from .dailymotion import ( from .daum import DaumIE from .dbtv import DBTVIE from .dcn import ( - DCNGeneralIE, + DCNIE, DCNVideoIE, DCNLiveIE, DCNSeasonIE, diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index a9a5e94f5..3857ba334 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -17,24 +17,61 @@ from ..utils import ( ) -class DCNGeneralIE(InfoExtractor): +class DCNIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P\d+)/[^/]+(?:/(?P\d+)/(?P\d+))?' def _real_extract(self, url): show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() - url = '' - ie_key = '' if video_id and int(video_id) > 0: - return self.url_result('http://www.dcndigital.ae/#/media/%s' % video_id, 'DCNVideo') + return self.url_result( + 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo') + elif season_id and int(season_id) > 0: + return self.url_result(smuggle_url( + 'http://www.dcndigital.ae/program/season/%s' % season_id, + {'show_id': show_id}), 'DCNSeason') else: - if season_id and int(season_id) > 0: - url = smuggle_url('http://www.dcndigital.ae/#/program/season/%s' % season_id, {'show_id': show_id}) - else: - url = 'http://www.dcndigital.ae/#/program/%s' % show_id - return self.url_result(url, 'DCNSeason') + return self.url_result( + 'http://www.dcndigital.ae/program/%s' % show_id, 'DCNSeason') -class DCNVideoIE(InfoExtractor): +class DCNBaseIE(InfoExtractor): + def _extract_video_info(self, video_data, video_id, is_live): + title = video_data.get('title_en') or video_data['title_ar'] + img = video_data.get('img') + thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None + duration = int_or_none(video_data.get('duration')) + description = video_data.get('description_en') or video_data.get('description_ar') + timestamp = parse_iso8601(video_data.get('create_time'), ' ') + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'is_live': is_live, + } + + def _extract_video_formats(self, webpage, video_id, entry_protocol): + m3u8_url = self._html_search_regex( + r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url') + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls') + + rtsp_url = self._search_regex( + r']+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) + if rtsp_url: + formats.append({ + 'url': rtsp_url, + 'format_id': 'rtsp', + }) + + self._sort_formats(formats) + return formats + + +class DCNVideoIE(DCNBaseIE): IE_NAME = 'dcn:video' _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P\d+)' _TEST = { @@ -45,7 +82,6 @@ class DCNVideoIE(InfoExtractor): 'ext': 'mp4', 'title': 'رحلة العمر : الحلقة 1', 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', - 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 2041, 'timestamp': 1227504126, 'upload_date': '20081124', @@ -62,51 +98,23 @@ class DCNVideoIE(InfoExtractor): request = compat_urllib_request.Request( 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, headers={'Origin': 'http://www.dcndigital.ae'}) - - video = self._download_json(request, video_id) - title = video.get('title_en') or video['title_ar'] - img = video.get('img') - thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None - duration = int_or_none(video.get('duration')) - description = video.get('description_en') or video.get('description_ar') - timestamp = parse_iso8601(video.get('create_time') or video.get('update_time'), ' ') + video_data = self._download_json(request, video_id) + info = self._extract_video_info(video_data, video_id, False) webpage = self._download_webpage( - 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' - + compat_urllib_parse.urlencode({ - 'id': video['id'], - 'user_id': video['user_id'], - 'signature': video['signature'], + 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + + compat_urllib_parse.urlencode({ + 'id': video_data['id'], + 'user_id': video_data['user_id'], + 'signature': video_data['signature'], 'countries': 'Q0M=', 'filter': 'DENY', }), video_id) - - m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url') - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') - - rtsp_url = self._search_regex( - r']+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'timestamp': timestamp, - 'formats': formats, - } + info['formats'] = self._extract_video_formats(webpage, video_id, 'm3u8_native') + return info -class DCNLiveIE(InfoExtractor): +class DCNLiveIE(DCNBaseIE): IE_NAME = 'dcn:live' _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P\d+)' _TEST = { @@ -132,45 +140,20 @@ class DCNLiveIE(InfoExtractor): 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, headers={'Origin': 'http://www.dcndigital.ae'}) - channel = self._download_json(request, channel_id) - title = channel.get('title_en') or channel['title_ar'] - img = channel.get('thumbnail') - thumbnail = 'http://admin.mangomolo.com/analytics/%s' % img if img else None - description = channel.get('description_en') or channel.get('description_ar') - timestamp = parse_iso8601(channel.get('create_time') or channel.get('update_time'), ' ') + channel_data = self._download_json(request, channel_id) + info = self._extract_video_info(channel_data, channel_id, True) webpage = self._download_webpage( 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse.urlencode({ - 'id': base64.b64encode(channel['user_id'].encode()).decode(), - 'channelid': base64.b64encode(channel['id'].encode()).decode(), - 'signature': channel['signature'], + 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), + 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), + 'signature': channel_data['signature'], 'countries': 'Q0M=', 'filter': 'DENY', }), channel_id) - - m3u8_url = self._html_search_regex(r'file:\s*"([^"]+)', webpage, 'm3u8 url') - formats = self._extract_m3u8_formats( - m3u8_url, channel_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') - - rtsp_url = self._search_regex( - r']+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) - if rtsp_url: - formats.append({ - 'url': rtsp_url, - 'format_id': 'rtsp', - }) - - self._sort_formats(formats) - - return { - 'id': channel_id, - 'title': self._live_title(title), - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats, - 'is_live': True, - } + info['formats'] = self._extract_video_formats(webpage, channel_id, 'm3u8') + return info class DCNSeasonIE(InfoExtractor): @@ -218,6 +201,7 @@ class DCNSeasonIE(InfoExtractor): entries = [] for video in show['videos']: - entries.append(self.url_result('http://www.dcndigital.ae/#/media/%s' % video['id'], 'DCNVideo')) + entries.append(self.url_result( + 'http://www.dcndigital.ae/media/%s' % video['id'], 'DCNVideo')) return self.playlist_result(entries, season_id, title) From 608cc3b85cbe5acc045274c22e3fdaca08b90d45 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Dec 2015 10:19:44 +0100 Subject: [PATCH 0175/1105] [kaltura] add referrer to m3u8 url --- youtube_dl/extractor/kaltura.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 353e960b0..4807c8110 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -160,10 +160,11 @@ class KalturaIE(InfoExtractor): 'width': int_or_none(f.get('width')), 'url': video_url, }) - + m3u8_url = info['dataUrl'].replace('format/url', 'format/applehttp') + if referrer: + m3u8_url += '?referrer=%s' % referrer m3u8_formats = self._extract_m3u8_formats( - info['dataUrl'].replace('format/url', 'format/applehttp'), - entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) if m3u8_formats: formats.extend(m3u8_formats) From 5b025168c73e00a45f1da857694835c52b73f0fa Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Dec 2015 12:57:39 +0100 Subject: [PATCH 0176/1105] [livestream] improve extraction - split long lines - use m3u8 entry protocol for live streams - extend _VALID_URL regex for livestream original - extract livestream original live streams --- youtube_dl/extractor/livestream.py | 114 +++++++++++++++++++---------- 1 file changed, 76 insertions(+), 38 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index b95c23c8c..11168fc4d 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -62,7 +62,8 @@ class LivestreamIE(InfoExtractor): _API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s' def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): - base_ele = find_xpath_attr(smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase') + base_ele = find_xpath_attr( + smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase') base = base_ele.get('content') if base_ele else 'http://livestreamvod-f.akamaihd.net/' formats = [] @@ -96,7 +97,8 @@ class LivestreamIE(InfoExtractor): video_url = video_data.get(key) if video_url: ext = determine_ext(video_url) - bitrate = int_or_none(self._search_regex(r'(\d+)\.%s' % ext, video_url, 'bitrate', default=None)) + bitrate = int_or_none(self._search_regex( + r'(\d+)\.%s' % ext, video_url, 'bitrate', default=None)) formats.append({ 'url': video_url, 'format_id': format_id, @@ -119,7 +121,8 @@ class LivestreamIE(InfoExtractor): f4m_url = video_data.get('f4m_url') if f4m_url: - f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False) + f4m_formats = self._extract_f4m_formats( + f4m_url, video_id, f4m_id='hds', fatal=False) if f4m_formats: formats.extend(f4m_formats) self._sort_formats(formats) @@ -157,10 +160,11 @@ class LivestreamIE(InfoExtractor): if smil_formats: formats.extend(smil_formats) + entry_protocol = 'm3u8' if is_live else 'm3u8_native' m3u8_url = stream_info.get('m3u8_url') if m3u8_url: m3u8_formats = self._extract_m3u8_formats( - m3u8_url, broadcast_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False) if m3u8_formats: formats.extend(m3u8_formats) @@ -197,7 +201,8 @@ class LivestreamIE(InfoExtractor): else: info_url = '{root}?&id={id}&newer=-1&type=video'.format( root=feed_root_url, id=last_video) - videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data'] + videos_info = self._download_json( + info_url, event_id, 'Downloading page {0}'.format(i))['data'] videos_info = [v['data'] for v in videos_info if v['type'] == 'video'] if not videos_info: break @@ -215,7 +220,8 @@ class LivestreamIE(InfoExtractor): account = mobj.group('account_id') or mobj.group('account_name') api_url = self._API_URL_TEMPLATE % (account, event) if video_id: - video_data = self._download_json(api_url + '/videos/%s' % video_id, video_id) + video_data = self._download_json( + api_url + '/videos/%s' % video_id, video_id) return self._extract_video_info(video_data) else: event_data = self._download_json(api_url, video_id) @@ -226,8 +232,8 @@ class LivestreamIE(InfoExtractor): class LivestreamOriginalIE(InfoExtractor): IE_NAME = 'livestream:original' _VALID_URL = r'''(?x)https?://original\.livestream\.com/ - (?P[^/]+)/(?Pvideo|folder) - (?:\?.*?Id=|/)(?P.*?)(&|$) + (?P[^/\?#]+)(?:/(?Pvideo|folder) + (?:(?:\?.*?Id=|/)(?P.*?)(&|$))?)? ''' _TESTS = [{ 'url': 'http://original.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', @@ -244,52 +250,61 @@ class LivestreamOriginalIE(InfoExtractor): 'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3', }, 'playlist_mincount': 4, + }, { + # live stream + 'url': 'http://www.livestream.com/znsbahamas', + 'only_matching': True, }] - def _extract_video(self, user, video_id): - api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) - + def _extract_video_info(self, user, video_id): + api_url = 'http://x%sx.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id=%s' % (user, video_id) info = self._download_xml(api_url, video_id) - # this url is used on mobile devices - stream_url = 'http://x{0}x.api.channel.livestream.com/3.0/getstream.json?id={1}'.format(user, video_id) - stream_info = self._download_json(stream_url, video_id) - is_live = stream_info.get('isLive') + item = info.find('channel').find('item') + title = xpath_text(item, 'title') media_ns = {'media': 'http://search.yahoo.com/mrss'} - thumbnail_url = xpath_attr(item, xpath_with_ns('media:thumbnail', media_ns), 'url') - duration = float_or_none(xpath_attr(item, xpath_with_ns('media:content', media_ns), 'duration')) + thumbnail_url = xpath_attr( + item, xpath_with_ns('media:thumbnail', media_ns), 'url') + duration = float_or_none(xpath_attr( + item, xpath_with_ns('media:content', media_ns), 'duration')) ls_ns = {'ls': 'http://api.channel.livestream.com/2.0'} - view_count = int_or_none(xpath_text(item, xpath_with_ns('ls:viewsCount', ls_ns))) + view_count = int_or_none(xpath_text( + item, xpath_with_ns('ls:viewsCount', ls_ns))) - formats = [{ - 'url': stream_info['progressiveUrl'], - 'format_id': 'http', - }] + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail_url, + 'duration': duration, + 'view_count': view_count, + } - m3u8_url = stream_info.get('httpUrl') + def _extract_video_formats(self, video_data, video_id, entry_protocol): + formats = [] + + progressive_url = video_data.get('progressiveUrl') + if progressive_url: + formats.append({ + 'url': progressive_url, + 'format_id': 'http', + }) + + m3u8_url = video_data.get('httpUrl') if m3u8_url: m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False) if m3u8_formats: formats.extend(m3u8_formats) - rtsp_url = stream_info.get('rtspUrl') + rtsp_url = video_data.get('rtspUrl') if rtsp_url: formats.append({ 'url': rtsp_url, 'format_id': 'rtsp', }) - self._sort_formats(formats) - return { - 'id': video_id, - 'title': self._live_title(xpath_text(item, 'title')) if is_live else xpath_text(item, 'title'), - 'formats': formats, - 'thumbnail': thumbnail_url, - 'duration': duration, - 'view_count': view_count, - 'is_live': is_live, - } + self._sort_formats(formats) + return formats def _extract_folder(self, url, folder_id): webpage = self._download_webpage(url, folder_id) @@ -308,13 +323,36 @@ class LivestreamOriginalIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - id = mobj.group('id') user = mobj.group('user') url_type = mobj.group('type') + content_id = mobj.group('id') if url_type == 'folder': - return self._extract_folder(url, id) + return self._extract_folder(url, content_id) else: - return self._extract_video(user, id) + # this url is used on mobile devices + stream_url = 'http://x%sx.api.channel.livestream.com/3.0/getstream.json' % user + info = {} + if content_id: + stream_url += '?id=%s' % content_id + info = self._extract_video_info(user, content_id) + else: + content_id = user + webpage = self._download_webpage(url, content_id) + info = { + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), + } + video_data = self._download_json(stream_url, content_id) + is_live = video_data.get('isLive') + entry_protocol = 'm3u8' if is_live else 'm3u8_native' + info.update({ + 'id': content_id, + 'title': self._live_title(info['title']) if is_live else info['title'], + 'formats': self._extract_video_formats(video_data, content_id, entry_protocol), + 'is_live': is_live, + }) + return info # The server doesn't support HEAD request, the generic extractor can't detect From 9f610f3a9e0e7536fe9c0ca167ea084aa2cd54df Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 27 Dec 2015 13:11:53 +0100 Subject: [PATCH 0177/1105] [sportdeutschland] Do not abort if meta info is missing This fixes http://sportdeutschland.tv/badminton/yonex-copenhagen-masters-2015 . No testcase though since the event will be over by 2016. --- youtube_dl/extractor/sportdeutschland.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py index ebb75f059..a9927f6e2 100644 --- a/youtube_dl/extractor/sportdeutschland.py +++ b/youtube_dl/extractor/sportdeutschland.py @@ -70,10 +70,12 @@ class SportDeutschlandIE(InfoExtractor): smil_doc = self._download_xml( smil_url, video_id, note='Downloading SMIL metadata') - base_url = smil_doc.find('./head/meta').attrib['base'] + base_url_el = smil_doc.find('./head/meta') + if base_url_el: + base_url = base_url_el.attrib['base'] formats.extend([{ 'format_id': 'rmtp', - 'url': base_url, + 'url': base_url if base_url_el else n.attrib['src'], 'play_path': n.attrib['src'], 'ext': 'flv', 'preference': -100, From 7b81316508d676393d89a99b42fc15e3dcfeab73 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Dec 2015 14:58:57 +0100 Subject: [PATCH 0178/1105] [livestream] skip m3u8 manifest in progressive_urls --- youtube_dl/extractor/livestream.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 11168fc4d..9c8d826c4 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -97,6 +97,8 @@ class LivestreamIE(InfoExtractor): video_url = video_data.get(key) if video_url: ext = determine_ext(video_url) + if ext == 'm3u8': + continue bitrate = int_or_none(self._search_regex( r'(\d+)\.%s' % ext, video_url, 'bitrate', default=None)) formats.append({ From 8d29e47f543152bf91db0167a313e56ea2f132e3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Dec 2015 15:33:39 +0100 Subject: [PATCH 0179/1105] [common] simplify the use of _extract_m3u8_formats and _extract_f4m_formats --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 828f58f12..3d39781a4 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -885,7 +885,7 @@ class InfoExtractor(object): fatal=fatal) if manifest is False: - return manifest + return [] formats = [] manifest_version = '1.0' @@ -956,7 +956,7 @@ class InfoExtractor(object): errnote=errnote or 'Failed to download m3u8 information', fatal=fatal) if res is False: - return res + return [] m3u8_doc, urlh = res m3u8_url = urlh.geturl() last_info = None From e73277c7e811a93a318a7707f82636c8093942b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 21:41:21 +0600 Subject: [PATCH 0180/1105] [abc7news] Remove redundant formats sorting --- youtube_dl/extractor/abc7news.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/abc7news.py b/youtube_dl/extractor/abc7news.py index c04949c21..122dc9099 100644 --- a/youtube_dl/extractor/abc7news.py +++ b/youtube_dl/extractor/abc7news.py @@ -44,7 +44,6 @@ class Abc7NewsIE(InfoExtractor): 'contentURL', webpage, 'm3u8 url', fatal=True) formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') - self._sort_formats(formats) title = self._og_search_title(webpage).strip() description = self._og_search_description(webpage).strip() From 62bdc9feccbab0265243f8554bdbc7a84b5cca65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 21:41:36 +0600 Subject: [PATCH 0181/1105] [esri] Fix typo --- youtube_dl/extractor/esri.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py index bf5d2019f..d4205d7fb 100644 --- a/youtube_dl/extractor/esri.py +++ b/youtube_dl/extractor/esri.py @@ -61,7 +61,7 @@ class EsriVideoIE(InfoExtractor): webpage, 'duration', fatal=False)) upload_date = unified_strdate(self._html_search_meta( - 'last-modified', webpage, 'upload date', fatal=None)) + 'last-modified', webpage, 'upload date', fatal=False)) return { 'id': video_id, From e0898585a188eb0538624b227eb00c90c2da26d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 21:41:55 +0600 Subject: [PATCH 0182/1105] [jwplatform] Fix typo --- youtube_dl/extractor/jwplatform.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index cdc095a79..29eba77fc 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -44,7 +44,7 @@ class JWPlatformIE(InfoExtractor): source_url = self._proto_relative_url(source['file']) source_type = source.get('type') or '' if source_type == 'application/vnd.apple.mpegurl': - m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None) + m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=False) if m3u8_formats: formats.extend(m3u8_formats) elif source_type.startswith('audio'): From 30f51acbc8b0f4f3161102501527ed81d162d9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 21:42:12 +0600 Subject: [PATCH 0183/1105] [rai] Fix typos --- youtube_dl/extractor/rai.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 14f1ccbb4..278b1d2bf 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -118,13 +118,13 @@ class RaiTVIE(InfoExtractor): if ext == 'm3u8': m3u8_formats = self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', - fatal=None) + fatal=False) if m3u8_formats: formats.extend(m3u8_formats) elif ext == 'f4m': f4m_formats = self._extract_f4m_formats( media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', - video_id, f4m_id='hds', fatal=None) + video_id, f4m_id='hds', fatal=False) if f4m_formats: formats.extend(f4m_formats) elif ext == 'stl': From 0b0a17ae9d8c53b75f7f3cfa7e6fd236ba83e280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 21:42:26 +0600 Subject: [PATCH 0184/1105] [viki] Fix typo --- youtube_dl/extractor/viki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index ca3f20a3d..9a1c377a4 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -279,7 +279,7 @@ class VikiIE(VikiBaseIE): if format_id == 'm3u8': m3u8_formats = self._extract_m3u8_formats( format_dict['url'], video_id, 'mp4', 'm3u8_native', - m3u8_id='m3u8-%s' % protocol, fatal=None) + m3u8_id='m3u8-%s' % protocol, fatal=False) if m3u8_formats: formats.extend(m3u8_formats) else: From a95c26a06a88ab464638d16b42d1659a15e71c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Dec 2015 21:43:14 +0600 Subject: [PATCH 0185/1105] [jwplatform] Carry long line --- youtube_dl/extractor/jwplatform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index 29eba77fc..a92adf2b3 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -44,7 +44,8 @@ class JWPlatformIE(InfoExtractor): source_url = self._proto_relative_url(source['file']) source_type = source.get('type') or '' if source_type == 'application/vnd.apple.mpegurl': - m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=False) + m3u8_formats = self._extract_m3u8_formats( + source_url, video_id, 'mp4', 'm3u8_native', fatal=False) if m3u8_formats: formats.extend(m3u8_formats) elif source_type.startswith('audio'): From 82e02ea5fce6bbc595dbfefc8eafce21e2bb53cc Mon Sep 17 00:00:00 2001 From: gam2046 Date: Fri, 18 Dec 2015 14:18:56 +0800 Subject: [PATCH 0186/1105] Update iqiyi.py Fix part of the address can not be resolved. eg:http://www.iqiyi.com/w_19rt6o8t9p.html --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index c3731a110..f0e2e5104 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -16,7 +16,7 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' - _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html' + _VALID_URL = r'http://(?:www\.)iqiyi.com/._.+?\.html' _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', From bee83e84f64248a47f2f0b79e8c2698bc89abf55 Mon Sep 17 00:00:00 2001 From: forDream Date: Tue, 22 Dec 2015 09:28:12 +0800 Subject: [PATCH 0187/1105] [iqiyi]fix valid url eg: http://yule.iqiyi.com/zbj.html --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index f0e2e5104..38348e0e7 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -16,7 +16,7 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' - _VALID_URL = r'http://(?:www\.)iqiyi.com/._.+?\.html' + _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' _TESTS = [{ 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', From 59185202c6854b39ceb5fe6ea960ad1b308ceca8 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 28 Dec 2015 00:19:36 +0800 Subject: [PATCH 0188/1105] [iqiyi] Add tests for #7894 --- youtube_dl/extractor/iqiyi.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 38348e0e7..66a70a181 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -84,6 +84,15 @@ class IqiyiIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', + 'only_matching': True, + }, { + 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html', + 'only_matching': True, + }, { + 'url': 'http://yule.iqiyi.com/pcb.html', + 'only_matching': True, }] _FORMATS_MAP = [ From c71d2e2087e6daf2196bb26ba7fa7e00c10f2d53 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Dec 2015 17:27:20 +0100 Subject: [PATCH 0189/1105] [livestream] change test url --- youtube_dl/extractor/livestream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 9c8d826c4..688eb2308 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -254,7 +254,7 @@ class LivestreamOriginalIE(InfoExtractor): 'playlist_mincount': 4, }, { # live stream - 'url': 'http://www.livestream.com/znsbahamas', + 'url': 'http://original.livestream.com/znsbahamas', 'only_matching': True, }] From 97f18fac3a90da163bb6692038be560a5b93b525 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Dec 2015 17:44:55 +0100 Subject: [PATCH 0190/1105] [vgtv] fix f4m downloading(fixes #7843) --- youtube_dl/extractor/vgtv.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 811ee197d..129668a99 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -86,9 +86,10 @@ class VGTVIE(XstreamIE): { # streamType: wasLive 'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla', + 'md5': '458f4841239dab414343b50e5af8869c', 'info_dict': { 'id': '113063', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'V75 fra Solvalla 30.05.15', 'description': 'md5:b3743425765355855f88e096acc93231', 'thumbnail': 're:^https?://.*\.jpg', @@ -97,10 +98,6 @@ class VGTVIE(XstreamIE): 'upload_date': '20150530', 'view_count': int, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, }, { 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more', @@ -160,12 +157,15 @@ class VGTVIE(XstreamIE): formats.extend(m3u8_formats) hds_url = streams.get('hds') - # wasLive hds are always 404 - if hds_url and stream_type != 'wasLive': + if hds_url: + hdcore_sign = 'hdcore=3.7.0' f4m_formats = self._extract_f4m_formats( - hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False) + hds_url + '?%s' % hdcore_sign, video_id, f4m_id='hds', fatal=False) if f4m_formats: - formats.extend(f4m_formats) + for entry in f4m_formats: + # URLs without the extra param induce an 404 error + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.append(entry) mp4_urls = streams.get('pseudostreaming') or [] mp4_url = streams.get('mp4') From c047270c02bef6c8303c985d1e1b8da5be394235 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 28 Dec 2015 01:09:18 +0800 Subject: [PATCH 0191/1105] [utils] Remove Content-encoding from headers after decompression With cn_verification_proxy, our http_response() is called twice, one from PerRequestProxyHandler.proxy_open() and another from normal YoutubeDL.urlopen(). As a result, for proxies honoring Accept-Encoding, the following bug occurs: $ youtube-dl -vs --cn-verification-proxy https://secure.uku.im:993 "test:letv" [debug] System config: [] [debug] User config: [] [debug] Command-line args: ['-vs', '--cn-verification-proxy', 'https://secure.uku.im:993', 'test:letv'] [debug] Encodings: locale UTF-8, fs utf-8, out UTF-8, pref UTF-8 [debug] youtube-dl version 2015.12.23 [debug] Git HEAD: 97f18fa [debug] Python version 3.5.1 - Linux-4.3.3-1-ARCH-x86_64-with-arch-Arch-Linux [debug] exe versions: ffmpeg 2.8.4, ffprobe 2.8.4, rtmpdump 2.4 [debug] Proxy map: {} [TestURL] Test URL: http://www.letv.com/ptv/vplay/22005890.html [Letv] 22005890: Downloading webpage [Letv] 22005890: Downloading playJson data ERROR: Unable to download JSON metadata: Not a gzipped file (b'{"') (caused by OSError('Not a gzipped file (b\'{"\')',)); please report this issue on https://yt-dl.org/bug . Make sure you are using the latest version; see https://yt-dl.org/update on how to update. Be sure to call youtube-dl with the --verbose flag and include its complete output. File "/home/yen/Executables/Multimedia/youtube-dl/youtube_dl/extractor/common.py", line 330, in _request_webpage return self._downloader.urlopen(url_or_request) File "/home/yen/Executables/Multimedia/youtube-dl/youtube_dl/YoutubeDL.py", line 1886, in urlopen return self._opener.open(req, timeout=self._socket_timeout) File "/usr/lib/python3.5/urllib/request.py", line 471, in open response = meth(req, response) File "/home/yen/Executables/Multimedia/youtube-dl/youtube_dl/utils.py", line 773, in http_response raise original_ioerror File "/home/yen/Executables/Multimedia/youtube-dl/youtube_dl/utils.py", line 761, in http_response uncompressed = io.BytesIO(gz.read()) File "/usr/lib/python3.5/gzip.py", line 274, in read return self._buffer.read(size) File "/usr/lib/python3.5/gzip.py", line 461, in read if not self._read_gzip_header(): File "/usr/lib/python3.5/gzip.py", line 409, in _read_gzip_header raise OSError('Not a gzipped file (%r)' % magic) --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1737ac5f6..0ed6c45c8 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -773,11 +773,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): raise original_ioerror resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg + del resp.headers['Content-encoding'] # deflate if resp.headers.get('Content-encoding', '') == 'deflate': gz = io.BytesIO(self.deflate(resp.read())) resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg + del resp.headers['Content-encoding'] # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see # https://github.com/rg3/youtube-dl/issues/6457). if 300 <= resp.code < 400: From bd3f9ecabeb5e189f16580c61c56d93148079363 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 28 Dec 2015 00:36:57 +0100 Subject: [PATCH 0192/1105] [tunein] add support for tunein topic,clip and program(fixes #7348) --- youtube_dl/extractor/__init__.py | 8 +- youtube_dl/extractor/tunein.py | 187 +++++++++++++++++++++---------- 2 files changed, 136 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 165835f63..c1dbf9f21 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -703,7 +703,13 @@ from .tube8 import Tube8IE from .tubitv import TubiTvIE from .tudou import TudouIE from .tumblr import TumblrIE -from .tunein import TuneInIE +from .tunein import ( + TuneInClipIE, + TuneInStationIE, + TuneInProgramIE, + TuneInTopicIE, + TuneInShortenerIE, +) from .turbo import TurboIE from .tutv import TutvIE from .tv2 import ( diff --git a/youtube_dl/extractor/tunein.py b/youtube_dl/extractor/tunein.py index b6b1f2568..8322cc14d 100644 --- a/youtube_dl/extractor/tunein.py +++ b/youtube_dl/extractor/tunein.py @@ -2,74 +2,33 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor from ..utils import ExtractorError +from ..compat import compat_urlparse -class TuneInIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?: - tunein\.com/ - (?: - radio/.*?-s| - station/.*?StationId\= - )(?P[0-9]+) - |tun\.in/(?P[A-Za-z0-9]+) - ) - ''' - _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station' - - _INFO_DICT = { - 'id': '34682', - 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', - 'ext': 'aac', - 'thumbnail': 're:^https?://.*\.png$', - 'location': 'Tacoma, WA', - } - _TESTS = [ - { - 'url': 'http://tunein.com/radio/Jazz24-885-s34682/', - 'info_dict': _INFO_DICT, - 'params': { - 'skip_download': True, # live stream - }, - }, - { # test redirection - 'url': 'http://tun.in/ser7s', - 'info_dict': _INFO_DICT, - 'params': { - 'skip_download': True, # live stream - }, - }, - ] +class TuneInBaseIE(InfoExtractor): + _API_BASE_URL = 'http://tunein.com/tuner/tune/' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - redirect_id = mobj.group('redirect_id') - if redirect_id: - # The server doesn't support HEAD requests - urlh = self._request_webpage( - url, redirect_id, note='Downloading redirect page') - url = urlh.geturl() - self.to_screen('Following redirect: %s' % url) - mobj = re.match(self._VALID_URL, url) - station_id = mobj.group('id') + content_id = self._match_id(url) - station_info = self._download_json( - self._API_URL_TEMPLATE.format(station_id), - station_id, note='Downloading station JSON') + content_info = self._download_json( + self._API_BASE_URL + self._API_URL_QUERY % content_id, + content_id, note='Downloading JSON metadata') - title = station_info['Title'] - thumbnail = station_info.get('Logo') - location = station_info.get('Location') - streams_url = station_info.get('StreamUrl') + title = content_info['Title'] + thumbnail = content_info.get('Logo') + location = content_info.get('Location') + streams_url = content_info.get('StreamUrl') if not streams_url: - raise ExtractorError('No downloadable streams found', - expected=True) + raise ExtractorError('No downloadable streams found', expected=True) + if not streams_url.startswith('http://'): + streams_url = compat_urlparse.urljoin(url, streams_url) + stream_data = self._download_webpage( - streams_url, station_id, note='Downloading stream data') + streams_url, content_id, note='Downloading stream data') streams = json.loads(self._search_regex( r'\((.*)\);', stream_data, 'stream info'))['Streams'] @@ -97,10 +56,122 @@ class TuneInIE(InfoExtractor): self._sort_formats(formats) return { - 'id': station_id, + 'id': content_id, 'title': title, 'formats': formats, 'thumbnail': thumbnail, 'location': location, 'is_live': is_live, } + + +class TuneInClipIE(TuneInBaseIE): + IE_NAME = 'tunein:clip' + _VALID_URL = r'https?://(?:www\.)?tunein\.com/station/.*?audioClipId\=(?P\d+)' + _API_URL_QUERY = '?tuneType=AudioClip&audioclipId=%s' + + _TESTS = [ + { + 'url': 'http://tunein.com/station/?stationId=246119&audioClipId=816', + 'md5': '99f00d772db70efc804385c6b47f4e77', + 'info_dict': { + 'id': '816', + 'title': '32m', + 'ext': 'mp3', + }, + }, + ] + + +class TuneInStationIE(TuneInBaseIE): + IE_NAME = 'tunein:station' + _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-s|station/.*?StationId\=)(?P\d+)' + _API_URL_QUERY = '?tuneType=Station&stationId=%s' + + @classmethod + def suitable(cls, url): + return False if TuneInClipIE.suitable(url) else super(TuneInStationIE, cls).suitable(url) + + _TESTS = [ + { + 'url': 'http://tunein.com/radio/Jazz24-885-s34682/', + 'info_dict': { + 'id': '34682', + 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', + 'ext': 'mp3', + 'location': 'Tacoma, WA', + }, + 'params': { + 'skip_download': True, # live stream + }, + }, + ] + + +class TuneInProgramIE(TuneInBaseIE): + IE_NAME = 'tunein:program' + _VALID_URL = r'https?://(?:www\.)?tunein\.com/(?:radio/.*?-p|program/.*?ProgramId\=)(?P\d+)' + _API_URL_QUERY = '?tuneType=Program&programId=%s' + + _TESTS = [ + { + 'url': 'http://tunein.com/radio/Jazz-24-p2506/', + 'info_dict': { + 'id': '2506', + 'title': 'Jazz 24 on 91.3 WUKY-HD3', + 'ext': 'mp3', + 'location': 'Lexington, KY', + }, + 'params': { + 'skip_download': True, # live stream + }, + }, + ] + + +class TuneInTopicIE(TuneInBaseIE): + IE_NAME = 'tunein:topic' + _VALID_URL = r'https?://(?:www\.)?tunein\.com/topic/.*?TopicId\=(?P\d+)' + _API_URL_QUERY = '?tuneType=Topic&topicId=%s' + + _TESTS = [ + { + 'url': 'http://tunein.com/topic/?TopicId=101830576', + 'md5': 'c31a39e6f988d188252eae7af0ef09c9', + 'info_dict': { + 'id': '101830576', + 'title': 'Votez pour moi du 29 octobre 2015 (29/10/15)', + 'ext': 'mp3', + 'location': 'Belgium', + }, + }, + ] + + +class TuneInShortenerIE(InfoExtractor): + IE_NAME = 'tunein:shortener' + IE_DESC = False # Do not list + _VALID_URL = r'https?://tun\.in/(?P[A-Za-z0-9]+)' + + _TEST = { + # test redirection + 'url': 'http://tun.in/ser7s', + 'info_dict': { + 'id': '34682', + 'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2', + 'ext': 'mp3', + 'location': 'Tacoma, WA', + }, + 'params': { + 'skip_download': True, # live stream + }, + } + + def _real_extract(self, url): + redirect_id = self._match_id(url) + # The server doesn't support HEAD requests + urlh = self._request_webpage( + url, redirect_id, note='Downloading redirect page') + url = urlh.geturl() + self.to_screen('Following redirect: %s' % url) + return self.url_result(url) From bca9bea1c1a13d2f9ad3244a2b11979220c30484 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 28 Dec 2015 10:27:17 +0100 Subject: [PATCH 0193/1105] [dcn] make m3u8 formats extraction non fatal --- youtube_dl/extractor/dcn.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 3857ba334..d9485cd86 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -54,10 +54,14 @@ class DCNBaseIE(InfoExtractor): } def _extract_video_formats(self, webpage, video_id, entry_protocol): + formats = [] m3u8_url = self._html_search_regex( - r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url') - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls') + r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False) + if m3u8_url: + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None) + if m3u8_formats: + formats.extend(m3u8_formats) rtsp_url = self._search_regex( r']+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) @@ -117,21 +121,6 @@ class DCNVideoIE(DCNBaseIE): class DCNLiveIE(DCNBaseIE): IE_NAME = 'dcn:live' _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P\d+)' - _TEST = { - 'url': 'http://www.dcndigital.ae/#/live/6/dubai-tv', - 'info_dict': - { - 'id': '6', - 'ext': 'mp4', - 'title': 're:^Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'thumbnail': 're:^https?://.*\.png$', - 'is_live': True, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } def _real_extract(self, url): channel_id = self._match_id(url) From 04e24906be1dd0b6626b50de7e65609219a88116 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Dec 2015 13:06:30 +0100 Subject: [PATCH 0194/1105] [cspan] Initialize 'video_type' to avoid 'UnboundLocalError' exceptions (#8032) --- youtube_dl/extractor/cspan.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 7b685d157..154bddb43 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -58,6 +58,7 @@ class CSpanIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + video_type = None webpage = self._download_webpage(url, video_id) matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage) if matches: @@ -70,6 +71,8 @@ class CSpanIE(InfoExtractor): title = self._og_search_title(webpage) surl = smuggle_url(senate_isvp_url, {'force_title': title}) return self.url_result(surl, 'SenateISVP', video_id, title) + if video_type is None or video_id is None: + raise ExtractorError('unable to find video id and type') def get_text_attr(d, attr): return d.get(attr, {}).get('#text') From 6c6b8bd5ccef10aad73ee5a556830f9b00b93661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Dec 2015 13:48:10 +0100 Subject: [PATCH 0195/1105] [cspan] Fix extraction (fixes #8032) --- youtube_dl/extractor/cspan.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py index 154bddb43..b3ee67018 100644 --- a/youtube_dl/extractor/cspan.py +++ b/youtube_dl/extractor/cspan.py @@ -60,11 +60,13 @@ class CSpanIE(InfoExtractor): video_id = self._match_id(url) video_type = None webpage = self._download_webpage(url, video_id) - matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage) - if matches: + # We first look for clipid, because clipprog always appears before + patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] + results = list(filter(None, (re.search(p, webpage) for p in patterns))) + if results: + matches = results[0] video_type, video_id = matches.groups() - if video_type == 'prog': - video_type = 'program' + video_type = 'clip' if video_type == 'id' else 'program' else: senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) if senate_isvp_url: From 9f66931e160ec7c4cc3be8bf731da32b0bfa9043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 28 Dec 2015 21:20:09 +0600 Subject: [PATCH 0196/1105] [facebook] Extract login error --- youtube_dl/extractor/facebook.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 39c481068..1692ec1cb 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -100,6 +100,11 @@ class FacebookIE(InfoExtractor): login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') if re.search(r'', login_results) is not None: + error = self._html_search_regex( + r'(?s)]+class=(["\']).*?login_error_box.*?\1[^>]*>]*>.*?]*>(?P.+?)', + login_results, 'login error', default=None, group='error') + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') return From c1f49e16843bb9bdfdf132a472eb0fc2c6b920e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 28 Dec 2015 21:37:02 +0600 Subject: [PATCH 0197/1105] [facebook] Fix authentication --- youtube_dl/extractor/facebook.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 1692ec1cb..5e43f2359 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -74,7 +74,7 @@ class FacebookIE(InfoExtractor): return login_page_req = sanitized_Request(self._LOGIN_URL) - login_page_req.add_header('Cookie', 'locale=en_US') + self._set_cookie('facebook.com', 'locale', 'en_US') login_page = self._download_webpage(login_page_req, None, note='Downloading login page', errnote='Unable to download login page') @@ -108,10 +108,17 @@ class FacebookIE(InfoExtractor): self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') return + fb_dtsg = self._search_regex( + r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None) + h = self._search_regex( + r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None) + + if not fb_dtsg or not h: + return + check_form = { - 'fb_dtsg': self._search_regex(r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg'), - 'h': self._search_regex( - r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h'), + 'fb_dtsg': fb_dtsg, + 'h': h, 'name_action_selected': 'dont_save', } check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) From a5c1d95500898541cd6b19b32963883db421c4b4 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 28 Dec 2015 17:49:59 +0100 Subject: [PATCH 0198/1105] [zdf] fix formats extraction --- youtube_dl/extractor/phoenix.py | 9 +- youtube_dl/extractor/zdf.py | 253 ++++++++++++++++++++------------ 2 files changed, 161 insertions(+), 101 deletions(-) diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py index 46cebc0d7..6ce2ec19d 100644 --- a/youtube_dl/extractor/phoenix.py +++ b/youtube_dl/extractor/phoenix.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals -from .common import InfoExtractor -from .zdf import extract_from_xml_url +from .zdf import ZDFIE -class PhoenixIE(InfoExtractor): +class PhoenixIE(ZDFIE): _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ (?: phoenix/die_sendungen/(?:[^/]+/)? @@ -41,5 +40,5 @@ class PhoenixIE(InfoExtractor): r'
[^_]+)_(?P[^_]+)_(?P[^_]+)_ - (?P[^_]+)_(?P[^_]+)_(?P[^_]+) - ''', format_id) - - ext = format_m.group('container') - proto = format_m.group('proto').lower() - - quality = xpath_text(fnode, './quality', 'quality') - abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000) - vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000) - - width = int_or_none(xpath_text(fnode, './width', 'width')) - height = int_or_none(xpath_text(fnode, './height', 'height')) - - filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize')) - - format_note = '' - if not format_note: - format_note = None - - return { - 'format_id': format_id + '-' + quality, - 'url': video_url, - 'ext': ext, - 'acodec': format_m.group('acodec'), - 'vcodec': format_m.group('vcodec'), - 'abr': abr, - 'vbr': vbr, - 'width': width, - 'height': height, - 'filesize': filesize, - 'format_note': format_note, - 'protocol': proto, - '_available': is_available, - } - - def xml_to_thumbnails(fnode): - thumbnails = [] - for node in fnode: - thumbnail_url = node.text - if not thumbnail_url: - continue - thumbnail = { - 'url': thumbnail_url, - } - if 'key' in node.attrib: - m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) - if m: - thumbnail['width'] = int(m.group(1)) - thumbnail['height'] = int(m.group(2)) - thumbnails.append(thumbnail) - return thumbnails - - thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage')) - - format_nodes = doc.findall('.//formitaeten/formitaet') - formats = list(filter( - lambda f: f['_available'], - map(xml_to_format, format_nodes))) - ie._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'duration': duration, - 'thumbnails': thumbnails, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'upload_date': upload_date, - 'formats': formats, - } - - class ZDFIE(InfoExtractor): _VALID_URL = r'(?:zdf:|zdf:video:|https?://www\.zdf\.de/ZDFmediathek(?:#)?/(.*beitrag/(?:video/)?))(?P[0-9]+)(?:/[^/?]+)?(?:\?.*)?' - _TEST = { + _TESTS = [{ 'url': 'http://www.zdf.de/ZDFmediathek/beitrag/video/2037704/ZDFspezial---Ende-des-Machtpokers--?bc=sts;stt', 'info_dict': { 'id': '2037704', @@ -122,12 +32,163 @@ class ZDFIE(InfoExtractor): 'upload_date': '20131127', }, 'skip': 'Videos on ZDF.de are depublicised in short order', - } + }] + + def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): + param_groups = {} + for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)): + group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace')) + params = {} + for param in param_group: + params[param.get('name')] = param.get('value') + param_groups[group_id] = params + + formats = [] + for video in smil.findall(self._xpath_ns('.//video', namespace)): + src = video.get('src') + if not src: + continue + bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) + group_id = video.get('paramGroup') + param_group = param_groups[group_id] + for proto in param_group['protocols'].split(','): + formats.append({ + 'url': '%s://%s' % (proto, param_group['host']), + 'app': param_group['app'], + 'play_path': src, + 'ext': 'flv', + 'format_id': '%s-%d' % (proto, bitrate), + 'tbr': bitrate, + 'protocol': proto, + }) + self._sort_formats(formats) + return formats + + def extract_from_xml_url(self, video_id, xml_url): + doc = self._download_xml( + xml_url, video_id, + note='Downloading video info', + errnote='Failed to download video info') + + title = doc.find('.//information/title').text + description = xpath_text(doc, './/information/detail', 'description') + duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) + uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader') + uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id') + upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date')) + + def xml_to_thumbnails(fnode): + thumbnails = [] + for node in fnode: + thumbnail_url = node.text + if not thumbnail_url: + continue + thumbnail = { + 'url': thumbnail_url, + } + if 'key' in node.attrib: + m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) + if m: + thumbnail['width'] = int(m.group(1)) + thumbnail['height'] = int(m.group(2)) + thumbnails.append(thumbnail) + return thumbnails + + thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage')) + + format_nodes = doc.findall('.//formitaeten/formitaet') + quality = qualities(['veryhigh', 'high', 'med', 'low']) + + def get_quality(elem): + return quality(xpath_text(elem, 'quality')) + format_nodes.sort(key=get_quality) + format_ids = [] + formats = [] + for fnode in format_nodes: + video_url = fnode.find('url').text + is_available = 'http://www.metafilegenerator' not in video_url + if not is_available: + continue + format_id = fnode.attrib['basetype'] + quality = xpath_text(fnode, './quality', 'quality') + format_m = re.match(r'''(?x) + (?P[^_]+)_(?P[^_]+)_(?P[^_]+)_ + (?P[^_]+)_(?P[^_]+)_(?P[^_]+) + ''', format_id) + + ext = determine_ext(video_url, None) or format_m.group('container') + if ext not in ('smil', 'f4m', 'm3u8'): + format_id = format_id + '-' + quality + if format_id in format_ids: + continue + + if ext == 'meta': + continue + elif ext == 'smil': + smil_formats = self._extract_smil_formats( + video_url, video_id, fatal=False) + if smil_formats: + formats.extend(smil_formats) + elif ext == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + if m3u8_formats: + formats.extend(m3u8_formats) + elif ext == 'f4m': + f4m_formats = self._extract_f4m_formats( + video_url, video_id, f4m_id='hds', fatal=False) + if f4m_formats: + formats.extend(f4m_formats) + else: + proto = format_m.group('proto').lower() + + abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000) + vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000) + + width = int_or_none(xpath_text(fnode, './width', 'width')) + height = int_or_none(xpath_text(fnode, './height', 'height')) + + filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize')) + + format_note = '' + if not format_note: + format_note = None + + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'ext': ext, + 'acodec': format_m.group('acodec'), + 'vcodec': format_m.group('vcodec'), + 'abr': abr, + 'vbr': vbr, + 'width': width, + 'height': height, + 'filesize': filesize, + 'format_note': format_note, + 'protocol': proto, + '_available': is_available, + }) + format_ids.append(format_id) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'thumbnails': thumbnails, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'formats': formats, + } def _real_extract(self, url): video_id = self._match_id(url) xml_url = 'http://www.zdf.de/ZDFmediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id - return extract_from_xml_url(self, video_id, xml_url) + return self.extract_from_xml_url(video_id, xml_url) class ZDFChannelIE(InfoExtractor): From d497a201cab0eb588e9b7f75494f5494c9663e67 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 5 Dec 2015 21:14:43 +0100 Subject: [PATCH 0199/1105] [common] use specific variable for protocol preference in _sort_formats --- youtube_dl/extractor/common.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6ab2d68d6..76f5b8b05 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -18,7 +18,6 @@ from ..compat import ( compat_http_client, compat_urllib_error, compat_urllib_parse, - compat_urllib_parse_urlparse, compat_urlparse, compat_str, compat_etree_fromstring, @@ -42,6 +41,7 @@ from ..utils import ( url_basename, xpath_text, xpath_with_ns, + determine_protocol, ) @@ -776,14 +776,12 @@ class InfoExtractor(object): preference = f.get('preference') if preference is None: - proto = f.get('protocol') - if proto is None: - proto = compat_urllib_parse_urlparse(f.get('url', '')).scheme - - preference = 0 if proto in ['http', 'https'] else -0.1 + preference = 0 if f.get('ext') in ['f4f', 'f4m']: # Not yet supported preference -= 0.5 + proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1 + if f.get('vcodec') == 'none': # audio only if self._downloader.params.get('prefer_free_formats'): ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] @@ -814,6 +812,7 @@ class InfoExtractor(object): f.get('vbr') if f.get('vbr') is not None else -1, f.get('height') if f.get('height') is not None else -1, f.get('width') if f.get('width') is not None else -1, + proto_preference, ext_preference, f.get('abr') if f.get('abr') is not None else -1, audio_ext_preference, From 7e5edcfd333f05c610bd70c33f3d3fd0ff6a7cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Dec 2015 00:58:24 +0600 Subject: [PATCH 0200/1105] Simplify formats accumulation for f4m/m3u8/smil formats Now all _extract_*_formats routines return a list --- youtube_dl/extractor/amp.py | 12 ++++------- youtube_dl/extractor/ard.py | 12 ++++------- youtube_dl/extractor/atresplayer.py | 10 +++------ youtube_dl/extractor/audimedia.py | 8 ++------ youtube_dl/extractor/bbc.py | 6 ++---- youtube_dl/extractor/bloomberg.py | 12 ++++------- youtube_dl/extractor/br.py | 12 ++++------- youtube_dl/extractor/brightcove.py | 6 ++---- youtube_dl/extractor/common.py | 16 +++++---------- youtube_dl/extractor/dailymotion.py | 12 ++++------- youtube_dl/extractor/dcn.py | 6 ++---- youtube_dl/extractor/eitb.py | 12 ++++------- youtube_dl/extractor/funimation.py | 6 ++---- youtube_dl/extractor/funnyordie.py | 6 ++---- youtube_dl/extractor/globo.py | 6 ++---- youtube_dl/extractor/hotstar.py | 4 +--- youtube_dl/extractor/ign.py | 8 ++------ youtube_dl/extractor/jwplatform.py | 6 ++---- youtube_dl/extractor/kaltura.py | 6 ++---- youtube_dl/extractor/livestream.py | 32 +++++++++-------------------- youtube_dl/extractor/nba.py | 8 ++------ youtube_dl/extractor/ooyala.py | 12 +++-------- youtube_dl/extractor/rai.py | 12 ++++------- youtube_dl/extractor/rutube.py | 12 ++++------- youtube_dl/extractor/ruutu.py | 12 ++++------- youtube_dl/extractor/tele13.py | 4 +--- youtube_dl/extractor/toggle.py | 6 ++---- youtube_dl/extractor/vgtv.py | 6 ++---- youtube_dl/extractor/viki.py | 6 ++---- youtube_dl/extractor/vimeo.py | 6 ++---- youtube_dl/extractor/wdr.py | 14 ++++--------- youtube_dl/extractor/zdf.py | 18 ++++++---------- 32 files changed, 99 insertions(+), 215 deletions(-) diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index dcc3c97f1..1035d1c48 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -53,16 +53,12 @@ class AMPIE(InfoExtractor): media = media_data['@attributes'] media_type = media['type'] if media_type == 'video/f4m': - f4m_formats = self._extract_f4m_formats( + formats.extend(self._extract_f4m_formats( media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', - video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + video_id, f4m_id='hds', fatal=False)) elif media_type == 'application/x-mpegURL': - m3u8_formats = self._extract_m3u8_formats( - media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)) else: formats.append({ 'format_id': media_data['media-category']['@attributes']['label'], diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 687eb9f82..9fb84911a 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -113,16 +113,12 @@ class ARDMediathekIE(InfoExtractor): if quality != 'auto' and ext in ('f4m', 'm3u8'): continue if ext == 'f4m': - f4m_formats = self._extract_f4m_formats( + formats.extend(self._extract_f4m_formats( stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', - video_id, preference=-1, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + video_id, preference=-1, f4m_id='hds', fatal=False)) elif ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', preference=1, m3u8_id='hls', fatal=False)) else: if server and server.startswith('rtmp'): f = { diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 7ac3044c7..3fb042cea 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -134,10 +134,8 @@ class AtresPlayerIE(InfoExtractor): m3u8_url = player.get('urlVideoHls') if m3u8_url: - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) timestamp = int_or_none(self._download_webpage( self._TIME_API_URL, @@ -170,9 +168,7 @@ class AtresPlayerIE(InfoExtractor): continue else: f4m_url = video_url[:-9] + '/manifest.f4m' - f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) path_data = player.get('pathData') diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py index 4382a302b..9b037bb0c 100644 --- a/youtube_dl/extractor/audimedia.py +++ b/youtube_dl/extractor/audimedia.py @@ -45,15 +45,11 @@ class AudiMediaIE(InfoExtractor): stream_url_hls = json_data.get('stream_url_hls') if stream_url_hls: - m3u8_formats = self._extract_m3u8_formats(stream_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(stream_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) stream_url_hds = json_data.get('stream_url_hds') if stream_url_hds: - f4m_formats = self._extract_f4m_formats(json_data.get('stream_url_hds') + '?hdcore=3.4.0', video_id, -1, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(json_data.get('stream_url_hds') + '?hdcore=3.4.0', video_id, -1, f4m_id='hds', fatal=False)) for video_version in json_data.get('video_versions'): video_version_url = video_version.get('download_url') or video_version.get('stream_url') diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 691aecc0d..923273fb2 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -223,11 +223,9 @@ class BBCCoUkIE(InfoExtractor): elif transfer_format == 'dash': pass elif transfer_format == 'hls': - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( href, programme_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id=supplier, fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + m3u8_id=supplier, fatal=False)) # Direct link else: formats.append({ diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index ebeef8f2a..13343bc25 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -41,15 +41,11 @@ class BloombergIE(InfoExtractor): if not stream_url: continue if stream['muxing_format'] == 'TS': - m3u8_formats = self._extract_m3u8_formats( - stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) else: - f4m_formats = self._extract_f4m_formats( - stream_url, video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + stream_url, video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index e66854538..11cf49851 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -121,15 +121,11 @@ class BRIE(InfoExtractor): format_url = xpath_text(asset, ['downloadUrl', 'url']) asset_type = asset.get('type') if asset_type == 'HDS': - f4m_formats = self._extract_f4m_formats( - format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)) elif asset_type == 'HLS': - m3u8_formats = self._extract_m3u8_formats( - format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) else: format_info = { 'ext': xpath_text(asset, 'mediaType'), diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 03a4f446e..c947337f9 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -469,11 +469,9 @@ class BrightcoveNewIE(InfoExtractor): if source_type == 'application/x-mpegURL': if not src: continue - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( src, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + m3u8_id='hls', fatal=False)) else: streaming_src = source.get('streaming_src') stream_name, app_name = source.get('stream_name'), source.get('app_name') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 34a28c126..655207447 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -910,10 +910,8 @@ class InfoExtractor(object): # may differ leading to inability to resolve the format by requested # bitrate in f4m downloader if determine_ext(manifest_url) == 'f4m': - f4m_formats = self._extract_f4m_formats( - manifest_url, video_id, preference, f4m_id, fatal=fatal) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + manifest_url, video_id, preference, f4m_id, fatal=fatal)) continue tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ @@ -1147,10 +1145,8 @@ class InfoExtractor(object): src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) if proto == 'm3u8' or src_ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)) continue if src_ext == 'f4m': @@ -1162,9 +1158,7 @@ class InfoExtractor(object): } f4m_url += '&' if '?' in f4m_url else '?' f4m_url += compat_urllib_parse.urlencode(f4m_params) - f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) continue if src_url.startswith('http') and self._is_valid_url(src, video_id): diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 0c5b6617f..439fd42e8 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -148,15 +148,11 @@ class DailymotionIE(DailymotionBaseInfoExtractor): continue ext = determine_ext(media_url) if type_ == 'application/x-mpegURL' or ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - media_url, video_id, 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) elif type_ == 'application/f4m' or ext == 'f4m': - f4m_formats = self._extract_f4m_formats( - media_url, video_id, preference=-1, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + media_url, video_id, preference=-1, f4m_id='hds', fatal=False)) else: f = { 'url': media_url, diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 0d140f12f..8f48571de 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -56,10 +56,8 @@ class DCNBaseIE(InfoExtractor): m3u8_url = self._html_search_regex( r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False) if m3u8_url: - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None)) rtsp_url = self._search_regex( r']+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) diff --git a/youtube_dl/extractor/eitb.py b/youtube_dl/extractor/eitb.py index c83845fc2..713cb7b32 100644 --- a/youtube_dl/extractor/eitb.py +++ b/youtube_dl/extractor/eitb.py @@ -65,18 +65,14 @@ class EitbIE(InfoExtractor): if token_data: token = token_data.get('token') if token: - m3u8_formats = self._extract_m3u8_formats( - '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False)) hds_url = media.get('HDS_SURL') if hds_url: - f4m_formats = self._extract_f4m_formats( + formats.extend(self._extract_f4m_formats( '%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'), - video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index d1a95d87f..0f37ed786 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -151,11 +151,9 @@ class FunimationIE(InfoExtractor): errors.append(format_url) continue if determine_ext(format_url) == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( format_url + auth_token, display_id, 'mp4', entry_protocol='m3u8_native', - preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + preference=preference, m3u8_id='%s-hls' % funimation_id, fatal=False)) else: tbr = int_or_none(self._search_regex( r'-(\d+)[Kk]', format_url, 'tbr', default=None)) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 7f21d7410..4c4a87e2a 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -51,10 +51,8 @@ class FunnyOrDieIE(InfoExtractor): formats = [] - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)] bitrates.sort() diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index c65ef6bcf..b241c4868 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -369,11 +369,9 @@ class GloboIE(InfoExtractor): resource_url = resource['url'] signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + m3u8_id='hls', fatal=False)) else: formats.append({ 'url': signed_url, diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 05d27e75d..a7c3ce4ab 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -55,9 +55,7 @@ class HotStarIE(InfoExtractor): format_url = format_data['src'] ext = determine_ext(format_url) if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) elif ext == 'f4m': # produce broken files continue diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index a2e18c8a7..d1c1c210c 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -129,14 +129,10 @@ class IGNIE(InfoExtractor): formats = [] m3u8_url = api_data['refs'].get('m3uUrl') if m3u8_url: - m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) f4m_url = api_data['refs'].get('f4mUrl') if f4m_url: - f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) for asset in api_data['assets']: formats.append({ 'url': asset['url'], diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py index a92adf2b3..8e90d5986 100644 --- a/youtube_dl/extractor/jwplatform.py +++ b/youtube_dl/extractor/jwplatform.py @@ -44,10 +44,8 @@ class JWPlatformIE(InfoExtractor): source_url = self._proto_relative_url(source['file']) source_type = source.get('type') or '' if source_type == 'application/vnd.apple.mpegurl': - m3u8_formats = self._extract_m3u8_formats( - source_url, video_id, 'mp4', 'm3u8_native', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', 'm3u8_native', fatal=False)) elif source_type.startswith('audio'): formats.append({ 'url': source_url, diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 4807c8110..ccbc39c66 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -163,10 +163,8 @@ class KalturaIE(InfoExtractor): m3u8_url = info['dataUrl'].replace('format/url', 'format/applehttp') if referrer: m3u8_url += '?referrer=%s' % referrer - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) self._check_formats(formats, entry_id) self._sort_formats(formats) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 688eb2308..38fb3d9e4 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -110,23 +110,17 @@ class LivestreamIE(InfoExtractor): smil_url = video_data.get('smil_url') if smil_url: - smil_formats = self._extract_smil_formats(smil_url, video_id) - if smil_formats: - formats.extend(smil_formats) + formats.extend(self._extract_smil_formats(smil_url, video_id)) m3u8_url = video_data.get('m3u8_url') if m3u8_url: - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) f4m_url = video_data.get('f4m_url') if f4m_url: - f4m_formats = self._extract_f4m_formats( - f4m_url, video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + f4m_url, video_id, f4m_id='hds', fatal=False)) self._sort_formats(formats) comments = [{ @@ -158,17 +152,13 @@ class LivestreamIE(InfoExtractor): formats = [] smil_url = stream_info.get('play_url') if smil_url: - smil_formats = self._extract_smil_formats(smil_url, broadcast_id) - if smil_formats: - formats.extend(smil_formats) + formats.extend(self._extract_smil_formats(smil_url, broadcast_id)) entry_protocol = 'm3u8' if is_live else 'm3u8_native' m3u8_url = stream_info.get('m3u8_url') if m3u8_url: - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, broadcast_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False)) rtsp_url = stream_info.get('rtsp_url') if rtsp_url: @@ -293,10 +283,8 @@ class LivestreamOriginalIE(InfoExtractor): m3u8_url = video_data.get('httpUrl') if m3u8_url: - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=False)) rtsp_url = video_data.get('rtspUrl') if rtsp_url: diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 7c6b7841d..9d26030d3 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -68,13 +68,9 @@ class NBAIE(InfoExtractor): if video_url.startswith('/'): continue if video_url.endswith('.m3u8'): - m3u8_formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', fatal=False)) elif video_url.endswith('.f4m'): - f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False)) else: key = video_file.attrib.get('bitrate') format_info = { diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 8603fd692..3960d522e 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -44,17 +44,11 @@ class OoyalaBaseIE(InfoExtractor): urls.append(url) delivery_type = stream['delivery_type'] if delivery_type == 'hls' or '.m3u8' in url: - m3u8_formats = self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif delivery_type == 'hds' or '.f4m' in url: - f4m_formats = self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False)) elif '.smil' in url: - smil_formats = self._extract_smil_formats(url, embed_code, fatal=False) - if smil_formats: - formats.extend(smil_formats) + formats.extend(self._extract_smil_formats(url, embed_code, fatal=False)) else: formats.append({ 'url': url, diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 278b1d2bf..f2679591b 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -116,17 +116,13 @@ class RaiTVIE(InfoExtractor): ext = determine_ext(media_url) content_type = xpath_text(element, 'content-type') if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', - fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + fatal=False)) elif ext == 'f4m': - f4m_formats = self._extract_f4m_formats( + formats.extend(self._extract_f4m_formats( media_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', - video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + video_id, f4m_id='hds', fatal=False)) elif ext == 'stl': has_subtitle = True elif content_type.startswith('video/'): diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 9db62adb1..c5c47d01e 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -58,15 +58,11 @@ class RutubeIE(InfoExtractor): for format_id, format_url in options['video_balancer'].items(): ext = determine_ext(format_url) if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) elif ext == 'f4m': - f4m_formats = self._extract_f4m_formats( - format_url, video_id, f4m_id=format_id, fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) else: formats.append({ 'url': format_url, diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index e417bf661..41fddc375 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -63,15 +63,11 @@ class RuutuIE(InfoExtractor): processed_urls.append(video_url) ext = determine_ext(video_url) if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) elif ext == 'f4m': - f4m_formats = self._extract_f4m_formats( - video_url, video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds', fatal=False)) else: proto = compat_urllib_parse_urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py index a363b4d40..fe11c20df 100644 --- a/youtube_dl/extractor/tele13.py +++ b/youtube_dl/extractor/tele13.py @@ -57,9 +57,7 @@ class Tele13IE(InfoExtractor): if format_url and format_url not in urls: ext = determine_ext(format_url) if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) elif YoutubeIE.suitable(format_url): return self.url_result(format_url, 'Youtube') else: diff --git a/youtube_dl/extractor/toggle.py b/youtube_dl/extractor/toggle.py index a47239952..c54b876d3 100644 --- a/youtube_dl/extractor/toggle.py +++ b/youtube_dl/extractor/toggle.py @@ -132,13 +132,11 @@ class ToggleIE(InfoExtractor): vid_format = vid_format.replace(' ', '') # if geo-restricted, m3u8 is inaccessible, but mp4 is okay if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( video_url, video_id, ext='mp4', m3u8_id=vid_format, note='Downloading %s m3u8 information' % vid_format, errnote='Failed to download %s m3u8 information' % vid_format, - fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + fatal=False)) elif ext in ('mp4', 'wvm'): # wvm are drm-protected files formats.append({ diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py index 129668a99..86ba70ed9 100644 --- a/youtube_dl/extractor/vgtv.py +++ b/youtube_dl/extractor/vgtv.py @@ -151,10 +151,8 @@ class VGTVIE(XstreamIE): hls_url = streams.get('hls') if hls_url: - m3u8_formats = self._extract_m3u8_formats( - hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) hds_url = streams.get('hds') if hds_url: diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 9a1c377a4..433fc9914 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -277,11 +277,9 @@ class VikiIE(VikiBaseIE): r'^(\d+)[pP]$', format_id, 'height', default=None)) for protocol, format_dict in stream_dict.items(): if format_id == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( + formats.extend(self._extract_m3u8_formats( format_dict['url'], video_id, 'mp4', 'm3u8_native', - m3u8_id='m3u8-%s' % protocol, fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + m3u8_id='m3u8-%s' % protocol, fatal=False)) else: formats.append({ 'url': format_dict['url'], diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index ce08e6955..7af699982 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -437,10 +437,8 @@ class VimeoIE(VimeoBaseInfoExtractor): }) m3u8_url = config_files.get('hls', {}).get('url') if m3u8_url: - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps # at the same time without actual units specified. This lead to wrong sorting. self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id')) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index ef096cbd2..e8511398c 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -157,16 +157,12 @@ class WDRIE(InfoExtractor): preference = qualities(['S', 'M', 'L', 'XL']) if video_url.endswith('.f4m'): - f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id, f4m_id='hds', fatal=False)) elif video_url.endswith('.smil'): - smil_formats = self._extract_smil_formats(video_url, page_id, False, { + formats.extend(self._extract_smil_formats(video_url, page_id, False, { 'hdcore': '3.3.0', 'plugin': 'aasp-3.3.0.99.43', - }) - if smil_formats: - formats.extend(smil_formats) + })) else: formats.append({ 'url': video_url, @@ -177,9 +173,7 @@ class WDRIE(InfoExtractor): m3u8_url = self._search_regex(r'rel="adaptiv"[^>]+href="([^"]+)"', webpage, 'm3u8 url', default=None) if m3u8_url: - m3u8_formats = self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) if direct_urls: diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 92c12bac6..2a1f2f6d1 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -125,20 +125,14 @@ class ZDFIE(InfoExtractor): if ext == 'meta': continue elif ext == 'smil': - smil_formats = self._extract_smil_formats( - video_url, video_id, fatal=False) - if smil_formats: - formats.extend(smil_formats) + formats.extend(self._extract_smil_formats( + video_url, video_id, fatal=False)) elif ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id='hls', fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) elif ext == 'f4m': - f4m_formats = self._extract_f4m_formats( - video_url, video_id, f4m_id='hds', fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds', fatal=False)) else: proto = format_m.group('proto').lower() From 7447661e9b433d8871d0a898c9af271d5a49ec82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 28 Dec 2015 19:23:54 +0100 Subject: [PATCH 0201/1105] [franceculture] Fix test --- youtube_dl/extractor/franceculture.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 1e83a4e7e..2ac6157cd 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -22,8 +22,8 @@ class FranceCultureIE(InfoExtractor): 'alt_title': 'Carnet nomade | 13-14', 'vcodec': 'none', 'upload_date': '20140301', - 'thumbnail': r're:^http://www\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', - 'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche des « geeks », une enquête menée aux Etats', + 'thumbnail': r're:^http://static\.franceculture\.fr/.*/images/player/Carnet-nomade\.jpg$', + 'description': 'startswith:Avec :Jean-Baptiste Péretié pour son documentaire sur Arte "La revanche', 'timestamp': 1393700400, } } From ecf17d165317078d0e31191e40903a74fc3da3e8 Mon Sep 17 00:00:00 2001 From: flatgreen Date: Sun, 27 Dec 2015 15:30:45 +0100 Subject: [PATCH 0202/1105] [franceculture] Add extractor for '/emission-*' urls (closes #3777, closes #8022) --- youtube_dl/extractor/__init__.py | 5 +++- youtube_dl/extractor/franceculture.py | 40 +++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 971047ad4..68c9c1288 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -203,7 +203,10 @@ from .fourtube import FourTubeIE from .foxgay import FoxgayIE from .foxnews import FoxNewsIE from .foxsports import FoxSportsIE -from .franceculture import FranceCultureIE +from .franceculture import ( + FranceCultureIE, + FranceCultureEmissionIE, +) from .franceinter import FranceInterIE from .francetv import ( PluzzIE, diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py index 2ac6157cd..e2ca96283 100644 --- a/youtube_dl/extractor/franceculture.py +++ b/youtube_dl/extractor/franceculture.py @@ -8,6 +8,7 @@ from ..compat import ( from ..utils import ( determine_ext, int_or_none, + ExtractorError, ) @@ -28,8 +29,7 @@ class FranceCultureIE(InfoExtractor): } } - def _real_extract(self, url): - video_id = self._match_id(url) + def _extract_from_player(self, url, video_id): webpage = self._download_webpage(url, video_id) video_path = self._search_regex( @@ -42,6 +42,9 @@ class FranceCultureIE(InfoExtractor): r'\s+emission-(.*?)', webpage, 'display_id') + title = self._html_search_regex( r'(.*?)', webpage, 'title') alt_title = self._html_search_regex( @@ -66,4 +69,37 @@ class FranceCultureIE(InfoExtractor): 'alt_title': alt_title, 'thumbnail': thumbnail, 'description': description, + 'display_id': display_id, } + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._extract_from_player(url, video_id) + + +class FranceCultureEmissionIE(FranceCultureIE): + _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emission-(?P[^?#]+)' + _TEST = { + 'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', + 'info_dict': { + 'title': 'Jean-Gabriel Périot, cinéaste', + 'alt_title': 'Les Carnets de la création', + 'id': '5093239', + 'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13', + 'ext': 'mp3', + 'timestamp': 1444762500, + 'upload_date': '20151013', + 'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_path = self._html_search_regex( + r'[0-9]+)', video_path, 'new_id', group='id') + video_url = compat_urlparse.urljoin(url, video_path) + return self._extract_from_player(video_url, new_id) From e974356f32aa31ac8d9e5f2a664c6c521fb45892 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 29 Dec 2015 11:02:58 +0100 Subject: [PATCH 0203/1105] release 2015.12.29 --- docs/supportedsites.md | 16 ++++++++++++++-- youtube_dl/version.py | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1a5c7cde9..1646277ec 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -37,6 +37,7 @@ - **appletrailers:section** - **archive.org**: archive.org videos - **ARD** + - **ARD:mediathek**: Saarländischer Rundfunk - **ARD:mediathek** - **arte.tv** - **arte.tv:+7** @@ -124,8 +125,12 @@ - **dailymotion:user** - **DailymotionCloud** - **daum.net** + - **daum.net** - **DBTV** - **DCN** + - **dcn:live** + - **dcn:season** + - **dcn:video** - **DctpTv** - **DeezerPlaylist** - **defense.gouv.fr** @@ -181,6 +186,7 @@ - **FoxSports** - **france2.fr:generation-quoi** - **FranceCulture** + - **FranceCultureEmission** - **FranceInter** - **francetv**: France 2, 3, 4, 5 and Ô - **francetvinfo.fr** @@ -225,6 +231,7 @@ - **hitbox:live** - **HornBunny** - **HotNewHipHop** + - **HotStar** - **Howcast** - **HowStuffWorks** - **HuffPost**: Huffington Post @@ -405,6 +412,7 @@ - **parliamentlive.tv**: UK parliament videos - **Patreon** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) + - **pcmag** - **Periscope**: Periscope - **PhilharmonieDeParis**: Philharmonie de Paris - **Phoenix** @@ -445,6 +453,7 @@ - **radiofrance** - **RadioJavan** - **Rai** + - **RaiTV** - **RBMARadio** - **RDS**: RDS.ca - **RedTube** @@ -527,7 +536,6 @@ - **SportDeutschland** - **Sportschau** - **Srf** - - **SRMediathek**: Saarländischer Rundfunk - **SSA** - **stanfordoc**: Stanford Open ClassRoom - **Steam** @@ -561,6 +569,7 @@ - **TenPlay** - **TestTube** - **TF1** + - **TheIntercept** - **TheOnion** - **ThePlatform** - **ThePlatformFeed** @@ -586,7 +595,10 @@ - **TubiTv** - **Tudou** - **Tumblr** - - **TuneIn** + - **tunein:clip** + - **tunein:program** + - **tunein:station** + - **tunein:topic** - **Turbo** - **Tutv** - **tv.dfb.de** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 255d64269..a62baa305 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.12.23' +__version__ = '2015.12.29' From 422f7c112c12d92bf06c95ff33782c8e9a0df43e Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 11:36:04 +0100 Subject: [PATCH 0204/1105] [srgssr] update tests --- youtube_dl/extractor/rts.py | 30 +++++++++++++++++---------- youtube_dl/extractor/srgssr.py | 38 ++++++++++++---------------------- 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index dc10fd88b..f063ab5dd 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -28,7 +28,7 @@ class RTSIE(SRGSSRIE): 'info_dict': { 'id': '3449373', 'display_id': 'les-enfants-terribles', - 'ext': 'flv', + 'ext': 'mp4', 'duration': 1488, 'title': 'Les Enfants Terribles', 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.', @@ -38,6 +38,10 @@ class RTSIE(SRGSSRIE): 'thumbnail': 're:^https?://.*\.image', 'view_count': int, }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', @@ -45,7 +49,7 @@ class RTSIE(SRGSSRIE): 'info_dict': { 'id': '5742494', 'display_id': '5742494', - 'ext': 'flv', + 'ext': 'mp4', 'duration': 3720, 'title': 'Les yeux dans les cieux - Mon homard au Canada', 'description': 'md5:d22ee46f5cc5bac0912e5a0c6d44a9f7', @@ -55,6 +59,10 @@ class RTSIE(SRGSSRIE): 'thumbnail': 're:^https?://.*\.image', 'view_count': int, }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { 'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html', @@ -80,7 +88,7 @@ class RTSIE(SRGSSRIE): 'info_dict': { 'id': '5745356', 'display_id': 'londres-cachee-par-un-epais-smog', - 'ext': 'flv', + 'ext': 'mp4', 'duration': 33, 'title': 'Londres cachée par un épais smog', 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.', @@ -90,6 +98,10 @@ class RTSIE(SRGSSRIE): 'thumbnail': 're:^https?://.*\.image', 'view_count': int, }, + 'params': { + # m3u8 download + 'skip_download': True, + } }, { 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', @@ -180,16 +192,12 @@ class RTSIE(SRGSSRIE): auth_params = xpath_text(token, './/authparams', 'auth params') if not auth_params: continue - f4m_formats = self._extract_f4m_formats( + formats.extend(self._extract_f4m_formats( '%s?%s&hdcore=3.4.0&plugin=aasp-3.4.0.132.66' % (format_url, auth_params), - media_id, f4m_id=format_id, fatal=False) - if f4m_formats: - formats.extend(f4m_formats) + media_id, f4m_id=format_id, fatal=False)) elif format_url.endswith('.m3u8'): - m3u8_formats = self._extract_m3u8_formats( - format_url, media_id, 'mp4', m3u8_id=format_id, fatal=False) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', m3u8_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 563851b3f..4962fe885 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -47,31 +47,24 @@ class SRGSSRIE(InfoExtractor): created_date = media_data.get('createdDate') or metadata.get('createdDate') timestamp = parse_iso8601(created_date) - thumbnails = [] - if 'Image' in media_data: - for image in media_data['Image']['ImageRepresentations']['ImageRepresentation']: - thumbnails.append({ - 'id': image.get('id'), - 'url': image['url'], - }) + thumbnails = [{ + 'id': image.get('id'), + 'url': image['url'], + } for image in media_data.get('Image', {}).get('ImageRepresentations', {}).get('ImageRepresentation', [])] preference = qualities(['LQ', 'MQ', 'SD', 'HQ', 'HD']) formats = [] - for source in media_data['Playlists']['Playlist']: - assets = {} + for source in media_data.get('Playlists', {}).get('Playlist', []) + media_data.get('Downloads', {}).get('Download', []): protocol = source.get('@protocol') if protocol in ('HTTP-HDS', 'HTTP-HLS'): + assets = {} for quality in source['url']: assets[quality['@quality']] = quality['text'] asset_url = assets.get('HD') or assets.get('HQ') or assets.get('SD') or assets.get('MQ') or assets.get('LQ') if '.f4m' in asset_url: - f4m_formats = formats.extend(self._extract_f4m_formats(asset_url + '?hdcore=3.4.0', media_id, f4m_id='hds', fatal=False)) - if f4m_formats: - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats(asset_url + '?hdcore=3.4.0', media_id, f4m_id='hds', fatal=False)) elif '.m3u8' in asset_url: - m3u8_formats = formats.extend(self._extract_m3u8_formats(asset_url, media_id, m3u8_id='hls', fatal=False)) - if m3u8_formats: - formats.extend(m3u8_formats) + formats.extend(self._extract_m3u8_formats(asset_url, media_id, m3u8_id='hls', fatal=False)) else: for asset in source['url']: asset_url = asset['text'] @@ -84,15 +77,6 @@ class SRGSSRIE(InfoExtractor): 'preference': preference(asset['@quality']), 'ext': ext, }) - - if 'Downloads' in media_data: - for source in media_data['Downloads']['Download']: - for asset in source['url']: - formats.append({ - 'format_id': asset['@quality'], - 'url': asset['text'], - 'preference': preference(asset['@quality']) - }) self._sort_formats(formats) return { @@ -149,7 +133,7 @@ class SRGSSRPlayIE(InfoExtractor): 'info_dict': { 'id': '6348260', 'display_id': '6348260', - 'ext': 'flv', + 'ext': 'mp4', 'duration': 1796, 'title': 'Le 19h30', 'description': '', @@ -159,6 +143,10 @@ class SRGSSRPlayIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.image', 'view_count': int, }, + 'params': { + # m3u8 download + 'skip_download': True, + } }] def _real_extract(self, url): From db710571fd7bf6b697a7999fa1ea76dd2b8b1382 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 11:59:27 +0100 Subject: [PATCH 0205/1105] [daum] fix IE_NAME --- youtube_dl/extractor/daum.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index a083cc0dc..f08f57157 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -77,7 +77,7 @@ class DaumIE(InfoExtractor): class DaumClipIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P\d+)' - IE_NAME = 'daum.net' + IE_NAME = 'daum.net:clip' _TESTS = [{ 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', From 6c4d6609ad14d33ef1a9b13a938a9413ac438e2c Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 12:00:52 +0100 Subject: [PATCH 0206/1105] [phoenix] fix IE_NAME --- youtube_dl/extractor/phoenix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py index 6ce2ec19d..ac009f60f 100644 --- a/youtube_dl/extractor/phoenix.py +++ b/youtube_dl/extractor/phoenix.py @@ -4,6 +4,7 @@ from .zdf import ZDFIE class PhoenixIE(ZDFIE): + IE_NAME = 'phoenix.de' _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ (?: phoenix/die_sendungen/(?:[^/]+/)? From ce7d243c7e6b6f5eb5129a3e85aa75a3773b3a73 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 12:01:22 +0100 Subject: [PATCH 0207/1105] [srgssr] fix IE_DESC --- youtube_dl/extractor/srgssr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 4962fe885..2c8b4276b 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -90,6 +90,7 @@ class SRGSSRIE(InfoExtractor): class SRGSSRPlayIE(InfoExtractor): + IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites' _VALID_URL = r'https?://(?:(?:www|play)\.)?(?Psrf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?Pvideo|audio)/[^?]+\?id=(?P[0-9a-f\-]{36}|\d+)' _TESTS = [{ From eccde5e9de26dbe0c289b5bd1bf40b0482da59b1 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 14:53:06 +0100 Subject: [PATCH 0208/1105] [audimedia] split long lines --- youtube_dl/extractor/audimedia.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py index 9b037bb0c..3b2effa15 100644 --- a/youtube_dl/extractor/audimedia.py +++ b/youtube_dl/extractor/audimedia.py @@ -45,11 +45,15 @@ class AudiMediaIE(InfoExtractor): stream_url_hls = json_data.get('stream_url_hls') if stream_url_hls: - formats.extend(self._extract_m3u8_formats(stream_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + stream_url_hls, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) stream_url_hds = json_data.get('stream_url_hds') if stream_url_hds: - formats.extend(self._extract_f4m_formats(json_data.get('stream_url_hds') + '?hdcore=3.4.0', video_id, -1, f4m_id='hds', fatal=False)) + formats.extend(self._extract_f4m_formats( + stream_url_hds + '?hdcore=3.4.0', + video_id, f4m_id='hds', fatal=False)) for video_version in json_data.get('video_versions'): video_version_url = video_version.get('download_url') or video_version.get('stream_url') From f889ac45b83cd771919fba29c14cee9d24e637b7 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 14:55:36 +0100 Subject: [PATCH 0209/1105] [ign] split long lines --- youtube_dl/extractor/ign.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index d1c1c210c..c45c68c1d 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -120,19 +120,24 @@ class IGNIE(InfoExtractor): video_id = self._find_video_id(webpage) if not video_id: - return self.url_result(self._search_regex(self._EMBED_RE, webpage, 'embed url')) + return self.url_result(self._search_regex( + self._EMBED_RE, webpage, 'embed url')) return self._get_video_info(video_id) def _get_video_info(self, video_id): - api_data = self._download_json(self._API_URL_TEMPLATE % video_id, video_id) + api_data = self._download_json( + self._API_URL_TEMPLATE % video_id, video_id) formats = [] m3u8_url = api_data['refs'].get('m3uUrl') if m3u8_url: - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) f4m_url = api_data['refs'].get('f4mUrl') if f4m_url: - formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_f4m_formats( + f4m_url, video_id, f4m_id='hds', fatal=False)) for asset in api_data['assets']: formats.append({ 'url': asset['url'], From f2017cb020efe0956049bda11df4eec7c5985fda Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 14:58:22 +0100 Subject: [PATCH 0210/1105] [srgssr] split long lines and use m3u8_native --- youtube_dl/extractor/rts.py | 2 +- youtube_dl/extractor/srgssr.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py index f063ab5dd..3cc32847b 100644 --- a/youtube_dl/extractor/rts.py +++ b/youtube_dl/extractor/rts.py @@ -197,7 +197,7 @@ class RTSIE(SRGSSRIE): media_id, f4m_id=format_id, fatal=False)) elif format_url.endswith('.m3u8'): formats.extend(self._extract_m3u8_formats( - format_url, media_id, 'mp4', m3u8_id=format_id, fatal=False)) + format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) else: formats.append({ 'format_id': format_id, diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index 2c8b4276b..d06d34316 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -29,7 +29,8 @@ class SRGSSRIE(InfoExtractor): media_id)[media_type.capitalize()] if media_data.get('block') and media_data['block'] in self._ERRORS: - raise ExtractorError('%s said: %s' % (self.IE_NAME, self._ERRORS[media_data['block']]), expected=True) + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, self._ERRORS[media_data['block']]), expected=True) return media_data @@ -62,9 +63,13 @@ class SRGSSRIE(InfoExtractor): assets[quality['@quality']] = quality['text'] asset_url = assets.get('HD') or assets.get('HQ') or assets.get('SD') or assets.get('MQ') or assets.get('LQ') if '.f4m' in asset_url: - formats.extend(self._extract_f4m_formats(asset_url + '?hdcore=3.4.0', media_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_f4m_formats( + asset_url + '?hdcore=3.4.0', media_id, + f4m_id='hds', fatal=False)) elif '.m3u8' in asset_url: - formats.extend(self._extract_m3u8_formats(asset_url, media_id, m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + asset_url, media_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) else: for asset in source['url']: asset_url = asset['text'] From 98d7c0f4f7d88cdfed8dfd3ad81efec36090c7fb Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 15:02:18 +0100 Subject: [PATCH 0211/1105] [tele13] split long lines --- youtube_dl/extractor/tele13.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py index fe11c20df..4e860db0a 100644 --- a/youtube_dl/extractor/tele13.py +++ b/youtube_dl/extractor/tele13.py @@ -46,8 +46,12 @@ class Tele13IE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", webpage, 'setup code') - sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json) + setup_js = self._search_regex( + r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", + webpage, 'setup code') + sources = self._parse_json(self._search_regex( + r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), + display_id, js_to_json) preference = qualities(['Móvil', 'SD', 'HD']) formats = [] @@ -57,7 +61,9 @@ class Tele13IE(InfoExtractor): if format_url and format_url not in urls: ext = determine_ext(format_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + format_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) elif YoutubeIE.suitable(format_url): return self.url_result(format_url, 'Youtube') else: @@ -72,8 +78,11 @@ class Tele13IE(InfoExtractor): return { 'id': display_id, - 'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'), - 'description': self._html_search_meta('description', webpage, 'description'), - 'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None), + 'title': self._search_regex( + r'title\s*:\s*"([^"]+)"', setup_js, 'title'), + 'description': self._html_search_meta( + 'description', webpage, 'description'), + 'thumbnail': self._search_regex( + r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None), 'formats': formats, } From bf96b45238b221a01ea9e07d7747313272cde142 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 15:03:14 +0100 Subject: [PATCH 0212/1105] [rai] split long lines --- youtube_dl/extractor/rai.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index f2679591b..a4dc5c335 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -107,7 +107,8 @@ class RaiTVIE(InfoExtractor): return xml.replace(' tag elementi', '').replace('>/', ' Date: Tue, 29 Dec 2015 15:05:21 +0100 Subject: [PATCH 0213/1105] [ooyala] split long lines, fix test duration and add hdcode param to hds url --- youtube_dl/extractor/ooyala.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 3960d522e..20b984288 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -13,6 +13,9 @@ from ..compat import compat_urllib_parse class OoyalaBaseIE(InfoExtractor): + _PLAYER_BASE = 'http://player.ooyala.com/' + _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' + _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?' def _extract(self, content_tree_url, video_id, domain='example.org'): content_tree = self._download_json(content_tree_url, video_id)['content_tree'] @@ -31,24 +34,33 @@ class OoyalaBaseIE(InfoExtractor): formats = [] for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): auth_data = self._download_json( - 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}), + self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + + compat_urllib_parse.urlencode({ + 'domain': domain, + 'supportedFormats': supported_format + }), video_id, 'Downloading %s JSON' % supported_format) cur_auth_data = auth_data['authorization_data'][embed_code] if cur_auth_data['authorized']: for stream in cur_auth_data['streams']: - url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8') + url = base64.b64decode( + stream['url']['data'].encode('ascii')).decode('utf-8') if url in urls: continue urls.append(url) delivery_type = stream['delivery_type'] if delivery_type == 'hls' or '.m3u8' in url: - formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + url, embed_code, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) elif delivery_type == 'hds' or '.f4m' in url: - formats.extend(self._extract_f4m_formats(url, embed_code, f4m_id='hds', fatal=False)) + formats.extend(self._extract_f4m_formats( + url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) elif '.smil' in url: - formats.extend(self._extract_smil_formats(url, embed_code, fatal=False)) + formats.extend(self._extract_smil_formats( + url, embed_code, fatal=False)) else: formats.append({ 'url': url, @@ -62,7 +74,8 @@ class OoyalaBaseIE(InfoExtractor): 'fps': float_or_none(stream.get('framerate')), }) else: - raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True) + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, cur_auth_data['message']), expected=True) self._sort_formats(formats) video_info['formats'] = formats @@ -120,7 +133,7 @@ class OoyalaIE(OoyalaBaseIE): url, smuggled_data = unsmuggle_url(url, {}) embed_code = self._match_id(url) domain = smuggled_data.get('domain') - content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code) + content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) return self._extract(content_tree_url, embed_code, domain) @@ -147,7 +160,7 @@ class OoyalaExternalIE(OoyalaBaseIE): 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', - 'duration': 1302000, + 'duration': 1302.0, }, 'params': { # m3u8 download @@ -157,5 +170,5 @@ class OoyalaExternalIE(OoyalaBaseIE): def _real_extract(self, url): partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups() - content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id) + content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id) return self._extract(content_tree_url, video_id) From 0368181998cd5ec365bb8ed10c5da0188fbed798 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 15:07:21 +0100 Subject: [PATCH 0214/1105] [wdr] split long lines --- youtube_dl/extractor/wdr.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index e8511398c..a851578e0 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -108,7 +108,9 @@ class WDRIE(InfoExtractor): if mobj.group('player') is None: entries = [ self.url_result(page_url + href, 'WDR') - for href in re.findall(r']+href="([^"]+)"', webpage, 'm3u8 url', default=None) + m3u8_url = self._search_regex( + r'rel="adaptiv"[^>]+href="([^"]+)"', + webpage, 'm3u8 url', default=None) if m3u8_url: - formats.extend(self._extract_m3u8_formats(m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_m3u8_formats( + m3u8_url, page_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) - direct_urls = re.findall(r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) + direct_urls = re.findall( + r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage) if direct_urls: for quality, video_url in direct_urls: formats.append({ From 53365f74a7142c0e16eeb8db2c08f1f200de2406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 29 Dec 2015 16:14:05 +0100 Subject: [PATCH 0215/1105] Credit @flatgreen for FranceCultureEmission (#8022) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index ce350e96c..20f51009e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -149,3 +149,4 @@ Marco Ferragina reiv Muratcan Simsek Evan Lu +flatgreen From 0d8a0fdc30845aecfb9c98ccd4884dd1456dda86 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 29 Dec 2015 16:38:06 +0100 Subject: [PATCH 0216/1105] [srgssr] use SRFIE format ids --- youtube_dl/extractor/srgssr.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py index d06d34316..4707029ca 100644 --- a/youtube_dl/extractor/srgssr.py +++ b/youtube_dl/extractor/srgssr.py @@ -57,29 +57,26 @@ class SRGSSRIE(InfoExtractor): formats = [] for source in media_data.get('Playlists', {}).get('Playlist', []) + media_data.get('Downloads', {}).get('Download', []): protocol = source.get('@protocol') - if protocol in ('HTTP-HDS', 'HTTP-HLS'): - assets = {} - for quality in source['url']: - assets[quality['@quality']] = quality['text'] - asset_url = assets.get('HD') or assets.get('HQ') or assets.get('SD') or assets.get('MQ') or assets.get('LQ') - if '.f4m' in asset_url: + for asset in source['url']: + asset_url = asset['text'] + quality = asset['@quality'] + format_id = '%s-%s' % (protocol, quality) + if protocol == 'HTTP-HDS': formats.extend(self._extract_f4m_formats( asset_url + '?hdcore=3.4.0', media_id, - f4m_id='hds', fatal=False)) - elif '.m3u8' in asset_url: + f4m_id=format_id, fatal=False)) + elif protocol == 'HTTP-HLS': formats.extend(self._extract_m3u8_formats( asset_url, media_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - for asset in source['url']: - asset_url = asset['text'] + m3u8_id=format_id, fatal=False)) + else: ext = None - if asset_url.startswith('rtmp'): + if protocol == 'RTMP': ext = self._search_regex(r'([a-z0-9]+):[^/]+', asset_url, 'ext') formats.append({ - 'format_id': asset['@quality'], + 'format_id': format_id, 'url': asset_url, - 'preference': preference(asset['@quality']), + 'preference': preference(quality), 'ext': ext, }) self._sort_formats(formats) From 030dfb04e0deff7e824194a7ddea2b55cf050e34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Dec 2015 23:57:46 +0600 Subject: [PATCH 0217/1105] [videomore] Add extractor (Closes #8040) --- youtube_dl/extractor/__init__.py | 5 + youtube_dl/extractor/videomore.py | 186 ++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 youtube_dl/extractor/videomore.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 75d191d5d..fb7151443 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -781,6 +781,11 @@ from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE +from .videomore import ( + VideomoreIE, + VideomoreVideoIE, + VideomoreSeasonIE, +) from .videopremium import VideoPremiumIE from .videott import VideoTtIE from .vidme import VidmeIE diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py new file mode 100644 index 000000000..dbc9dbdb7 --- /dev/null +++ b/youtube_dl/extractor/videomore.py @@ -0,0 +1,186 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_age_limit, + parse_iso8601, + xpath_text, +) + + +class VideomoreIE(InfoExtractor): + _VALID_URL = r'videomore:(?P\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P\d+)(?:[/?#&]|\.(?:xml|json)|$)' + _TESTS = [{ + 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', + 'md5': '70875fbf57a1cd004709920381587185', + 'info_dict': { + 'id': '367617', + 'ext': 'flv', + 'title': 'В гостях Алексей Чумаков и Юлия Ковальчук', + 'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 2910, + 'age_limit': 16, + 'view_count': int, + }, + }, { + 'url': 'http://videomore.ru/elki_3?track_id=364623', + 'only_matching': True, + }, { + 'url': 'http://videomore.ru/embed/364623', + 'only_matching': True, + }, { + 'url': 'http://videomore.ru/video/tracks/364623.xml', + 'only_matching': True, + }, { + 'url': 'http://videomore.ru/video/tracks/364623.json', + 'only_matching': True, + }, { + 'url': 'http://videomore.ru/video/tracks/158031/quotes/33248', + 'only_matching': True, + }, { + 'url': 'videomore:367617', + 'only_matching': True, + }] + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+data=(["\'])https?://videomore.ru/player\.swf\?.*config=(?Phttps?://videomore\.ru/(?:[^/]+/)+\d+\.xml).*\1', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('sid') or mobj.group('id') + + video = self._download_xml( + 'http://videomore.ru/video/tracks/%s.xml' % video_id, + video_id, 'Downloading video XML') + + video_url = xpath_text(video, './/video_url', 'video url', fatal=True) + formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds') + + data = self._download_json( + 'http://videomore.ru/video/tracks/%s.json' % video_id, + video_id, 'Downloadinng video JSON') + + title = data.get('title') or data['project_title'] + description = data.get('description') or data.get('description_raw') + timestamp = parse_iso8601(data.get('published_at')) + duration = int_or_none(data.get('duration')) + view_count = int_or_none(data.get('views')) + age_limit = parse_age_limit(data.get('min_age')) + thumbnails = [{ + 'url': thumbnail, + } for thumbnail in data.get('big_thumbnail_urls', [])] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': view_count, + 'age_limit': age_limit, + 'formats': formats, + } + + +class VideomoreVideoIE(InfoExtractor): + IE_NAME = 'videomore:video' + _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P[^/?#&]+)[/?#&]*$' + _TESTS = [{ + # single video with og:video:iframe + 'url': 'http://videomore.ru/elki_3', + 'info_dict': { + 'id': '364623', + 'ext': 'flv', + 'title': 'Ёлки 3', + 'description': '', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 5579, + 'age_limit': 6, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + # season single serie with og:video:iframe + 'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya', + 'only_matching': True, + }, { + 'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk', + 'only_matching': True, + }, { + # single video without og:video:iframe + 'url': 'http://videomore.ru/marin_i_ego_druzya', + 'info_dict': { + 'id': '359073', + 'ext': 'flv', + 'title': '1 серия. Здравствуй, Аквавилль!', + 'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7', + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 754, + 'age_limit': 6, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }] + + @classmethod + def suitable(cls, url): + return False if VideomoreIE.suitable(url) else super(VideomoreVideoIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_url = self._og_search_property( + 'video:iframe', webpage, 'video url', default=None) + + if not video_url: + video_id = self._search_regex( + (r'config\s*:\s*["\']https?://videomore\.ru/video/tracks/(\d+)\.xml', + r'track-id=["\'](\d+)', + r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id') + video_url = 'videomore:%s' % video_id + + return self.url_result(video_url, VideomoreIE.ie_key()) + + +class VideomoreSeasonIE(InfoExtractor): + IE_NAME = 'videomore:season' + _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P[^/]+/[^/?#&]+)[/?#&]*$' + _TESTS = [{ + 'url': 'http://videomore.ru/molodezhka/sezon_promo', + 'info_dict': { + 'id': 'molodezhka/sezon_promo', + 'title': 'Молодежка Промо', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + title = self._og_search_title(webpage) + + entries = [ + self.url_result(item) for item in re.findall( + r']+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"' + % display_id, webpage)] + + return self.playlist_result(entries, display_id, title) From ff18735cb2440dd2e628f0bf497325b50848353b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 29 Dec 2015 23:58:23 +0600 Subject: [PATCH 0218/1105] [extractor/generic] Add support for videomore embeds --- youtube_dl/extractor/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 70a8d8eb9..d79e1adc9 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -54,6 +54,7 @@ from .snagfilms import SnagFilmsEmbedIE from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE +from .videomore import VideomoreIE from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .ultimedia import UltimediaIE @@ -1743,6 +1744,11 @@ class GenericIE(InfoExtractor): if pladform_url: return self.url_result(pladform_url) + # Look for Videomore embeds + videomore_url = VideomoreIE._extract_url(webpage) + if videomore_url: + return self.url_result(videomore_url) + # Look for Playwire embeds mobj = re.search( r']+data-config=(["\'])(?P(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) From 453fe2a345358064609df9adeee302e19aa636bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Dec 2015 00:13:00 +0600 Subject: [PATCH 0219/1105] [dramafever] Fix subtitles extraction (Closes #8049) --- youtube_dl/extractor/dramafever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 60ed438f8..b3b21d65f 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -108,7 +108,7 @@ class DramaFeverIE(DramaFeverBaseIE): if value: subfile = value[0].get('subfile') or value[0].get('new_subfile') if subfile and subfile != 'http://www.dramafever.com/st/': - info['subtitiles'].setdefault('English', []).append({ + info.setdefault('subtitles', {}).setdefault('English', []).append({ 'ext': 'srt', 'url': subfile, }) From 69759a5990ad897bdb1af9c44d5c73a2be1e92c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Dec 2015 00:44:07 +0600 Subject: [PATCH 0220/1105] [videomore] Set IE_NAME --- youtube_dl/extractor/videomore.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index dbc9dbdb7..16078ac19 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -13,6 +13,7 @@ from ..utils import ( class VideomoreIE(InfoExtractor): + IE_NAME = 'videomore' _VALID_URL = r'videomore:(?P\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P\d+)(?:[/?#&]|\.(?:xml|json)|$)' _TESTS = [{ 'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', From f5bc4b5f95393b5fc83f5c64d44ee78093f9d798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 30 Dec 2015 23:12:35 +0600 Subject: [PATCH 0221/1105] [options] Prefer --convert-subs spelling --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index c46e136bf..ade58c375 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -752,7 +752,7 @@ def parseOpts(overrideArguments=None): metavar='CMD', dest='exec_cmd', help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'') postproc.add_option( - '--convert-subtitles', '--convert-subs', + '--convert-subs', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, help='Convert the subtitles to other format (currently supported: srt|ass|vtt)') From 3092fc4035dd3a0841a92227e9bd80b7b9e4d871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Dec 2015 01:09:21 +0600 Subject: [PATCH 0222/1105] [udemy] Fix typo --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 59832b1ec..ce7e211fc 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -147,7 +147,7 @@ class UdemyIE(InfoExtractor): # Error could possibly mean we are not enrolled in the course if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: self._enroll_course(webpage, course_id) - lecture_id = self._download_lecture(course_id, lecture_id) + lecture = self._download_lecture(course_id, lecture_id) else: raise From 7109903e6140e01b5d5980fc80c6ef1b6c86797a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Dec 2015 03:10:44 +0600 Subject: [PATCH 0223/1105] [extractor/common] Document chapter and series fields --- youtube_dl/extractor/common.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 655207447..c443daf20 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -200,6 +200,26 @@ class InfoExtractor(object): end_time: Time in seconds where the reproduction should end, as specified in the URL. + The following fields should only be used when the video belongs to some logical + chapter or section: + + chapter: Name or title of the chapter the video belongs to. + chapter_id: Number or id of the chapter the video belongs to, as an integer + or unicode string. + + The following fields should only be used when the video is an episode of some + series or programme: + + series: Title of the series or programme the video episode belongs to. + season: Title of the season the video episode belongs to. + season_id: Number or id of the season the video episode belongs to, as an + integer or unicode string. + episode: Title of the video episode. Unlike mandatory video title field, + this field should denote the exact title of the video episode + without any kind of decoration. + episode_id: Number or id of the video episode within a season, as an integer + or unicode string. + Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, None is equivalent to absence of information. From 4d402db52140458b1b4707c30fb01bb92861a8cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Dec 2015 03:11:21 +0600 Subject: [PATCH 0224/1105] [udemy] Extract chapter info --- youtube_dl/extractor/udemy.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index ce7e211fc..41097829d 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -244,10 +244,25 @@ class UdemyCourseIE(UdemyIE): 'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id, course_id, 'Downloading course curriculum') - entries = [ - self.url_result( - 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy') - for asset in response if asset.get('assetType') or asset.get('asset_type') == 'Video' - ] + entries = [] + chapter, chapter_id = None, None + for asset in response: + asset_type = asset.get('assetType') or asset.get('asset_type') + if asset_type == 'Video': + asset_id = asset.get('id') + if asset_id: + entry = { + '_type': 'url_transparent', + 'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), + 'ie_key': UdemyIE.ie_key(), + } + if chapter_id: + entry['chapter_id'] = chapter_id + if chapter: + entry['chapter'] = chapter + entries.append(entry) + elif asset.get('type') == 'chapter': + chapter_id = asset.get('index') or asset.get('object_index') + chapter = asset.get('title') return self.playlist_result(entries, course_id, course_title) From 7a0b07c719b621b1572120ed3f04870fa5737bfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Dec 2015 03:13:02 +0600 Subject: [PATCH 0225/1105] [videomore] Extract series info --- youtube_dl/extractor/videomore.py | 56 +++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 16078ac19..3bd96e445 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -23,11 +23,56 @@ class VideomoreIE(InfoExtractor): 'ext': 'flv', 'title': 'В гостях Алексей Чумаков и Юлия Ковальчук', 'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.', + 'series': 'Кино в деталях', + 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук', + 'episode_id': None, + 'season': 'Сезон 2015', + 'season_id': 5, 'thumbnail': 're:^https?://.*\.jpg', 'duration': 2910, 'age_limit': 16, 'view_count': int, }, + }, { + 'url': 'http://videomore.ru/embed/259974', + 'info_dict': { + 'id': '259974', + 'ext': 'flv', + 'title': '80 серия', + 'description': '«Медведей» ждет решающий матч. Макеев выясняет отношения со Стрельцовым. Парни узнают подробности прошлого Макеева.', + 'series': 'Молодежка', + 'episode': '80 серия', + 'episode_id': 40, + 'season': '2 сезон', + 'season_id': 2, + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 2809, + 'age_limit': 16, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://videomore.ru/molodezhka/sezon_promo/341073', + 'info_dict': { + 'id': '341073', + 'ext': 'flv', + 'title': 'Команда проиграла из-за Бакина?', + 'description': 'Молодежка 3 сезон скоро', + 'series': 'Молодежка', + 'episode': 'Команда проиграла из-за Бакина?', + 'episode_id': None, + 'season': 'Промо', + 'season_id': 99, + 'thumbnail': 're:^https?://.*\.jpg', + 'duration': 29, + 'age_limit': 16, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://videomore.ru/elki_3?track_id=364623', 'only_matching': True, @@ -81,10 +126,21 @@ class VideomoreIE(InfoExtractor): 'url': thumbnail, } for thumbnail in data.get('big_thumbnail_urls', [])] + series = data.get('project_title') + episode = data.get('title') + episode_id = data.get('episode_of_season') or None + season = data.get('season_title') + season_id = data.get('season_pos') or None + return { 'id': video_id, 'title': title, 'description': description, + 'series': series, + 'episode': episode, + 'episode_id': episode_id, + 'season': season, + 'season_id': season_id, 'thumbnails': thumbnails, 'timestamp': timestamp, 'duration': duration, From c7932289e73f62d5d73b942175c66510acc2be7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 30 Dec 2015 23:57:19 +0100 Subject: [PATCH 0226/1105] [cbsnews] Fix extraction of the URL for the 'RtmpDesktop' format (fixes #8048) --- youtube_dl/extractor/cbsnews.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index f9a64a0a2..d211ec23b 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -5,6 +5,7 @@ import re import json from .common import InfoExtractor +from ..utils import remove_start class CBSNewsIE(InfoExtractor): @@ -62,6 +63,7 @@ class CBSNewsIE(InfoExtractor): uri = item.get('media' + format_id + 'URI') if not uri: continue + uri = remove_start(uri, '{manifest:none}') fmt = { 'url': uri, 'format_id': format_id, @@ -70,6 +72,8 @@ class CBSNewsIE(InfoExtractor): play_path = re.sub( r'{slistFilePath}', '', uri.split('')[-1].split('{break}')[-1]) + play_path = re.sub( + r'{manifest:.+}.*$', '', play_path) fmt.update({ 'app': 'ondemand?auth=cbs', 'play_path': 'mp4:' + play_path, From 29f36839016e989f4a335fdd51d63f083e26d555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Dec 2015 05:31:01 +0600 Subject: [PATCH 0227/1105] [espn] Remove broken flag --- youtube_dl/extractor/espn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index e6f8f0337..7d758102c 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -5,7 +5,6 @@ from .common import InfoExtractor class ESPNIE(InfoExtractor): _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P[^/]+)' - _WORKING = False _TESTS = [{ 'url': 'http://espn.go.com/video/clip?id=10365079', 'info_dict': { From df827a983aa94ba99542230c6f7585b36ab49713 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Dec 2015 06:06:36 +0600 Subject: [PATCH 0228/1105] [discovery] Allow https (Closes #8065) --- youtube_dl/extractor/discovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index d6723ecf2..507f4bdad 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -9,7 +9,7 @@ from ..compat import compat_str class DiscoveryIE(InfoExtractor): - _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P[a-zA-Z0-9_\-]*)(?:\.htm)?' + _VALID_URL = r'https?://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P[a-zA-Z0-9_\-]*)(?:\.htm)?' _TESTS = [{ 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', 'info_dict': { From d5f6429de87da4bffa0be7703d774681393f1ffb Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 31 Dec 2015 04:02:08 +0100 Subject: [PATCH 0229/1105] [canalplus] improve extraction(fixes #6301) - extract data from json instead of xml - fix http format urls - extract more metadata - update tests - make m3u8 and f4m format extraction non fatal - use m3u8_native implementation --- youtube_dl/extractor/canalplus.py | 59 ++++++++++++++++++------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 004372f8d..25b2d4efe 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -10,13 +10,14 @@ from ..utils import ( unified_strdate, url_basename, qualities, + int_or_none, ) class CanalplusIE(InfoExtractor): IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' _VALID_URL = r'https?://(?:www\.(?Pcanalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P.*)|player\.canalplus\.fr/#/(?P[0-9]+))' - _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s' + _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json' _SITE_ID_MAP = { 'canalplus.fr': 'cplus', 'piwiplus.fr': 'teletoon', @@ -26,10 +27,10 @@ class CanalplusIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', - 'md5': 'b3481d7ca972f61e37420798d0a9d934', + 'md5': '12164a6f14ff6df8bd628e8ba9b10b78', 'info_dict': { 'id': '1263092', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Le Zapping - 13/05/15', 'description': 'md5:09738c0d06be4b5d06a0940edb0da73f', 'upload_date': '20150513', @@ -56,10 +57,10 @@ class CanalplusIE(InfoExtractor): 'skip': 'videos get deleted after a while', }, { 'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', - 'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4', + 'md5': '38b8f7934def74f0d6f3ba6c036a5f82', 'info_dict': { 'id': '1213714', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', 'description': 'md5:8216206ec53426ea6321321f3b3c16db', 'upload_date': '20150211', @@ -82,15 +83,16 @@ class CanalplusIE(InfoExtractor): webpage, 'video id', group='id') info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) - doc = self._download_xml(info_url, video_id, 'Downloading video XML') + video_data = self._download_json(info_url, video_id, 'Downloading video JSON') - video_info = [video for video in doc if video.find('ID').text == video_id][0] - media = video_info.find('MEDIA') - infos = video_info.find('INFOS') + if isinstance(video_data, list): + video_data = [video for video in video_data if video.get('ID') == video_id][0] + media = video_data['MEDIA'] + infos = video_data['INFOS'] - preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']) + preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD']) - fmt_url = next(iter(media.find('VIDEOS'))).text + fmt_url = next(iter(media.get('VIDEOS'))) if '/geo' in fmt_url.lower(): response = self._request_webpage( HEADRequest(fmt_url), video_id, @@ -101,35 +103,42 @@ class CanalplusIE(InfoExtractor): expected=True) formats = [] - for fmt in media.find('VIDEOS'): - format_url = fmt.text + for format_id, format_url in media['VIDEOS'].items(): if not format_url: continue - format_id = fmt.tag if format_id == 'HLS': formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', preference=preference(format_id))) + format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) elif format_id == 'HDS': formats.extend(self._extract_f4m_formats( - format_url + '?hdcore=2.11.3', video_id, preference=preference(format_id))) + format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False)) else: formats.append({ - 'url': format_url, + # the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js + 'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes', 'format_id': format_id, 'preference': preference(format_id), }) self._sort_formats(formats) + thumbnails = [{ + 'id': image_id, + 'url': image_url, + } for image_id, image_url in media.get('images', {}).items()] + + titrage = infos['TITRAGE'] + return { 'id': video_id, 'display_id': display_id, - 'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text, - infos.find('TITRAGE/SOUS_TITRE').text), - 'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), - 'thumbnail': media.find('IMAGES/GRAND').text, - 'description': infos.find('DESCRIPTION').text, - 'view_count': int(infos.find('NB_VUES').text), - 'like_count': int(infos.find('NB_LIKES').text), - 'comment_count': int(infos.find('NB_COMMENTS').text), + 'title': '%s - %s' % (titrage['TITRE'], + titrage['SOUS_TITRE']), + 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')), + 'thumbnails': thumbnails, + 'description': infos.get('DESCRIPTION'), + 'duration': int_or_none(infos.get('DURATION')), + 'view_count': int_or_none(infos.get('NB_VUES')), + 'like_count': int_or_none(infos.get('NB_LIKES')), + 'comment_count': int_or_none(infos.get('NB_COMMENTS')), 'formats': formats, } From 9787c5f4c89714acd36685b84671e6ea69307a6f Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 31 Dec 2015 12:02:33 +0100 Subject: [PATCH 0230/1105] [fox] Add new extractor(closes #3063) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/fox.py | 39 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 youtube_dl/extractor/fox.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb7151443..2063ef633 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -203,6 +203,7 @@ from .flickr import FlickrIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .fourtube import FourTubeIE +from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import FoxNewsIE from .foxsports import FoxSportsIE diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py new file mode 100644 index 000000000..ab97b3196 --- /dev/null +++ b/youtube_dl/extractor/fox.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class FOXIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.fox.com/watch/255180355939/7684182528', + 'info_dict': { + 'id': '255180355939', + 'ext': 'mp4', + 'title': 'Official Trailer: Gotham', + 'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.', + 'duration': 129, + }, + 'add_ie': ['ThePlatform'], + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + release_url = self._parse_json(self._search_regex( + r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), + video_id)['release_url'] + '&manifest=m3u' + + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url(release_url, {'force_smil_url': True}), + 'id': video_id, + } From 80b8b72cb847bd286eea01819bf43fd8bbe6aa92 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 31 Dec 2015 13:36:07 +0100 Subject: [PATCH 0231/1105] [animalplanet] Add new extractor(closes #5303) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/animalplanet.py | 53 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/animalplanet.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2063ef633..abfabc7da 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -19,6 +19,7 @@ from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE +from .animalplanet import AnimalPlanetIE from .anitube import AnitubeIE from .anysex import AnySexIE from .aol import AolIE diff --git a/youtube_dl/extractor/animalplanet.py b/youtube_dl/extractor/animalplanet.py new file mode 100644 index 000000000..0cebc8185 --- /dev/null +++ b/youtube_dl/extractor/animalplanet.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + parse_duration, + parse_iso8601, +) + + +class AnimalPlanetIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/([^/]+/)*(?P[^/\?#]+)' + _TESTS = [{ + 'url': 'http://www.animalplanet.com/tv-shows/i-shouldnt-be-alive/videos/dog-saves-injured-owner/', + 'info_dict': { + 'id': '10608', + 'ext': 'mp4', + 'title': 'Dog Saves Injured Owner', + 'description': 'A world class athlete is put to the test when she falls into a canyon and breaks her hip. Her only companion is her dog, Taz, who is on a mission to save her!', + 'upload_date': '20100410', + 'timestamp': 1270857727, + 'duration': 220, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://www.animalplanet.com/longfin-eels-maneaters/', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + video_data = self._parse_json(self._search_regex( + r'initialVideoData\s*=\s*({.+?});', + webpage, 'initialVideoData'), display_id)['playlist'][0] + + return { + 'id': compat_str(video_data['id']), + 'display_id': display_id, + 'title': video_data['title'], + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailURL'), + 'duration': parse_duration(video_data.get('video_length')), + 'timestamp': parse_iso8601(video_data.get('publishedDate')), + 'formats': self._extract_m3u8_formats( + video_data['src'], display_id, 'mp4', + 'm3u8_native', m3u8_id='hls') + } From 9d46608efabe25454288b5decaa96f9216506770 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 31 Dec 2015 16:35:51 +0100 Subject: [PATCH 0232/1105] [ora] Add new extractor(closes #7732) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ora.py | 75 ++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 youtube_dl/extractor/ora.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index abfabc7da..3df80bf00 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -491,6 +491,7 @@ from .ooyala import ( OoyalaIE, OoyalaExternalIE, ) +from .ora import OraTVIE from .orf import ( ORFTVthekIE, ORFOE1IE, diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py new file mode 100644 index 000000000..9c4255a2d --- /dev/null +++ b/youtube_dl/extractor/ora.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + get_element_by_attribute, + qualities, + unescapeHTML, +) + + +class OraTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P[^/\?#]+)' + _TEST = { + 'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', + 'md5': 'fa33717591c631ec93b04b0e330df786', + 'info_dict': { + 'id': '50178', + 'ext': 'mp4', + 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!', + 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1', + 'duration': 1477, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + video_data = self._search_regex( + r'"current"\s*:\s*({[^}]+?})', webpage, 'current video') + m3u8_url = self._search_regex( + r'"hls_stream"\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) + if m3u8_url: + formats = self._extract_m3u8_formats( + m3u8_url, display_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + # simular to GameSpotIE + m3u8_path = compat_urlparse.urlparse(m3u8_url).path + QUALITIES_RE = r'((,[a-z]+\d+)+,?)' + available_qualities = self._search_regex( + QUALITIES_RE, m3u8_path, 'qualities').strip(',').split(',') + http_path = m3u8_path[1:].split('/', 1)[1] + http_template = re.sub(QUALITIES_RE, r'%s', http_path) + http_template = http_template.replace('.csmil/master.m3u8', '') + http_template = compat_urlparse.urljoin( + 'http://videocdn-pmd.ora.tv/', http_template) + preference = qualities( + ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080']) + for q in available_qualities: + formats.append({ + 'url': http_template % q, + 'format_id': q, + 'preference': preference(q), + }) + self._sort_formats(formats) + else: + return self.url_result(self._search_regex( + r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube') + + return { + 'id': self._search_regex( + r'"video_id"\s*:\s*(\d+)', video_data, 'video id'), + 'display_id': display_id, + 'title': unescapeHTML(self._og_search_title(webpage)), + 'description': get_element_by_attribute( + 'class', 'video_txt_decription', webpage), + 'thumbnail': self._proto_relative_url(self._search_regex( + r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), + 'duration': int(self._search_regex( + r'"duration"\s*:\s*(\d+)', video_data, 'duration')), + 'formats': formats, + } From 7de81fcc530bfad9023a1a89622c15e4443de5eb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 31 Dec 2015 16:50:53 +0100 Subject: [PATCH 0233/1105] release 2015.12.31 --- README.md | 2 +- docs/supportedsites.md | 16 +++++++++++----- youtube_dl/version.py | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 3a4707227..4fc83b8e3 100644 --- a/README.md +++ b/README.md @@ -400,7 +400,7 @@ which means you can modify it, redistribute it or use it however you like. downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm {}' - --convert-subtitles FORMAT Convert the subtitles to other format + --convert-subs FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt) # CONFIGURATION diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1646277ec..2297edc6b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,6 +28,7 @@ - **AlJazeera** - **Allocine** - **AlphaPorno** + - **AnimalPlanet** - **anitube.se** - **AnySex** - **Aparat** @@ -125,7 +126,7 @@ - **dailymotion:user** - **DailymotionCloud** - **daum.net** - - **daum.net** + - **daum.net:clip** - **DBTV** - **DCN** - **dcn:live** @@ -164,7 +165,7 @@ - **Eporner** - **EroProfile** - **Escapist** - - **ESPN** (Currently broken) + - **ESPN** - **EsriVideo** - **Europa** - **EveryonesMixtape** @@ -181,6 +182,7 @@ - **Flickr** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FootyRoom** + - **FOX** - **Foxgay** - **FoxNews**: Fox News and Fox Business Video - **FoxSports** @@ -259,7 +261,6 @@ - **JeuxVideo** - **Jove** - **jpopsuki.tv** - - **Jukebox** - **JWPlatform** - **Kaltura** - **KanalPlay**: Kanal 5/9/11 Play @@ -405,6 +406,7 @@ - **OnionStudios** - **Ooyala** - **OoyalaExternal** + - **OraTV** - **orf:fm4**: radio FM4 - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 @@ -415,7 +417,7 @@ - **pcmag** - **Periscope**: Periscope - **PhilharmonieDeParis**: Philharmonie de Paris - - **Phoenix** + - **phoenix.de** - **Photobucket** - **Pinkbike** - **Pladform** @@ -535,7 +537,8 @@ - **SportBoxEmbed** - **SportDeutschland** - **Sportschau** - - **Srf** + - **SRGSSR** + - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **SSA** - **stanfordoc**: Stanford Open ClassRoom - **Steam** @@ -646,6 +649,9 @@ - **VideoDetective** - **videofy.me** - **VideoMega** + - **videomore** + - **videomore:season** + - **videomore:video** - **VideoPremium** - **VideoTt**: video.tt - Your True Tube - **videoweed**: VideoWeed diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a62baa305..cf6c4d74d 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.12.29' +__version__ = '2015.12.31' From c43fda4c1a6cc6d1a1726c3f37c82a0b256625e8 Mon Sep 17 00:00:00 2001 From: j Date: Sun, 20 Dec 2015 03:17:23 +0100 Subject: [PATCH 0234/1105] [regiotv] Add new extractor (closes #7797) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/regiotv.py | 61 ++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 youtube_dl/extractor/regiotv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3df80bf00..732229c14 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -552,6 +552,7 @@ from .rai import ( from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redtube import RedTubeIE +from .regiotv import RegioTVIE from .restudy import RestudyIE from .reverbnation import ReverbNationIE from .ringtv import RingTVIE diff --git a/youtube_dl/extractor/regiotv.py b/youtube_dl/extractor/regiotv.py new file mode 100644 index 000000000..a5c21c085 --- /dev/null +++ b/youtube_dl/extractor/regiotv.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + sanitized_Request, + xpath_with_ns, +) + + +class RegioTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P[0-9]+).html' + _TESTS = [ + { + 'url': 'http://www.regio-tv.de/video/395808.html', + 'info_dict': { + 'id': '395808', + 'ext': 'mp4', + 'title': u'Wir in Ludwigsburg', + 'description': u'Mit unseren zuckers\xfc\xdfen Adventskindern, au\xdferdem besuchen wir die Abendsterne!', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + key = self._html_search_regex(r''',key: "(.*?)"''', webpage, 'key') + + title = self._html_search_regex( + r'', + webpage, 'title') + + soapxml = '%s' % key + request = sanitized_Request('http://v.telvi.de/?wsdl', soapxml) + request.add_header('Origin', 'http://www.regio-tv.de') + request.add_header('Referer', url) + video_data = self._download_xml(request, video_id, 'video data') + + NS_MAP = { + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', + 'soap': 'http://schemas.xmlsoap.org/soap/envelope/', + } + + url = video_data.find(xpath_with_ns('.//video', NS_MAP)).text + thumbnail = video_data.find(xpath_with_ns('.//image', NS_MAP)).text + + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'title': title, + 'url': url, + 'thumbnail': thumbnail, + 'description': description, + } From 34a9da136f1a29889af01815b696672b9dd5b57d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 31 Dec 2015 22:12:47 +0600 Subject: [PATCH 0235/1105] [regiotv] Improve extraction (Closes #7915) --- youtube_dl/extractor/regiotv.py | 65 +++++++++++++++++---------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/regiotv.py b/youtube_dl/extractor/regiotv.py index a5c21c085..e250a52f0 100644 --- a/youtube_dl/extractor/regiotv.py +++ b/youtube_dl/extractor/regiotv.py @@ -1,61 +1,62 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( sanitized_Request, + xpath_text, xpath_with_ns, ) class RegioTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P[0-9]+).html' - _TESTS = [ - { - 'url': 'http://www.regio-tv.de/video/395808.html', - 'info_dict': { - 'id': '395808', - 'ext': 'mp4', - 'title': u'Wir in Ludwigsburg', - 'description': u'Mit unseren zuckers\xfc\xdfen Adventskindern, au\xdferdem besuchen wir die Abendsterne!', - } - }, - ] + _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://www.regio-tv.de/video/395808.html', + 'info_dict': { + 'id': '395808', + 'ext': 'mp4', + 'title': 'Wir in Ludwigsburg', + 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!', + } + }, { + 'url': 'http://www.regio-tv.de/video/395808', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - key = self._html_search_regex(r''',key: "(.*?)"''', webpage, 'key') - title = self._html_search_regex( - r'', - webpage, 'title') + key = self._search_regex( + r'key\s*:\s*(["\'])(?P.+?)\1', webpage, 'key', group='key') + title = self._og_search_title(webpage) - soapxml = '%s' % key - request = sanitized_Request('http://v.telvi.de/?wsdl', soapxml) - request.add_header('Origin', 'http://www.regio-tv.de') - request.add_header('Referer', url) - video_data = self._download_xml(request, video_id, 'video data') + SOAP_TEMPLATE = '<{0} xmlns="http://v.telvi.de/">{1}' + + request = sanitized_Request( + 'http://v.telvi.de/', + SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8')) + video_data = self._download_xml(request, video_id, 'Downloading video XML') NS_MAP = { 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 'soap': 'http://schemas.xmlsoap.org/soap/envelope/', } - url = video_data.find(xpath_with_ns('.//video', NS_MAP)).text - thumbnail = video_data.find(xpath_with_ns('.//image', NS_MAP)).text - - description = self._html_search_meta('description', webpage) + video_url = xpath_text( + video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True) + thumbnail = xpath_text( + video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail') + description = self._og_search_description( + webpage) or self._html_search_meta('description', webpage) return { 'id': video_id, + 'url': video_url, 'title': title, - 'url': url, - 'thumbnail': thumbnail, 'description': description, + 'thumbnail': thumbnail, } From fec040e7549d06e91dae31da53bee855630d0adf Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 31 Dec 2015 17:29:37 +0100 Subject: [PATCH 0236/1105] [discovery] add support for discovery related sites - investigationdiscovery.com - discoverylife.com - animalplanet.com - ahctv.com - destinationamerica.com - sciencechannel.com - tlc.com - velocity.com --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/animalplanet.py | 53 ---------------------------- youtube_dl/extractor/discovery.py | 39 +++++++++++++++----- 3 files changed, 30 insertions(+), 63 deletions(-) delete mode 100644 youtube_dl/extractor/animalplanet.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 732229c14..bae151e58 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -19,7 +19,6 @@ from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE -from .animalplanet import AnimalPlanetIE from .anitube import AnitubeIE from .anysex import AnySexIE from .aol import AolIE diff --git a/youtube_dl/extractor/animalplanet.py b/youtube_dl/extractor/animalplanet.py deleted file mode 100644 index 0cebc8185..000000000 --- a/youtube_dl/extractor/animalplanet.py +++ /dev/null @@ -1,53 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - parse_duration, - parse_iso8601, -) - - -class AnimalPlanetIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/([^/]+/)*(?P[^/\?#]+)' - _TESTS = [{ - 'url': 'http://www.animalplanet.com/tv-shows/i-shouldnt-be-alive/videos/dog-saves-injured-owner/', - 'info_dict': { - 'id': '10608', - 'ext': 'mp4', - 'title': 'Dog Saves Injured Owner', - 'description': 'A world class athlete is put to the test when she falls into a canyon and breaks her hip. Her only companion is her dog, Taz, who is on a mission to save her!', - 'upload_date': '20100410', - 'timestamp': 1270857727, - 'duration': 220, - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'http://www.animalplanet.com/longfin-eels-maneaters/', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - video_data = self._parse_json(self._search_regex( - r'initialVideoData\s*=\s*({.+?});', - webpage, 'initialVideoData'), display_id)['playlist'][0] - - return { - 'id': compat_str(video_data['id']), - 'display_id': display_id, - 'title': video_data['title'], - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnailURL'), - 'duration': parse_duration(video_data.get('video_length')), - 'timestamp': parse_iso8601(video_data.get('publishedDate')), - 'formats': self._extract_m3u8_formats( - video_data['src'], display_id, 'mp4', - 'm3u8_native', m3u8_id='hls') - } diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 507f4bdad..2626d85e3 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -9,7 +9,17 @@ from ..compat import compat_str class DiscoveryIE(InfoExtractor): - _VALID_URL = r'https?://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P[a-zA-Z0-9_\-]*)(?:\.htm)?' + _VALID_URL = r'''(?x)http://(?:www\.)?(?: + discovery| + investigationdiscovery| + discoverylife| + animalplanet| + ahctv| + destinationamerica| + sciencechannel| + tlc| + velocity + )\.com/([^/]+/)*(?P[^\./\?#]+)''' _TESTS = [{ 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', 'info_dict': { @@ -21,8 +31,8 @@ class DiscoveryIE(InfoExtractor): 'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s' ' back.'), 'duration': 156, - 'timestamp': 1303099200, - 'upload_date': '20110418', + 'timestamp': 1302032462, + 'upload_date': '20110405', }, 'params': { 'skip_download': True, # requires ffmpeg @@ -33,27 +43,38 @@ class DiscoveryIE(InfoExtractor): 'id': 'mythbusters-the-simpsons', 'title': 'MythBusters: The Simpsons', }, - 'playlist_count': 9, + 'playlist_mincount': 10, + }, { + 'url': 'http://www.animalplanet.com/longfin-eels-maneaters/', + 'info_dict': { + 'id': '78326', + 'ext': 'mp4', + 'title': 'Longfin Eels: Maneaters?', + 'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.', + 'upload_date': '20140725', + 'timestamp': 1406246400, + 'duration': 116, + }, }] def _real_extract(self, url): - video_id = self._match_id(url) - info = self._download_json(url + '?flat=1', video_id) + display_id = self._match_id(url) + info = self._download_json(url + '?flat=1', display_id) video_title = info.get('playlist_title') or info.get('video_title') entries = [{ 'id': compat_str(video_info['id']), 'formats': self._extract_m3u8_formats( - video_info['src'], video_id, ext='mp4', + video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls', note='Download m3u8 information for video %d' % (idx + 1)), 'title': video_info['title'], 'description': video_info.get('description'), 'duration': parse_duration(video_info.get('video_length')), - 'webpage_url': video_info.get('href'), + 'webpage_url': video_info.get('href') or video_info.get('url'), 'thumbnail': video_info.get('thumbnailURL'), 'alt_title': video_info.get('secondary_title'), 'timestamp': parse_iso8601(video_info.get('publishedDate')), } for idx, video_info in enumerate(info['playlist'])] - return self.playlist_result(entries, video_id, video_title) + return self.playlist_result(entries, display_id, video_title) From b05641ce405ef281b395c26b094208c17141d027 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 31 Dec 2015 18:24:49 +0100 Subject: [PATCH 0237/1105] [discovery] improve _VALID_URL regex --- youtube_dl/extractor/discovery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 2626d85e3..ce680a9f3 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -19,7 +19,7 @@ class DiscoveryIE(InfoExtractor): sciencechannel| tlc| velocity - )\.com/([^/]+/)*(?P[^\./\?#]+)''' + )\.com/(?:[^/]+/)*(?P[^./?#]+)''' _TESTS = [{ 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', 'info_dict': { From f1e21efe63bc08abbc5cec0f8d9c3112c580bd48 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 31 Dec 2015 18:33:40 +0100 Subject: [PATCH 0238/1105] [tlc] remove TlcIE --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/tlc.py | 23 ----------------------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bae151e58..f3f9cd978 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -695,7 +695,7 @@ from .thesixtyone import TheSixtyOneIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .tinypic import TinyPicIE -from .tlc import TlcIE, TlcDeIE +from .tlc import TlcDeIE from .tmz import ( TMZIE, TMZArticleIE, diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index d6d038a8d..adc05ed5f 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -4,32 +4,9 @@ import re from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE -from .discovery import DiscoveryIE from ..compat import compat_urlparse -class TlcIE(DiscoveryIE): - IE_NAME = 'tlc.com' - _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P[a-zA-Z0-9\-]*)(.htm)?' - - # DiscoveryIE has _TESTS - _TESTS = [{ - 'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm', - 'info_dict': { - 'id': '104493', - 'ext': 'mp4', - 'title': 'Too Big to Fly', - 'description': 'Buddy has taken on a high flying task.', - 'duration': 119, - 'timestamp': 1393365060, - 'upload_date': '20140225', - }, - 'params': { - 'skip_download': True, # requires ffmpef - }, - }] - - class TlcDeIE(InfoExtractor): IE_NAME = 'tlc.de' _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P[^/?]+)' From 9accfed4e724d3048e894c2323b3a478951d670d Mon Sep 17 00:00:00 2001 From: j <j@mailb.org> Date: Sun, 20 Dec 2015 15:37:57 +0100 Subject: [PATCH 0239/1105] [pandoratv] Add new extractor (closes #6884) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pandoratv.py | 56 +++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 youtube_dl/extractor/pandoratv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f3f9cd978..6aed59d16 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -497,6 +497,7 @@ from .orf import ( ORFFM4IE, ORFIPTVIE, ) +from .pandoratv import PandoraTVIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py new file mode 100644 index 000000000..a0a0c114a --- /dev/null +++ b/youtube_dl/extractor/pandoratv.py @@ -0,0 +1,56 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_urlparse, +) +from ..utils import ( + ExtractorError, +) + + +class PandoraTVIE(InfoExtractor): + _VALID_URL = r'http://(?:.+?\.)?channel.pandora.tv/channel/video.ptv\?' + _TESTS = [{ + 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', + 'info_dict': { + 'description': '\u982d\u3092\u64ab\u3067\u3066\u304f\u308c\u308b\uff1f', + 'ext': 'mp4', + 'id': '53294230', + 'title': '\u982d\u3092\u64ab\u3067\u3066\u304f\u308c\u308b\uff1f', + 'upload_date': '20151218', + } + }] + + + def _real_extract(self, url): + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + video_id = qs.get('prgid', [None])[0] + user_id = qs.get('ch_userid', [None])[0] + if any(not f for f in (video_id, user_id,)): + raise ExtractorError('Invalid URL', expected=True) + + data_url ='http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid={userid}&prgid={prgid}'.format(userid=user_id,prgid=video_id) + data = self._download_json(data_url, video_id) + info = data['data']['rows']['vod_play_info']['result'] + + formats = [] + for format_id in sorted([k for k in info if k.startswith('v') and k.endswith('Url') and info[k]]): + formats.append({ + 'format_id': format_id, + 'url': info[format_id], + 'ext': 'mp4', + 'height': int(format_id[1:-3]), + }) + + return { + 'description': info['body'], + 'thumbnail': info['thumbnail'], + 'formats': formats, + 'id': video_id, + 'title': info['subject'], + 'upload_date': info['fid'][:8], + 'view_count': info['hit'], + } From e4bd63f9c061cbf5c57e7aba29d0f46bffbf05d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 00:40:27 +0600 Subject: [PATCH 0240/1105] [pandoratv] Improve extraction (Closes #7921) --- youtube_dl/extractor/pandoratv.py | 60 ++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index a0a0c114a..34cede4c1 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -2,28 +2,36 @@ from __future__ import unicode_literals from .common import InfoExtractor - from ..compat import ( + compat_str, compat_urlparse, ) from ..utils import ( ExtractorError, + float_or_none, + parse_duration, + str_to_int, ) class PandoraTVIE(InfoExtractor): - _VALID_URL = r'http://(?:.+?\.)?channel.pandora.tv/channel/video.ptv\?' - _TESTS = [{ + _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?' + _TEST = { 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', 'info_dict': { - 'description': '\u982d\u3092\u64ab\u3067\u3066\u304f\u308c\u308b\uff1f', - 'ext': 'mp4', 'id': '53294230', - 'title': '\u982d\u3092\u64ab\u3067\u3066\u304f\u308c\u308b\uff1f', + 'ext': 'flv', + 'title': '頭を撫でてくれる?', + 'description': '頭を撫でてくれる?', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 39, 'upload_date': '20151218', + 'uploader': 'カワイイ動物まとめ', + 'uploader_id': 'mikakim', + 'view_count': int, + 'like_count': int, } - }] - + } def _real_extract(self, url): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) @@ -32,25 +40,37 @@ class PandoraTVIE(InfoExtractor): if any(not f for f in (video_id, user_id,)): raise ExtractorError('Invalid URL', expected=True) - data_url ='http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid={userid}&prgid={prgid}'.format(userid=user_id,prgid=video_id) - data = self._download_json(data_url, video_id) + data = self._download_json( + 'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s' + % (user_id, video_id), video_id) + info = data['data']['rows']['vod_play_info']['result'] formats = [] - for format_id in sorted([k for k in info if k.startswith('v') and k.endswith('Url') and info[k]]): + for format_id, format_url in info.items(): + if not format_url: + continue + height = self._search_regex( + r'^v(\d+)[Uu]rl$', format_id, 'height', default=None) + if not height: + continue formats.append({ - 'format_id': format_id, - 'url': info[format_id], - 'ext': 'mp4', - 'height': int(format_id[1:-3]), + 'format_id': '%sp' % height, + 'url': format_url, + 'height': int(height), }) + self._sort_formats(formats) return { - 'description': info['body'], - 'thumbnail': info['thumbnail'], - 'formats': formats, 'id': video_id, 'title': info['subject'], - 'upload_date': info['fid'][:8], - 'view_count': info['hit'], + 'description': info.get('body'), + 'thumbnail': info.get('thumbnail') or info.get('poster'), + 'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')), + 'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None, + 'uploader': info.get('nickname'), + 'uploader_id': info.get('upload_userid'), + 'view_count': str_to_int(info.get('hit')), + 'like_count': str_to_int(info.get('likecnt')), + 'formats': formats, } From 72528252e303a084c4b95ae07c7a7213e53cad8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 00:42:42 +0600 Subject: [PATCH 0241/1105] [pandoratv] Add IE names --- youtube_dl/extractor/pandoratv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py index 34cede4c1..8d49f5c4a 100644 --- a/youtube_dl/extractor/pandoratv.py +++ b/youtube_dl/extractor/pandoratv.py @@ -15,6 +15,8 @@ from ..utils import ( class PandoraTVIE(InfoExtractor): + IE_NAME = 'pandora.tv' + IE_DESC = '판도라TV' _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?' _TEST = { 'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', From 9f9f7664b543f2691a9100920a8b5962d4608bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 31 Dec 2015 19:52:48 +0100 Subject: [PATCH 0242/1105] [espn] Update test --- youtube_dl/extractor/espn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 7d758102c..24780b82d 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -11,7 +11,7 @@ class ESPNIE(InfoExtractor): 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', - 'description': '', + 'description': None, }, 'params': { # m3u8 download From 930087f2f69d887d768f234c5dc4563c5ad86ad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 31 Dec 2015 19:56:54 +0100 Subject: [PATCH 0243/1105] [espn] Support 'intl' videos (#7858) --- youtube_dl/extractor/espn.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 24780b82d..0a917f9f6 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -17,6 +17,18 @@ class ESPNIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + }, { + # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season + 'url': 'http://espn.go.com/video/clip?id=2743663', + 'info_dict': { + 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', + 'ext': 'mp4', + 'title': 'int_151206_Must_See_Moments_Best_of_MLS_2015_season', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'only_matching': True, @@ -43,12 +55,16 @@ class ESPNIE(InfoExtractor): r'class="video-play-button"[^>]+data-id="(\d+)', webpage, 'video id') + cms = 'espn' + if 'data-source="intl"' in webpage: + cms = 'intl' + player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms) player = self._download_webpage( - 'https://espn.go.com/video/iframe/twitter/?id=%s' % video_id, video_id) + player_url, video_id) pcode = self._search_regex( r'["\']pcode=([^"\']+)["\']', player, 'pcode') return self.url_result( - 'ooyalaexternal:espn:%s:%s' % (video_id, pcode), + 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), 'OoyalaExternal') From 69f85952568def9ce13f71f3e8bac65f0c5f5f36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 31 Dec 2015 20:06:21 +0100 Subject: [PATCH 0244/1105] [espn] Extract better titles --- youtube_dl/extractor/espn.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 0a917f9f6..3762d8748 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import remove_end class ESPNIE(InfoExtractor): @@ -10,7 +11,7 @@ class ESPNIE(InfoExtractor): 'info_dict': { 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'ext': 'mp4', - 'title': 'dm_140128_30for30Shorts___JudgingJewellv2', + 'title': '30 for 30 Shorts: Judging Jewell', 'description': None, }, 'params': { @@ -23,7 +24,7 @@ class ESPNIE(InfoExtractor): 'info_dict': { 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'ext': 'mp4', - 'title': 'int_151206_Must_See_Moments_Best_of_MLS_2015_season', + 'title': 'Must-See Moments: Best of the MLS season', }, 'params': { # m3u8 download @@ -65,6 +66,13 @@ class ESPNIE(InfoExtractor): pcode = self._search_regex( r'["\']pcode=([^"\']+)["\']', player, 'pcode') - return self.url_result( - 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), - 'OoyalaExternal') + title = remove_end( + self._og_search_title(webpage), + '- ESPN Video').strip() + + return { + '_type': 'url_transparent', + 'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), + 'ie_key': 'OoyalaExternal', + 'title': title, + } From 91e274546c4492dfa8f216f311356f641859a7cc Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Dec 2015 20:23:48 +0100 Subject: [PATCH 0245/1105] [tvland] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tvland.py | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 youtube_dl/extractor/tvland.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 6aed59d16..e20d90bac 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -739,6 +739,7 @@ from .tvc import ( TVCArticleIE, ) from .tvigle import TvigleIE +from .tvland import TVLandIE from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE from .tweakers import TweakersIE diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py new file mode 100644 index 000000000..7f049e076 --- /dev/null +++ b/youtube_dl/extractor/tvland.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .mtv import MTVIE + + +class TVLandIE(MTVIE): + IE_NAME = 'tvland.com' + _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)' + _FEED_URL = 'http://www.tvland.com/feeds/mrss/?uri=' + _TESTS = [{ + 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', + 'playlist': [ + { + 'md5': '227e9723b9669c05bf51098b10287aa7', + 'info_dict': { + 'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd', + 'ext': 'mp4', + 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5', + } + }, + { + 'md5': '9fa2b764ec0e8194fb3ebb01a83df88b', + 'info_dict': { + 'id': 'f4279548-6e13-40dd-92e8-860d27289197', + 'ext': 'mp4', + 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5', + } + }, + { + 'md5': 'fde4c3bccd7cc7e3576b338734153cec', + 'info_dict': { + 'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334', + 'ext': 'mp4', + 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5', + } + }, + { + 'md5': '247f6780cda6891f2e49b8ae2b10e017', + 'info_dict': { + 'id': '9146ecf5-b15a-4d78-879c-6679b77f4960', + 'ext': 'mp4', + 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5', + } + }, + { + 'md5': 'fd269f33256e47bad5eb6c40de089ff6', + 'info_dict': { + 'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7', + 'ext': 'mp4', + 'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5', + } + } + ], + }, { + 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', + 'md5': 'e2c6389401cf485df26c79c247b08713', + 'info_dict': { + 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', + 'ext': 'mp4', + 'title': 'Younger|Younger: Hilary Duff - Little Lies', + 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269' + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._search_regex(r'data-mgid="([^"]+)"', webpage, 'mgid') + return self._get_videos_info_from_url(self._FEED_URL + mgid, video_id) From 7f9134fb2db5559124a0f658bacb6bad54261043 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Dec 2015 20:52:47 +0100 Subject: [PATCH 0246/1105] [tvland] inherit from MTVServicesInfoExtractor --- youtube_dl/extractor/tvland.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py index 7f049e076..b73279dec 100644 --- a/youtube_dl/extractor/tvland.py +++ b/youtube_dl/extractor/tvland.py @@ -1,13 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -from .mtv import MTVIE +from .mtv import MTVServicesInfoExtractor -class TVLandIE(MTVIE): +class TVLandIE(MTVServicesInfoExtractor): IE_NAME = 'tvland.com' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)' - _FEED_URL = 'http://www.tvland.com/feeds/mrss/?uri=' + _FEED_URL = 'http://www.tvland.com/feeds/mrss/' _TESTS = [{ 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', 'playlist': [ @@ -62,9 +62,3 @@ class TVLandIE(MTVIE): 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269' }, }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - mgid = self._search_regex(r'data-mgid="([^"]+)"', webpage, 'mgid') - return self._get_videos_info_from_url(self._FEED_URL + mgid, video_id) From 0416006a3051b15e4bebbb096960ca4fb8ffd0a9 Mon Sep 17 00:00:00 2001 From: j <j@mailb.org> Date: Tue, 15 Dec 2015 21:37:47 +0100 Subject: [PATCH 0247/1105] Fix einthusan parser --- youtube_dl/extractor/einthusan.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 5dfea0d39..bc6def65e 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -41,9 +41,12 @@ class EinthusanIE(InfoExtractor): video_title = self._html_search_regex( r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title') - video_url = self._html_search_regex( - r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''', - webpage, 'video url') + movieid = self._html_search_regex( + r'data-movieid="(.*?)"', webpage, 'movieid') + + location = 'Washington' + geturl = 'http://cdn.einthusan.com/geturl/%s/hd/%s' % (movieid, location) + video_url = self._download_webpage(geturl, video_id) description = self._html_search_meta('description', webpage) thumbnail = self._html_search_regex( From b26afec81f408af37cca6298109ca2a59688bf79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 02:23:03 +0600 Subject: [PATCH 0248/1105] [einthusan] Improve extraction (Closes #7877) --- youtube_dl/extractor/einthusan.py | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index bc6def65e..f7339702c 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -1,9 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + remove_start, + sanitized_Request, +) class EinthusanIE(InfoExtractor): @@ -34,30 +37,33 @@ class EinthusanIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + video_id = self._match_id(url) - video_title = self._html_search_regex( - r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title') + request = sanitized_Request(url) + request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0') + webpage = self._download_webpage(request, video_id) - movieid = self._html_search_regex( - r'data-movieid="(.*?)"', webpage, 'movieid') + title = self._html_search_regex( + r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>', + webpage, 'title') - location = 'Washington' - geturl = 'http://cdn.einthusan.com/geturl/%s/hd/%s' % (movieid, location) - video_url = self._download_webpage(geturl, video_id) + video_id = self._search_regex( + r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) + + video_url = self._download_webpage( + 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' + % video_id, video_id) description = self._html_search_meta('description', webpage) thumbnail = self._html_search_regex( r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''', webpage, "thumbnail url", fatal=False) if thumbnail is not None: - thumbnail = thumbnail.replace('..', 'http://www.einthusan.com') + thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..')) return { 'id': video_id, - 'title': video_title, + 'title': title, 'url': video_url, 'thumbnail': thumbnail, 'description': description, From a0d7ede350f13096f94a94f982925ba4ffac91e3 Mon Sep 17 00:00:00 2001 From: j <j@mailb.org> Date: Tue, 15 Dec 2015 21:37:47 +0100 Subject: [PATCH 0249/1105] Fix einthusan parser --- youtube_dl/extractor/einthusan.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 5dfea0d39..bc6def65e 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -41,9 +41,12 @@ class EinthusanIE(InfoExtractor): video_title = self._html_search_regex( r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title') - video_url = self._html_search_regex( - r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''', - webpage, 'video url') + movieid = self._html_search_regex( + r'data-movieid="(.*?)"', webpage, 'movieid') + + location = 'Washington' + geturl = 'http://cdn.einthusan.com/geturl/%s/hd/%s' % (movieid, location) + video_url = self._download_webpage(geturl, video_id) description = self._html_search_meta('description', webpage) thumbnail = self._html_search_regex( From fec09bf15d632bc1694b32981d1468011e9d7da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 02:39:00 +0600 Subject: [PATCH 0250/1105] [einthusan] Improve extraction (Closes #7877) --- youtube_dl/extractor/einthusan.py | 34 ++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index bc6def65e..f7339702c 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -1,9 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + remove_start, + sanitized_Request, +) class EinthusanIE(InfoExtractor): @@ -34,30 +37,33 @@ class EinthusanIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + video_id = self._match_id(url) - video_title = self._html_search_regex( - r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title') + request = sanitized_Request(url) + request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0') + webpage = self._download_webpage(request, video_id) - movieid = self._html_search_regex( - r'data-movieid="(.*?)"', webpage, 'movieid') + title = self._html_search_regex( + r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>', + webpage, 'title') - location = 'Washington' - geturl = 'http://cdn.einthusan.com/geturl/%s/hd/%s' % (movieid, location) - video_url = self._download_webpage(geturl, video_id) + video_id = self._search_regex( + r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) + + video_url = self._download_webpage( + 'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' + % video_id, video_id) description = self._html_search_meta('description', webpage) thumbnail = self._html_search_regex( r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''', webpage, "thumbnail url", fatal=False) if thumbnail is not None: - thumbnail = thumbnail.replace('..', 'http://www.einthusan.com') + thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..')) return { 'id': video_id, - 'title': video_title, + 'title': title, 'url': video_url, 'thumbnail': thumbnail, 'description': description, From c1e90619bde0c8b3b17d938195cfeb5ed7803125 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Dec 2015 22:10:00 +0100 Subject: [PATCH 0251/1105] [mtv] extract mgid extraction and query building into separate methods --- youtube_dl/extractor/mtv.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index d887583e6..e8bb527b8 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -167,14 +167,16 @@ class MTVServicesInfoExtractor(InfoExtractor): 'description': description, } + def _get_feed_query(self, uri): + data = {'uri': uri} + if self._LANG: + data['lang'] = self._LANG + return compat_urllib_parse.urlencode(data) + def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) feed_url = self._get_feed_url(uri) - data = compat_urllib_parse.urlencode({'uri': uri}) - info_url = feed_url + '?' - if self._LANG: - info_url += 'lang=%s&' % self._LANG - info_url += data + info_url = feed_url + '?' + self._get_feed_query(uri) return self._get_videos_info_from_url(info_url, video_id) def _get_videos_info_from_url(self, url, video_id): @@ -184,9 +186,7 @@ class MTVServicesInfoExtractor(InfoExtractor): return self.playlist_result( [self._get_video_info(item) for item in idoc.findall('.//item')]) - def _real_extract(self, url): - title = url_basename(url) - webpage = self._download_webpage(url, title) + def _extract_mgid(self, webpage): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -207,7 +207,12 @@ class MTVServicesInfoExtractor(InfoExtractor): 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') + return mgid + def _real_extract(self, url): + title = url_basename(url) + webpage = self._download_webpage(url, title) + mgid = self._extract_mgid(webpage) videos_info = self._get_videos_info(mgid) return videos_info From a0e5beb0fbfa5f300087c18b12ff67fade51d578 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Dec 2015 22:12:05 +0100 Subject: [PATCH 0252/1105] [nick] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nick.py | 63 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 youtube_dl/extractor/nick.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e20d90bac..120ed9340 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -441,6 +441,7 @@ from .nhl import ( NHLNewsIE, NHLVideocenterIE, ) +from .nick import NickIE from .niconico import NiconicoIE, NiconicoPlaylistIE from .ninegag import NineGagIE from .noco import NocoIE diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py new file mode 100644 index 000000000..4840368ac --- /dev/null +++ b/youtube_dl/extractor/nick.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor +from ..compat import compat_urllib_parse + + +class NickIE(MTVServicesInfoExtractor): + IE_NAME = 'nick.com' + _VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)' + _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' + _TESTS = [{ + 'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html', + 'playlist': [ + { + 'md5': '6e5adc1e28253bbb1b28ab05403dd4d4', + 'info_dict': { + 'id': 'be6a17b0-412d-11e5-8ff7-0026b9414f30', + 'ext': 'mp4', + 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S1', + 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', + + } + }, + { + 'md5': 'd7be441fc53a1d4882fa9508a1e5b3ce', + 'info_dict': { + 'id': 'be6b8f96-412d-11e5-8ff7-0026b9414f30', + 'ext': 'mp4', + 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S2', + 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', + + } + }, + { + 'md5': 'efffe1728a234b2b0d2f2b343dd1946f', + 'info_dict': { + 'id': 'be6cf7e6-412d-11e5-8ff7-0026b9414f30', + 'ext': 'mp4', + 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S3', + 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', + } + }, + { + 'md5': '1ec6690733ab9f41709e274a1d5c7556', + 'info_dict': { + 'id': 'be6e3354-412d-11e5-8ff7-0026b9414f30', + 'ext': 'mp4', + 'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S4', + 'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', + } + }, + ], + }] + + def _get_feed_query(self, uri): + return compat_urllib_parse.urlencode({ + 'feed': 'nick_arc_player_prime', + 'mgid': uri + }) + + def _extract_mgid(self, webpage): + return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') From e565cf6048739fdfdfbab19e4b35c37a53865807 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Thu, 31 Dec 2015 22:47:18 +0100 Subject: [PATCH 0253/1105] [nextmovie] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nextmovie.py | 30 ++++++++++++++++++++++++++++++ youtube_dl/extractor/nick.py | 2 +- 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/nextmovie.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 120ed9340..b3f7059e4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -434,6 +434,7 @@ from .nextmedia import ( NextMediaActionNewsIE, AppleDailyIE, ) +from .nextmovie import NextMovieIE from .nfb import NFBIE from .nfl import NFLIE from .nhl import ( diff --git a/youtube_dl/extractor/nextmovie.py b/youtube_dl/extractor/nextmovie.py new file mode 100644 index 000000000..657ae77a0 --- /dev/null +++ b/youtube_dl/extractor/nextmovie.py @@ -0,0 +1,30 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor +from ..compat import compat_urllib_parse + + +class NextMovieIE(MTVServicesInfoExtractor): + IE_NAME = 'nextmovie.com' + _VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' + _FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm' + _TESTS = [{ + 'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/', + 'md5': '09a9199f2f11f10107d04fcb153218aa', + 'info_dict': { + 'id': '961726', + 'ext': 'mp4', + 'title': 'The Muppets\' Gravity', + }, + }] + + def _get_feed_query(self, uri): + return compat_urllib_parse.urlencode({ + 'feed': '1505', + 'mgid': uri, + }) + + def _real_extract(self, url): + mgid = self._match_id(url) + return self._get_videos_info(mgid) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py index 4840368ac..b62819ae5 100644 --- a/youtube_dl/extractor/nick.py +++ b/youtube_dl/extractor/nick.py @@ -56,7 +56,7 @@ class NickIE(MTVServicesInfoExtractor): def _get_feed_query(self, uri): return compat_urllib_parse.urlencode({ 'feed': 'nick_arc_player_prime', - 'mgid': uri + 'mgid': uri, }) def _extract_mgid(self, webpage): From 034caf70b24da1d1de12b00e9ac5620fb7664220 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 31 Dec 2015 13:05:46 +0800 Subject: [PATCH 0254/1105] [youku] Fix extraction (#8068) --- youtube_dl/extractor/youku.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 3a3432be8..f767fa15f 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -2,6 +2,9 @@ from __future__ import unicode_literals import base64 +import random +import string +import time from .common import InfoExtractor from ..compat import ( @@ -141,6 +144,11 @@ class YoukuIE(InfoExtractor): return video_urls_dict + @staticmethod + def get_ysuid(): + return '%d%s' % (int(time.time()), ''.join([ + random.choice(string.ascii_letters) for i in range(3)])) + def get_hd(self, fm): hd_id_dict = { '3gp': '0', @@ -189,6 +197,8 @@ class YoukuIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + self._set_cookie('youku.com', '__ysuid', self.get_ysuid()) + def retrieve_data(req_url, note): headers = { 'Referer': req_url, From 0d5095fc65d92251f7aeefd9cb504434590cee8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 15:14:41 +0600 Subject: [PATCH 0255/1105] [ccc] Update _VALID_URL (Closes #8097) --- youtube_dl/extractor/ccc.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 6924eac70..2e0a0b29f 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -12,13 +12,13 @@ from ..utils import ( class CCCIE(InfoExtractor): IE_NAME = 'media.ccc.de' - _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html' + _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)' - _TEST = { - 'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video', + _TESTS = [{ + 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video', 'md5': '3a1eda8f3a29515d27f5adb967d7e740', 'info_dict': { - 'id': '20131228183', + 'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor', 'ext': 'mp4', 'title': 'Introduction to Processor Design', 'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b', @@ -26,7 +26,10 @@ class CCCIE(InfoExtractor): 'view_count': int, 'upload_date': '20131229', } - } + }, { + 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) From c9154514c495bd2259df2250d4cf72a2564a9136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 15:22:22 +0600 Subject: [PATCH 0256/1105] [ccc] Fix upload date extraction --- youtube_dl/extractor/ccc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 2e0a0b29f..5b549b343 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -46,7 +46,7 @@ class CCCIE(InfoExtractor): r"(?s)<p class='description'>(.*?)</p>", webpage, 'description', fatal=False) upload_date = unified_strdate(self._html_search_regex( - r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>", + r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>", webpage, 'upload date', fatal=False)) view_count = int_or_none(self._html_search_regex( r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>", From 8499d211583f3534c0d0e0f086d471c52ac6c803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 15:29:42 +0600 Subject: [PATCH 0257/1105] [ccc] Fix description extraction and update test --- youtube_dl/extractor/ccc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 5b549b343..45115c838 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -21,10 +21,10 @@ class CCCIE(InfoExtractor): 'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor', 'ext': 'mp4', 'title': 'Introduction to Processor Design', - 'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b', + 'description': 'md5:80be298773966f66d56cb11260b879af', 'thumbnail': 're:^https?://.*\.jpg$', 'view_count': int, - 'upload_date': '20131229', + 'upload_date': '20131228', } }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', @@ -43,7 +43,7 @@ class CCCIE(InfoExtractor): title = self._html_search_regex( r'(?s)<h1>(.*?)</h1>', webpage, 'title') description = self._html_search_regex( - r"(?s)<p class='description'>(.*?)</p>", + r"(?s)<h3>About</h3>(.+?)<h3>", webpage, 'description', fatal=False) upload_date = unified_strdate(self._html_search_regex( r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>", From 82597f0ec0b8ec208e77d27f62904de7804b914d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 15:41:52 +0600 Subject: [PATCH 0258/1105] [ccc] Extract duration --- youtube_dl/extractor/ccc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 45115c838..710e5919c 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, + parse_duration, qualities, unified_strdate, ) @@ -25,6 +26,7 @@ class CCCIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', 'view_count': int, 'upload_date': '20131228', + 'duration': 3660, } }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', @@ -51,6 +53,9 @@ class CCCIE(InfoExtractor): view_count = int_or_none(self._html_search_regex( r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>", webpage, 'view count', fatal=False)) + duration = parse_duration(self._html_search_regex( + r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li', + webpage, 'duration', fatal=False, group='duration')) matches = re.finditer(r'''(?xs) <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s* @@ -98,5 +103,6 @@ class CCCIE(InfoExtractor): 'thumbnail': thumbnail, 'view_count': view_count, 'upload_date': upload_date, + 'duration': duration, 'formats': formats, } From 190ef0798103908d986e07f54323299084726d42 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 1 Jan 2016 12:17:10 +0100 Subject: [PATCH 0259/1105] release 2016.01.01 --- docs/supportedsites.md | 7 +++++-- youtube_dl/version.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2297edc6b..84c166805 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -28,7 +28,6 @@ - **AlJazeera** - **Allocine** - **AlphaPorno** - - **AnimalPlanet** - **anitube.se** - **AnySex** - **Aparat** @@ -368,11 +367,13 @@ - **Newstube** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 + - **nextmovie.com** - **nfb**: National Film Board of Canada - **nfl.com** - **nhl.com** - **nhl.com:news**: NHL news - **nhl.com:videocenter**: NHL videocenter category + - **nick.com** - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **njoy**: N-JOY @@ -411,6 +412,7 @@ - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 - **orf:tvthek**: ORF TVthek + - **pandora.tv**: 판도라TV - **parliamentlive.tv**: UK parliament videos - **Patreon** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) @@ -459,6 +461,7 @@ - **RBMARadio** - **RDS**: RDS.ca - **RedTube** + - **RegioTV** - **Restudy** - **ReverbNation** - **RingTV** @@ -582,7 +585,6 @@ - **THVideo** - **THVideoPlaylist** - **tinypic**: tinypic.com videos - - **tlc.com** - **tlc.de** - **TMZ** - **TMZArticle** @@ -611,6 +613,7 @@ - **TVC** - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru + - **tvland.com** - **tvp.pl** - **tvp.pl:Series** - **TVPlay**: TV3Play and related services diff --git a/youtube_dl/version.py b/youtube_dl/version.py index cf6c4d74d..790bd5b3b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.12.31' +__version__ = '2016.01.01' From 32f9036447d1211f9ce0750203d71671f0ee99dc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 1 Jan 2016 13:28:45 +0100 Subject: [PATCH 0260/1105] [ccc] Add language information to formats --- youtube_dl/YoutubeDL.py | 4 ++++ youtube_dl/extractor/ccc.py | 8 ++++++-- youtube_dl/extractor/common.py | 5 +++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 50425b8d7..3b2be3159 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1791,6 +1791,10 @@ class YoutubeDL(object): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported) ' + if fdict.get('language'): + if res: + res += ' ' + res += '[%s]' % fdict['language'] if fdict.get('format_note') is not None: res += fdict['format_note'] + ' ' if fdict.get('tbr') is not None: diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 710e5919c..e94b1e35b 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -58,11 +58,12 @@ class CCCIE(InfoExtractor): webpage, 'duration', fatal=False, group='duration')) matches = re.finditer(r'''(?xs) - <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s* + <(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s* + <(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s* <a\s+download\s+href='(?P<http_url>[^']+)'>\s* (?: .*? - <a\s+href='(?P<torrent_url>[^']+\.torrent)' + <a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)' )?''', webpage) formats = [] for m in matches: @@ -70,12 +71,15 @@ class CCCIE(InfoExtractor): format_id = self._search_regex( r'.*/([a-z0-9_-]+)/[^/]*$', m.group('http_url'), 'format id', default=None) + if format_id: + format_id = m.group('lang') + '-' + format_id vcodec = 'h264' if 'h264' in format_id else ( 'none' if format_id in ('mp3', 'opus') else None ) formats.append({ 'format_id': format_id, 'format': format, + 'language': m.group('lang'), 'url': m.group('http_url'), 'vcodec': vcodec, 'preference': preference(format_id), diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 655207447..2823b1d18 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,8 +108,9 @@ class InfoExtractor(object): -2 or smaller for less than default. < -1000 to hide the format (if there is another one which is strictly better) - * language_preference Is this in the correct requested - language? + * language Language code, e.g. "de" or "en-US". + * language_preference Is this in the language mentioned in + the URL? 10 if it's what the URL is about, -1 for default (don't know), -10 otherwise, other values reserved for now. From ca227c8698340ad9170698cef81ab4bf4d832a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 1 Jan 2016 14:32:00 +0100 Subject: [PATCH 0261/1105] [yahoo] Support pages that use an alias (fixes #8084) --- youtube_dl/extractor/yahoo.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index fca5ddc69..4a492f784 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -155,7 +155,16 @@ class YahooIE(InfoExtractor): 'description': 'md5:8fc39608213295748e1e289807838c97', 'duration': 1646, }, - } + }, { + # it uses an alias to get the video_id + 'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html', + 'info_dict': { + 'id': '40eda9c8-8e5f-3552-8745-830f67d0c737', + 'ext': 'mp4', + 'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking', + 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.', + }, + }, ] def _real_extract(self, url): @@ -199,13 +208,22 @@ class YahooIE(InfoExtractor): r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, default=None) if items_json is None: - CONTENT_ID_REGEXES = [ - r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', - r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', - r'"first_videoid"\s*:\s*"([^"]+)"', - r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), - ] - video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID') + alias = self._search_regex( + r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None) + if alias is not None: + alias_info = self._download_json( + 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias, + display_id, 'Downloading alias info') + video_id = alias_info[0]['id'] + else: + CONTENT_ID_REGEXES = [ + r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', + r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', + r'"first_videoid"\s*:\s*"([^"]+)"', + r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), + ] + video_id = self._search_regex( + CONTENT_ID_REGEXES, webpage, 'content ID') else: items = json.loads(items_json) info = items['mediaItems']['query']['results']['mediaObj'][0] From 27bfd4e5266623d9000ba17e6bb2a72eefb0e22d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 20:26:56 +0600 Subject: [PATCH 0262/1105] [extractor/common] Introduce number fields for chapters and series --- youtube_dl/extractor/common.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c443daf20..c63157619 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -204,21 +204,21 @@ class InfoExtractor(object): chapter or section: chapter: Name or title of the chapter the video belongs to. - chapter_id: Number or id of the chapter the video belongs to, as an integer - or unicode string. + chapter_number: Number of the chapter the video belongs to, as an integer. + chapter_id: Id of the chapter the video belongs to, as a unicode string. The following fields should only be used when the video is an episode of some series or programme: series: Title of the series or programme the video episode belongs to. season: Title of the season the video episode belongs to. - season_id: Number or id of the season the video episode belongs to, as an - integer or unicode string. + season_number: Number of the season the video episode belongs to, as an integer. + season_id: Id of the season the video episode belongs to, as a unicode string. episode: Title of the video episode. Unlike mandatory video title field, this field should denote the exact title of the video episode without any kind of decoration. - episode_id: Number or id of the video episode within a season, as an integer - or unicode string. + episode_number: Number of the video episode within a season, as an integer. + episode_id: Id of the video episode, as a unicode string. Unless mentioned otherwise, the fields should be Unicode strings. From 306c51c66922abb1846640b81c9b7c4d6a570d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 20:30:08 +0600 Subject: [PATCH 0263/1105] [videomore] Use number fields for series --- youtube_dl/extractor/videomore.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index 3bd96e445..a66d6de23 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -25,9 +25,9 @@ class VideomoreIE(InfoExtractor): 'description': 'В гостях – лучшие романтические комедии года, «Выживший» Иньярриту и «Стив Джобс» Дэнни Бойла.', 'series': 'Кино в деталях', 'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук', - 'episode_id': None, + 'episode_number': None, 'season': 'Сезон 2015', - 'season_id': 5, + 'season_number': 5, 'thumbnail': 're:^https?://.*\.jpg', 'duration': 2910, 'age_limit': 16, @@ -42,9 +42,9 @@ class VideomoreIE(InfoExtractor): 'description': '«Медведей» ждет решающий матч. Макеев выясняет отношения со Стрельцовым. Парни узнают подробности прошлого Макеева.', 'series': 'Молодежка', 'episode': '80 серия', - 'episode_id': 40, + 'episode_number': 40, 'season': '2 сезон', - 'season_id': 2, + 'season_number': 2, 'thumbnail': 're:^https?://.*\.jpg', 'duration': 2809, 'age_limit': 16, @@ -62,9 +62,9 @@ class VideomoreIE(InfoExtractor): 'description': 'Молодежка 3 сезон скоро', 'series': 'Молодежка', 'episode': 'Команда проиграла из-за Бакина?', - 'episode_id': None, + 'episode_number': None, 'season': 'Промо', - 'season_id': 99, + 'season_number': 99, 'thumbnail': 're:^https?://.*\.jpg', 'duration': 29, 'age_limit': 16, @@ -128,9 +128,9 @@ class VideomoreIE(InfoExtractor): series = data.get('project_title') episode = data.get('title') - episode_id = data.get('episode_of_season') or None + episode_number = int_or_none(data.get('episode_of_season') or None) season = data.get('season_title') - season_id = data.get('season_pos') or None + season_number = int_or_none(data.get('season_pos') or None) return { 'id': video_id, @@ -138,9 +138,9 @@ class VideomoreIE(InfoExtractor): 'description': description, 'series': series, 'episode': episode, - 'episode_id': episode_id, + 'episode_number': episode_number, 'season': season, - 'season_id': season_id, + 'season_number': season_number, 'thumbnails': thumbnails, 'timestamp': timestamp, 'duration': duration, From 5bafcf65255261c2dc6482de6b1e545e5de1cbee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 1 Jan 2016 20:34:29 +0600 Subject: [PATCH 0264/1105] [udemy] Use chapter_number --- youtube_dl/extractor/udemy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 41097829d..1df636779 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -245,7 +245,7 @@ class UdemyCourseIE(UdemyIE): course_id, 'Downloading course curriculum') entries = [] - chapter, chapter_id = None, None + chapter, chapter_number = None, None for asset in response: asset_type = asset.get('assetType') or asset.get('asset_type') if asset_type == 'Video': @@ -256,13 +256,13 @@ class UdemyCourseIE(UdemyIE): 'url': 'https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'ie_key': UdemyIE.ie_key(), } - if chapter_id: - entry['chapter_id'] = chapter_id + if chapter_number: + entry['chapter_number'] = chapter_number if chapter: entry['chapter'] = chapter entries.append(entry) elif asset.get('type') == 'chapter': - chapter_id = asset.get('index') or asset.get('object_index') + chapter_number = asset.get('index') or asset.get('object_index') chapter = asset.get('title') return self.playlist_result(entries, course_id, course_title) From 054479754c661f40140f5db7f58916a0006c6b10 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 1 Jan 2016 21:03:16 +0100 Subject: [PATCH 0265/1105] [revision3] Add new extractor(closes #6388) - revision3.com - testtube.com - animalist.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/revision3.py | 103 ++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 youtube_dl/extractor/revision3.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b3f7059e4..7adce5499 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -557,6 +557,7 @@ from .redtube import RedTubeIE from .regiotv import RegioTVIE from .restudy import RestudyIE from .reverbnation import ReverbNationIE +from .revision3 import Revision3IE from .ringtv import RingTVIE from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py new file mode 100644 index 000000000..25fe4ef16 --- /dev/null +++ b/youtube_dl/extractor/revision3.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + parse_iso8601, + unescapeHTML, +) + + +class Revision3IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' + _TESTS = [{ + 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', + 'md5': 'd94a72d85d0a829766de4deb8daaf7df', + 'info_dict': { + 'id': '73034', + 'ext': 'webm', + 'title': '5 Google Predictions for 2016', + 'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.', + 'upload_date': '20151228', + 'timestamp': 1451325600, + 'duration': 187, + } + }, { + 'url': 'http://testtube.com/brainstuff', + 'info_dict': { + 'id': '251', + 'title': 'BrainStuff', + 'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.', + }, + 'playlist_mincount': 93, + }] + _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' + _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' + + def _real_extract(self, url): + domain, display_id = re.match(self._VALID_URL, url).groups() + page_info = self._download_json( + self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id) + + if page_info['data']['type'] == 'episode': + episode_data = page_info['data'] + video_id = compat_str(episode_data['video']['data']['id']) + video_data = self._download_json( + 'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id), + video_id)['items'][0] + + formats = [] + for media_type, media in video_data['media'].items(): + for quality_id, quality in media.items(): + if quality_id == 'hls': + formats.extend(self._extract_m3u8_formats( + quality['url'], video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': quality['url'], + 'format_id': '%s-%s' % (media_type, quality_id), + 'tbr': int_or_none(quality.get('bitrate')), + }) + self._sort_formats(formats) + + thumbnails = [{ + 'url': image_url, + 'id': image_id, + } for image_id, image_url in video_data.get('images', {}).items()] + + return { + 'id': video_id, + 'title': unescapeHTML(video_data['title']), + 'description': unescapeHTML(video_data.get('summary')), + 'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '), + 'author': episode_data.get('author'), + 'duration': int_or_none(video_data.get('duration')), + 'thumbnails': thumbnails, + 'formats': formats, + } + else: + show_data = page_info['show']['data'] + episodes_data = page_info['episodes']['data'] + num_episodes = page_info['meta']['totalEpisodes'] + processed_episodes = 0 + entries = [] + page_num = 1 + while True: + entries.extend([self.url_result( + url + '/%s' % episode['slug']) for episode in episodes_data]) + processed_episodes += len(episodes_data) + if processed_episodes == num_episodes: + break + page_num += 1 + episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % ( + domain, display_id + '/' + compat_str(page_num), domain), + display_id)['episodes']['data'] + + return self.playlist_result( + entries, compat_str(show_data['id']), + show_data.get('name'), show_data.get('summary')) From 8af2804a5d09e08d8916e7a7d2eab40864e9ecd8 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Fri, 1 Jan 2016 21:53:19 +0100 Subject: [PATCH 0266/1105] [testtube] Remove Extractor --- youtube_dl/extractor/revision3.py | 30 +++++++++-- youtube_dl/extractor/testtube.py | 90 ------------------------------- 2 files changed, 27 insertions(+), 93 deletions(-) delete mode 100644 youtube_dl/extractor/testtube.py diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py index 25fe4ef16..b1b8800b9 100644 --- a/youtube_dl/extractor/revision3.py +++ b/youtube_dl/extractor/revision3.py @@ -9,6 +9,7 @@ from ..utils import ( int_or_none, parse_iso8601, unescapeHTML, + qualities, ) @@ -19,12 +20,15 @@ class Revision3IE(InfoExtractor): 'md5': 'd94a72d85d0a829766de4deb8daaf7df', 'info_dict': { 'id': '73034', + 'display_id': 'technobuffalo/5-google-predictions-for-2016', 'ext': 'webm', 'title': '5 Google Predictions for 2016', 'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.', 'upload_date': '20151228', 'timestamp': 1451325600, 'duration': 187, + 'uploader': 'TechnoBuffalo', + 'uploader_id': 'technobuffalo', } }, { 'url': 'http://testtube.com/brainstuff', @@ -34,6 +38,20 @@ class Revision3IE(InfoExtractor): 'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.', }, 'playlist_mincount': 93, + }, { + 'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial', + 'info_dict': { + 'id': '60163', + 'display_id': 'dnews/5-weird-ways-plants-can-eat-animals', + 'duration': 275, + 'ext': 'webm', + 'title': '5 Weird Ways Plants Can Eat Animals', + 'description': 'Why have some plants evolved to eat meat?', + 'upload_date': '20150120', + 'timestamp': 1421763300, + 'uploader': 'DNews', + 'uploader_id': 'dnews', + }, }] _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' @@ -51,7 +69,7 @@ class Revision3IE(InfoExtractor): video_id)['items'][0] formats = [] - for media_type, media in video_data['media'].items(): + for vcodec, media in video_data['media'].items(): for quality_id, quality in media.items(): if quality_id == 'hls': formats.extend(self._extract_m3u8_formats( @@ -60,22 +78,28 @@ class Revision3IE(InfoExtractor): else: formats.append({ 'url': quality['url'], - 'format_id': '%s-%s' % (media_type, quality_id), + 'format_id': '%s-%s' % (vcodec, quality_id), 'tbr': int_or_none(quality.get('bitrate')), + 'vcodec': vcodec, }) self._sort_formats(formats) + preference = qualities(['mini', 'small', 'medium', 'large']) thumbnails = [{ 'url': image_url, 'id': image_id, + 'preference': preference(image_id) } for image_id, image_url in video_data.get('images', {}).items()] return { 'id': video_id, + 'display_id': display_id, 'title': unescapeHTML(video_data['title']), 'description': unescapeHTML(video_data.get('summary')), 'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '), 'author': episode_data.get('author'), + 'uploader': video_data.get('show', {}).get('name'), + 'uploader_id': video_data.get('show', {}).get('slug'), 'duration': int_or_none(video_data.get('duration')), 'thumbnails': thumbnails, 'formats': formats, @@ -89,7 +113,7 @@ class Revision3IE(InfoExtractor): page_num = 1 while True: entries.extend([self.url_result( - url + '/%s' % episode['slug']) for episode in episodes_data]) + 'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data]) processed_episodes += len(episodes_data) if processed_episodes == num_episodes: break diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py deleted file mode 100644 index 26655d690..000000000 --- a/youtube_dl/extractor/testtube.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - qualities, -) - - -class TestTubeIE(InfoExtractor): - _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)' - _TESTS = [{ - 'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial', - 'info_dict': { - 'id': '60163', - 'display_id': '5-weird-ways-plants-can-eat-animals', - 'duration': 275, - 'ext': 'webm', - 'title': '5 Weird Ways Plants Can Eat Animals', - 'description': 'Why have some plants evolved to eat meat?', - 'thumbnail': 're:^https?://.*\.jpg$', - 'uploader': 'DNews', - 'uploader_id': 'dnews', - }, - }, { - 'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping', - 'info_dict': { - 'id': 'fAGfJ4YjVus', - 'ext': 'mp4', - 'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science', - 'uploader': 'Science Channel', - 'uploader_id': 'ScienceChannel', - 'upload_date': '20150203', - 'description': 'md5:e61374030015bae1d2e22f096d4769d6', - } - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - youtube_url = self._html_search_regex( - r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', - webpage, 'youtube iframe', default=None) - if youtube_url: - return self.url_result(youtube_url, 'Youtube', video_id=display_id) - - video_id = self._search_regex( - r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);", - webpage, 'video ID') - - all_info = self._download_json( - 'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id, - video_id) - info = all_info['items'][0] - - formats = [] - for vcodec, fdatas in info['media'].items(): - for name, fdata in fdatas.items(): - formats.append({ - 'format_id': '%s-%s' % (vcodec, name), - 'url': fdata['url'], - 'vcodec': vcodec, - 'tbr': fdata.get('bitrate'), - }) - self._sort_formats(formats) - - duration = int_or_none(info.get('duration')) - images = info.get('images') - thumbnails = None - preference = qualities(['mini', 'small', 'medium', 'large']) - if images: - thumbnails = [{ - 'id': thumbnail_id, - 'url': img_url, - 'preference': preference(thumbnail_id) - } for thumbnail_id, img_url in images.items()] - - return { - 'id': video_id, - 'display_id': display_id, - 'title': info['title'], - 'description': info.get('summary'), - 'thumbnails': thumbnails, - 'uploader': info.get('show', {}).get('name'), - 'uploader_id': info.get('show', {}).get('slug'), - 'duration': duration, - 'formats': formats, - } From 94de6cf59cb1933e395775f9ffca4fa311adb0dc Mon Sep 17 00:00:00 2001 From: pingtux <pingtux@users.noreply.github.com> Date: Sat, 2 Jan 2016 01:35:09 +0100 Subject: [PATCH 0267/1105] Remove testtube import Extractor got deleted in remitamine/youtube-dl@8af2804 --- youtube_dl/extractor/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7adce5499..4c7e5223d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -687,7 +687,6 @@ from .telemb import TeleMBIE from .teletask import TeleTaskIE from .tenplay import TenPlayIE from .testurl import TestURLIE -from .testtube import TestTubeIE from .tf1 import TF1IE from .theintercept import TheInterceptIE from .theonion import TheOnionIE From 76a353c9e5511a522b5331cb3f74d4215341791c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Jan 2016 07:44:30 +0600 Subject: [PATCH 0268/1105] [ruutu] Fix extraction (Closes #8107) --- youtube_dl/extractor/ruutu.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 41fddc375..ffea438cc 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -75,9 +75,12 @@ class RuutuIE(InfoExtractor): preference = -1 if proto == 'rtmp' else 1 label = child.get('label') tbr = int_or_none(child.get('bitrate')) + format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto + if not self._is_valid_url(video_url, video_id, format_id): + continue width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] formats.append({ - 'format_id': '%s-%s' % (proto, label if label else tbr), + 'format_id': format_id, 'url': video_url, 'width': width, 'height': height, From f20a11ed257f21a1c658f827e0c2129a72582adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Jan 2016 19:22:39 +0600 Subject: [PATCH 0269/1105] [bbccouk] Extend _VALID_URL (Closes #8116) --- youtube_dl/extractor/bbc.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 923273fb2..5c621a32b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -23,7 +23,17 @@ class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' _ID_REGEX = r'[pb][\da-z]{7}' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>%s)' % _ID_REGEX + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?bbc\.co\.uk/ + (?: + programmes/(?!articles/)| + iplayer(?:/[^/]+)?/(?:episode/|playlist/)| + music/clips[/#]| + radio/player/ + ) + (?P<id>%s) + ''' % _ID_REGEX _MEDIASELECTOR_URLS = [ # Provides HQ HLS streams with even better quality that pc mediaset but fails @@ -193,6 +203,9 @@ class BBCCoUkIE(InfoExtractor): }, { 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo', 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf', + 'only_matching': True, } ] From c579c5e967cd223baa2c1f593b6ed65ac8643e42 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 Jan 2016 21:31:02 +0800 Subject: [PATCH 0270/1105] [baidu] Cleanups --- youtube_dl/extractor/baidu.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index 84fab551b..734b50d1d 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse class BaiduVideoIE(InfoExtractor): @@ -47,8 +46,6 @@ class BaiduVideoIE(InfoExtractor): entries = [] for episode in episodes_detail['videos']: - episode_id = '%s_%s' % (playlist_id, episode['episode']) - entries.append(self.url_result( episode['url'], video_title=episode['title'])) From a1d9f6c5dc15f1d62e0ab8c9dd6feaed4ff83a34 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 Jan 2016 21:36:35 +0800 Subject: [PATCH 0271/1105] [baidu] Improve playlist description --- youtube_dl/extractor/baidu.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index 734b50d1d..d60cb0fdd 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import unescapeHTML class BaiduVideoIE(InfoExtractor): @@ -14,7 +15,7 @@ class BaiduVideoIE(InfoExtractor): 'info_dict': { 'id': '1069', 'title': '中华小当家 TV版国语', - 'description': 'md5:40a9c1b1c7f4e05d642e7bb1c84eeda0', + 'description': 'md5:51be07afe461cf99fa61231421b5397c', }, 'playlist_count': 52, }, { @@ -40,7 +41,7 @@ class BaiduVideoIE(InfoExtractor): playlist_detail = self._call_api('xqinfo', category, playlist_id) playlist_title = playlist_detail['title'] - playlist_description = playlist_detail.get('intro') + playlist_description = unescapeHTML(playlist_detail.get('intro')) episodes_detail = self._call_api('xqsingle', category, playlist_id) From 88fb59d91bf5fd33dbe7863fbf1e4a9a641fa483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 2 Jan 2016 19:42:11 +0600 Subject: [PATCH 0272/1105] [bbccouk] Extend title extraction --- youtube_dl/extractor/bbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 5c621a32b..7b169881a 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -482,7 +482,8 @@ class BBCCoUkIE(InfoExtractor): if programme_id: formats, subtitles = self._download_media_selector(programme_id) - title = self._og_search_title(webpage) + title = self._og_search_title(webpage, default=None) or self._html_search_regex( + r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title') description = self._search_regex( r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>', webpage, 'description', default=None) From 03116772583103ba97eaf8ce2cbabba9742e1929 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 Jan 2016 21:44:49 +0800 Subject: [PATCH 0273/1105] [baidu] Add notes for API calls --- youtube_dl/extractor/baidu.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index d60cb0fdd..d35116a7c 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -28,8 +28,9 @@ class BaiduVideoIE(InfoExtractor): 'playlist_mincount': 12, }] - def _call_api(self, path, category, playlist_id): - return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (path, category, playlist_id), playlist_id) + def _call_api(self, path, category, playlist_id, note): + return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( + path, category, playlist_id), playlist_id, note) def _real_extract(self, url): category, playlist_id = re.match(self._VALID_URL, url).groups() @@ -38,12 +39,14 @@ class BaiduVideoIE(InfoExtractor): if category == 'tv': category = 'tvplay' - playlist_detail = self._call_api('xqinfo', category, playlist_id) + playlist_detail = self._call_api( + 'xqinfo', category, playlist_id, 'Download playlist JSON metadata') playlist_title = playlist_detail['title'] playlist_description = unescapeHTML(playlist_detail.get('intro')) - episodes_detail = self._call_api('xqsingle', category, playlist_id) + episodes_detail = self._call_api( + 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') entries = [] for episode in episodes_detail['videos']: From b7546397f080e65fed42c69701f423e4f57f43f5 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 Jan 2016 21:46:40 +0800 Subject: [PATCH 0274/1105] [baidu] Use list comprehension --- youtube_dl/extractor/baidu.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index d35116a7c..76b21e596 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -48,10 +48,9 @@ class BaiduVideoIE(InfoExtractor): episodes_detail = self._call_api( 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') - entries = [] - for episode in episodes_detail['videos']: - entries.append(self.url_result( - episode['url'], video_title=episode['title'])) + entries = [self.url_result( + episode['url'], video_title=episode['title'] + ) for episode in episodes_detail['videos']] return self.playlist_result( entries, playlist_id, playlist_title, playlist_description) From 2fffb1dcd0790da264f86f4a8d9c54a127269aca Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 Jan 2016 22:33:33 +0800 Subject: [PATCH 0275/1105] [qqmusic:playlist] Capture errors and update tests --- youtube_dl/extractor/qqmusic.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 1ba3bbddf..03f04b724 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -11,6 +11,7 @@ from ..utils import ( strip_jsonp, unescapeHTML, clean_html, + ExtractorError, ) @@ -315,7 +316,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): IE_DESC = 'QQ音乐 - 歌单' _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://y.qq.com/#type=taoge&id=3462654915', 'info_dict': { 'id': '3462654915', @@ -323,7 +324,16 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', }, 'playlist_count': 40, - } + 'skip': 'playlist gone', + }, { + 'url': 'http://y.qq.com/#type=taoge&id=1374105607', + 'info_dict': { + 'id': '1374105607', + 'title': '易入人心的华语民谣', + 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。', + }, + 'playlist_count': 20, + }] def _real_extract(self, url): list_id = self._match_id(url) @@ -331,14 +341,21 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): list_json = self._download_json( 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s' % list_id, list_id, 'Download list page', - transform_source=strip_jsonp)['cdlist'][0] + transform_source=strip_jsonp) + if not len(list_json.get('cdlist', [])): + if list_json.get('code'): + raise ExtractorError( + 'QQ Music said: error %d in fetching playlist info' % list_json['code'], + expected=True) + raise ExtractorError('Unable to get playlist info') + cdlist = list_json['cdlist'][0] entries = [ self.url_result( 'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] - ) for song in list_json['songlist'] + ) for song in cdlist['songlist'] ] - list_name = list_json.get('dissname') - list_description = clean_html(unescapeHTML(list_json.get('desc'))) + list_name = cdlist.get('dissname') + list_description = clean_html(unescapeHTML(cdlist.get('desc'))) return self.playlist_result(entries, list_id, list_name, list_description) From 141a273a8b7a95fac5ccc0b5141c8d2eac48e3f9 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sat, 2 Jan 2016 22:39:09 +0800 Subject: [PATCH 0276/1105] [qqmusic] Update tests --- youtube_dl/extractor/qqmusic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 03f04b724..45a3c41c5 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -178,7 +178,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): 'info_dict': { 'id': '001BLpXF2DyJe2', 'title': '林俊杰', - 'description': 'md5:2a222d89ba4455a3af19940c0481bb78', + 'description': 'md5:870ec08f7d8547c29c93010899103751', }, 'playlist_count': 12, } @@ -273,7 +273,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE): 'url': 'http://y.qq.com/#type=toplist&p=top_3', 'info_dict': { 'id': 'top_3', - 'title': 'QQ音乐巅峰榜·欧美', + 'title': '巅峰榜·欧美', 'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统' '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据' '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:' From 6b461026616b0e3ede9a302c74fc437541e19343 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sat, 2 Jan 2016 21:24:57 +0100 Subject: [PATCH 0277/1105] [zdf] fix rtmpt format downloading handle errors --- youtube_dl/extractor/zdf.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 2a1f2f6d1..c619a75e2 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -13,6 +13,7 @@ from ..utils import ( determine_ext, qualities, float_or_none, + ExtractorError, ) @@ -59,7 +60,6 @@ class ZDFIE(InfoExtractor): 'ext': 'flv', 'format_id': '%s-%d' % (proto, bitrate), 'tbr': bitrate, - 'protocol': proto, }) self._sort_formats(formats) return formats @@ -70,6 +70,15 @@ class ZDFIE(InfoExtractor): note='Downloading video info', errnote='Failed to download video info') + status_code = doc.find('./status/statuscode') + if status_code is not None and status_code.text != 'ok': + code = status_code.text + if code == 'notVisibleAnymore': + message = 'Video %s is not available' % video_id + else: + message = '%s returned error: %s' % (self.IE_NAME, code) + raise ExtractorError(message, expected=True) + title = doc.find('.//information/title').text description = xpath_text(doc, './/information/detail', 'description') duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) @@ -129,10 +138,10 @@ class ZDFIE(InfoExtractor): video_url, video_id, fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( - video_url, video_id, f4m_id='hds', fatal=False)) + video_url, video_id, f4m_id=format_id, fatal=False)) else: proto = format_m.group('proto').lower() From 4059eabd58dd5e68aa1229c0ba0ffce0262ef7d8 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Sat, 2 Jan 2016 21:29:10 +0100 Subject: [PATCH 0278/1105] [dreisat] use extract_from_xml_url from ZDFIE for info extraction(fixes #7680)(fixes #8104)(closes #8121) --- youtube_dl/extractor/dreisat.py | 59 ++------------------------------- 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 8ac8587be..028144f20 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -2,14 +2,10 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unified_strdate, -) +from .zdf import ZDFIE -class DreiSatIE(InfoExtractor): +class DreiSatIE(ZDFIE): IE_NAME = '3sat' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TESTS = [ @@ -35,53 +31,4 @@ class DreiSatIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id - details_doc = self._download_xml(details_url, video_id, 'Downloading video details') - - status_code = details_doc.find('./status/statuscode') - if status_code is not None and status_code.text != 'ok': - code = status_code.text - if code == 'notVisibleAnymore': - message = 'Video %s is not available' % video_id - else: - message = '%s returned error: %s' % (self.IE_NAME, code) - raise ExtractorError(message, expected=True) - - thumbnail_els = details_doc.findall('.//teaserimage') - thumbnails = [{ - 'width': int(te.attrib['key'].partition('x')[0]), - 'height': int(te.attrib['key'].partition('x')[2]), - 'url': te.text, - } for te in thumbnail_els] - - information_el = details_doc.find('.//information') - video_title = information_el.find('./title').text - video_description = information_el.find('./detail').text - - details_el = details_doc.find('.//details') - video_uploader = details_el.find('./channel').text - upload_date = unified_strdate(details_el.find('./airtime').text) - - format_els = details_doc.findall('.//formitaet') - formats = [{ - 'format_id': fe.attrib['basetype'], - 'width': int(fe.find('./width').text), - 'height': int(fe.find('./height').text), - 'url': fe.find('./url').text, - 'filesize': int(fe.find('./filesize').text), - 'video_bitrate': int(fe.find('./videoBitrate').text), - } for fe in format_els - if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')] - - self._sort_formats(formats) - - return { - '_type': 'video', - 'id': video_id, - 'title': video_title, - 'formats': formats, - 'description': video_description, - 'thumbnails': thumbnails, - 'thumbnail': thumbnails[-1]['url'], - 'uploader': video_uploader, - 'upload_date': upload_date, - } + return self.extract_from_xml_url(video_id, details_url) From e1a0bfdffe25dda494a9da8b02fba0c9ad39f4fe Mon Sep 17 00:00:00 2001 From: dyn888 <dyn.8.8.8+github@gmail.com> Date: Sun, 3 Jan 2016 04:11:19 +0100 Subject: [PATCH 0279/1105] [youtube] added vcodec/acodec/abr for multiple itags Should make downloading with filters more precise and easier, ie. bestvideo[vcodec=h264]. By default a lot of codecs are specified as avc1.xxxxxx and unique for each format, which makes them unusable for bestvideo selection. --- youtube_dl/extractor/youtube.py | 120 ++++++++++++++++---------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4aac2cc03..64386f34a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -277,55 +277,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor): $""" _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _formats = { - '5': {'ext': 'flv', 'width': 400, 'height': 240}, - '6': {'ext': 'flv', 'width': 450, 'height': 270}, - '13': {'ext': '3gp'}, - '17': {'ext': '3gp', 'width': 176, 'height': 144}, - '18': {'ext': 'mp4', 'width': 640, 'height': 360}, - '22': {'ext': 'mp4', 'width': 1280, 'height': 720}, - '34': {'ext': 'flv', 'width': 640, 'height': 360}, - '35': {'ext': 'flv', 'width': 854, 'height': 480}, - '36': {'ext': '3gp', 'width': 320, 'height': 240}, - '37': {'ext': 'mp4', 'width': 1920, 'height': 1080}, - '38': {'ext': 'mp4', 'width': 4096, 'height': 3072}, - '43': {'ext': 'webm', 'width': 640, 'height': 360}, - '44': {'ext': 'webm', 'width': 854, 'height': 480}, - '45': {'ext': 'webm', 'width': 1280, 'height': 720}, - '46': {'ext': 'webm', 'width': 1920, 'height': 1080}, - '59': {'ext': 'mp4', 'width': 854, 'height': 480}, - '78': {'ext': 'mp4', 'width': 854, 'height': 480}, + '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, + '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, + '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, + '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '36': {'ext': '3gp', 'width': 320, 'height': 240, 'acodec': 'aac', 'abr': 32, 'vcodec': 'mp4v'}, + '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, - # 3d videos - '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'preference': -20}, - '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'preference': -20}, - '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'preference': -20}, - '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'preference': -20}, - '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'preference': -20}, - '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'preference': -20}, - '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'preference': -20}, + # 3D videos + '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, + '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, + '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, # Apple HTTP Live Streaming - '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, - '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'preference': -10}, - '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'preference': -10}, - '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'preference': -10}, - '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'preference': -10}, - '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'preference': -10}, - '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'preference': -10}, + '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, # DASH mp4 video - '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) - '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, - '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, - '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'}, + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'fps': 60, 'preference': -40}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'h264', 'preference': -40}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, @@ -339,26 +339,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, - '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'}, - '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) - '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, - '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, - '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'}, - '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'preference': -40}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'vcodec': 'vp9', 'fps': 60, 'preference': -40}, # Dash webm audio - '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, - '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, + '171': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, + '172': {'ext': 'webm', 'acodec': 'vorbis', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, # Dash webm audio with opus inside '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50}, From ea6abd740f8dd1c559e186bc1cbcfa73854a90d1 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Sun, 3 Jan 2016 10:12:13 +0500 Subject: [PATCH 0280/1105] [nowtv] Mark broken --- youtube_dl/extractor/nowtv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index fd107aca2..916a102bf 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -71,6 +71,7 @@ class NowTVBaseIE(InfoExtractor): class NowTVIE(NowTVBaseIE): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?nowtv\.(?:de|at|ch)/(?:rtl|rtl2|rtlnitro|superrtl|ntv|vox)/(?P<show_id>[^/]+)/(?:(?:list/[^/]+|jahr/\d{4}/\d{1,2})/)?(?P<id>[^/]+)/(?:player|preview)' _TESTS = [{ From a7aaa39863a6b1ab89a56a55521fd4779c6ac900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 01:08:34 +0600 Subject: [PATCH 0281/1105] [utils] Extract known extensions for reuse --- youtube_dl/utils.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 0ed6c45c8..da4ec7f20 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -70,6 +70,21 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] +KNOWN_EXTENSIONS = ( + 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', + 'flv', 'f4v', 'f4a', 'f4b', + 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', + 'mkv', 'mka', 'mk3d', + 'avi', 'divx', + 'mov', + 'asf', 'wmv', 'wma', + '3gp', '3g2', + 'mp3', + 'flac', + 'ape', + 'wav', + 'f4f', 'f4m', 'm3u8', 'smil') + def preferredencoding(): """Get preferred encoding. @@ -942,20 +957,8 @@ def determine_ext(url, default_ext='unknown_video'): guess = url.partition('?')[0].rpartition('.')[2] if re.match(r'^[A-Za-z0-9]+$', guess): return guess - elif guess.rstrip('/') in ( - 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', - 'flv', 'f4v', 'f4a', 'f4b', - 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus', - 'mkv', 'mka', 'mk3d', - 'avi', 'divx', - 'mov', - 'asf', 'wmv', 'wma', - '3gp', '3g2', - 'mp3', - 'flac', - 'ape', - 'wav', - 'f4f', 'f4m', 'm3u8', 'smil'): + # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download + elif guess.rstrip('/') in KNOWN_EXTENSIONS: return guess.rstrip('/') else: return default_ext From e54c44eeab9e088c344ec379cdc4b1fbc63ff324 Mon Sep 17 00:00:00 2001 From: pingtux <pingtux@users.noreply.github.com> Date: Sat, 2 Jan 2016 01:13:49 +0100 Subject: [PATCH 0282/1105] [20min.ch] Add new extractor (closes #5977) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/min20.py | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 youtube_dl/extractor/min20.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4c7e5223d..64e1fd334 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -364,6 +364,7 @@ from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE +from .min20 import Min20IE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .miomio import MioMioIE diff --git a/youtube_dl/extractor/min20.py b/youtube_dl/extractor/min20.py new file mode 100644 index 000000000..a3f91671e --- /dev/null +++ b/youtube_dl/extractor/min20.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class Min20IE(InfoExtractor): + _VALID_URL = r'http://www\.20min\.ch/.+?-(?P<id>[0-9]+)$' + _TEST = { + 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', + 'md5': 'cd4cbb99b94130cff423e967cd275e5e', + 'info_dict': { + 'id': '22050469', + 'ext': 'flv', + 'title': '«Wir müssen mutig nach vorne schauen»', + 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', + 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + } + } + + # location of the flv videos, can't be extracted from the web page + _BASE_URL = "http://flv-rr.20min-tv.ch/videos/" + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<h1><span>(.+?)</span></h1>', webpage, 'title') + flash_id = self._search_regex(r"so\.addVariable\(\"file1\",\"([0-9]+)\"\)", webpage, 'flash_id') + + description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', webpage, 'description') + thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)" />', webpage, 'thumbnail') + url = self._BASE_URL + flash_id + "m.flv" + + return { + 'id': video_id, + 'url': url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail + } From 66295fa4a6a712d4d41ff54ee9c361c7c83b44bc Mon Sep 17 00:00:00 2001 From: pingtux <pingtux@users.noreply.github.com> Date: Sat, 2 Jan 2016 16:19:05 +0100 Subject: [PATCH 0283/1105] [20min.ch] Added support for videoportal --- youtube_dl/extractor/min20.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/min20.py b/youtube_dl/extractor/min20.py index a3f91671e..23aead19d 100644 --- a/youtube_dl/extractor/min20.py +++ b/youtube_dl/extractor/min20.py @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor class Min20IE(InfoExtractor): - _VALID_URL = r'http://www\.20min\.ch/.+?-(?P<id>[0-9]+)$' - _TEST = { + _VALID_URL = r'http://www\.20min\.ch/(videotv/\?vid=(?P<video_id>[0-9]+)|.+?-(?P<page_id>[0-9]+)$)' + _TESTS = [{ 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', 'md5': 'cd4cbb99b94130cff423e967cd275e5e', 'info_dict': { @@ -16,15 +18,29 @@ class Min20IE(InfoExtractor): 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' } - } + }, { + 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', + 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'info_dict': { + 'id': '469148', + 'ext': 'flv', + 'title': '85 000 Franken für 15 perfekte Minuten', + 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', + 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' + } + }] # location of the flv videos, can't be extracted from the web page _BASE_URL = "http://flv-rr.20min-tv.ch/videos/" def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('page_id') + if video_id is None: + # URL from the videoportal + video_id = mobj.group('video_id') webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1><span>(.+?)</span></h1>', webpage, 'title') + title = self._html_search_regex(r'<h1>.*<span>(.+?)</span></h1>', webpage, 'title') flash_id = self._search_regex(r"so\.addVariable\(\"file1\",\"([0-9]+)\"\)", webpage, 'flash_id') description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', webpage, 'description') From 133b1886fc6721090ea5d3be3d382626e2602b48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 02:33:08 +0600 Subject: [PATCH 0284/1105] [20min] Improve (Closes #8110) --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/min20.py | 56 ------------------------ youtube_dl/extractor/twentymin.py | 73 +++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 57 deletions(-) delete mode 100644 youtube_dl/extractor/min20.py create mode 100644 youtube_dl/extractor/twentymin.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 64e1fd334..625b0bf16 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -364,7 +364,6 @@ from .mdr import MDRIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mgoon import MgoonIE -from .min20 import Min20IE from .minhateca import MinhatecaIE from .ministrygrid import MinistryGridIE from .miomio import MioMioIE @@ -747,6 +746,7 @@ from .tvp import TvpIE, TvpSeriesIE from .tvplay import TVPlayIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE +from .twentymin import TwentyMinutenIE from .twentytwotracks import ( TwentyTwoTracksIE, TwentyTwoTracksGenreIE diff --git a/youtube_dl/extractor/min20.py b/youtube_dl/extractor/min20.py deleted file mode 100644 index 23aead19d..000000000 --- a/youtube_dl/extractor/min20.py +++ /dev/null @@ -1,56 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class Min20IE(InfoExtractor): - _VALID_URL = r'http://www\.20min\.ch/(videotv/\?vid=(?P<video_id>[0-9]+)|.+?-(?P<page_id>[0-9]+)$)' - _TESTS = [{ - 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', - 'md5': 'cd4cbb99b94130cff423e967cd275e5e', - 'info_dict': { - 'id': '22050469', - 'ext': 'flv', - 'title': '«Wir müssen mutig nach vorne schauen»', - 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', - 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' - } - }, { - 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', - 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', - 'info_dict': { - 'id': '469148', - 'ext': 'flv', - 'title': '85 000 Franken für 15 perfekte Minuten', - 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', - 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' - } - }] - - # location of the flv videos, can't be extracted from the web page - _BASE_URL = "http://flv-rr.20min-tv.ch/videos/" - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('page_id') - if video_id is None: - # URL from the videoportal - video_id = mobj.group('video_id') - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<h1>.*<span>(.+?)</span></h1>', webpage, 'title') - flash_id = self._search_regex(r"so\.addVariable\(\"file1\",\"([0-9]+)\"\)", webpage, 'flash_id') - - description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', webpage, 'description') - thumbnail = self._html_search_regex(r'<meta property="og:image" content="(.+?)" />', webpage, 'thumbnail') - url = self._BASE_URL + flash_id + "m.flv" - - return { - 'id': video_id, - 'url': url, - 'title': title, - 'description': description, - 'thumbnail': thumbnail - } diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py new file mode 100644 index 000000000..ca7d953b8 --- /dev/null +++ b/youtube_dl/extractor/twentymin.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import remove_end + + +class TwentyMinutenIE(InfoExtractor): + IE_NAME = '20min' + _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))' + _TESTS = [{ + # regular video + 'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2', + 'md5': 'b52d6bc6ea6398e6a38f12cfd418149c', + 'info_dict': { + 'id': '469148', + 'ext': 'flv', + 'title': '85 000 Franken für 15 perfekte Minuten', + 'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', + 'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' + } + }, { + # news article with video + 'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', + 'md5': 'cd4cbb99b94130cff423e967cd275e5e', + 'info_dict': { + 'id': '469408', + 'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', + 'ext': 'flv', + 'title': '«Wir müssen mutig nach vorne schauen»', + 'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', + 'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' + } + }, { + 'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738', + 'only_matching': True, + }, { + 'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_regex( + r'<h1>.*?<span>(.+?)</span></h1>', + webpage, 'title', default=None) + if not title: + title = remove_end(re.sub( + r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') + + if not video_id: + video_id = self._search_regex( + r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id') + + description = self._html_search_meta( + 'description', webpage, 'description') + thumbnail = self._og_search_thumbnail(webpage) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } From 5aa535c32982f1f9b2f689097a64be78d25e7d90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 02:55:25 +0600 Subject: [PATCH 0285/1105] [bbccouk] Update tests (Closes #8090) --- youtube_dl/extractor/bbc.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 7b169881a..ce99a34ab 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -124,14 +124,14 @@ class BBCCoUkIE(InfoExtractor): }, 'skip': 'Episode is no longer available on BBC iPlayer Radio', }, { - 'url': 'http://www.bbc.co.uk/music/clips/p02frcc3', + 'url': 'http://www.bbc.co.uk/music/clips/p022h44b', 'note': 'Audio', 'info_dict': { - 'id': 'p02frcch', + 'id': 'p022h44j', 'ext': 'flv', - 'title': 'Pete Tong, Past, Present and Future Special, Madeon - After Hours mix', - 'description': 'French house superstar Madeon takes us out of the club and onto the after party.', - 'duration': 3507, + 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances', + 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.", + 'duration': 227, }, 'params': { # rtmp download @@ -182,13 +182,12 @@ class BBCCoUkIE(InfoExtractor): }, { # iptv-all mediaset fails with geolocation however there is no geo restriction # for this programme at all - 'url': 'http://www.bbc.co.uk/programmes/b06bp7lf', + 'url': 'http://www.bbc.co.uk/programmes/b06rkn85', 'info_dict': { - 'id': 'b06bp7kf', + 'id': 'b06rkms3', 'ext': 'flv', - 'title': "Annie Mac's Friday Night, B.Traits sits in for Annie", - 'description': 'B.Traits sits in for Annie Mac with a Mini-Mix from Disclosure.', - 'duration': 10800, + 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1", + 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!", }, 'params': { # rtmp download From ab3176af3472917537635bd96b860a974094b0af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 03:34:15 +0600 Subject: [PATCH 0286/1105] [ivi] Fix extraction and modernize --- youtube_dl/extractor/ivi.py | 69 ++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 029878d24..216c534b5 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -7,6 +7,7 @@ import json from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, sanitized_Request, ) @@ -27,7 +28,7 @@ class IviIE(InfoExtractor): 'title': 'Иван Васильевич меняет профессию', 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f', 'duration': 5498, - 'thumbnail': 'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', + 'thumbnail': 're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', }, @@ -38,33 +39,23 @@ class IviIE(InfoExtractor): 'info_dict': { 'id': '9549', 'ext': 'mp4', - 'title': 'Двое из ларца - Серия 1', + 'title': 'Двое из ларца - Дело Гольдберга (1 часть)', + 'series': 'Двое из ларца', + 'episode': 'Дело Гольдберга (1 часть)', + 'episode_number': 1, 'duration': 2655, - 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg', + 'thumbnail': 're:^https?://.*\.jpg$', }, 'skip': 'Only works from Russia', } ] # Sorted by quality - _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] - - # Sorted by size - _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480'] - - def _extract_description(self, html): - m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html) - return m.group('description') if m is not None else None - - def _extract_comment_count(self, html): - m = re.search('(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html) - return int(m.group('commentcount')) if m is not None else 0 + _KNOWN_FORMATS = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] def _real_extract(self, url): video_id = self._match_id(url) - api_url = 'http://api.digitalaccess.ru/api/json/' - data = { 'method': 'da.content.get', 'params': [ @@ -76,11 +67,10 @@ class IviIE(InfoExtractor): ] } - request = sanitized_Request(api_url, json.dumps(data)) - - video_json_page = self._download_webpage( + request = sanitized_Request( + 'http://api.digitalaccess.ru/api/json/', json.dumps(data)) + video_json = self._download_json( request, video_id, 'Downloading video JSON') - video_json = json.loads(video_json_page) if 'error' in video_json: error = video_json['error'] @@ -95,35 +85,42 @@ class IviIE(InfoExtractor): formats = [{ 'url': x['url'], 'format_id': x['content_format'], - 'preference': self._known_formats.index(x['content_format']), - } for x in result['files'] if x['content_format'] in self._known_formats] + 'preference': self._KNOWN_FORMATS.index(x['content_format']), + } for x in result['files'] if x['content_format'] in self._KNOWN_FORMATS] self._sort_formats(formats) - if not formats: - raise ExtractorError('No media links available for %s' % video_id) - - duration = result['duration'] - compilation = result['compilation'] title = result['title'] + duration = int_or_none(result.get('duration')) + compilation = result.get('compilation') + episode = title if compilation else None + title = '%s - %s' % (compilation, title) if compilation is not None else title - previews = result['preview'] - previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) - thumbnail = previews[-1]['url'] if len(previews) > 0 else None + thumbnails = [{ + 'url': preview['url'], + 'id': preview.get('content_format'), + } for preview in result.get('preview', []) if preview.get('url')] - video_page = self._download_webpage(url, video_id, 'Downloading video page') - description = self._extract_description(video_page) - comment_count = self._extract_comment_count(video_page) + webpage = self._download_webpage(url, video_id) + + episode_number = int_or_none(self._search_regex( + r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)', + webpage, 'episode number', default=None)) + + description = self._og_search_description(webpage, default=None) or self._html_search_meta( + 'description', webpage, 'description', default=None) return { 'id': video_id, 'title': title, - 'thumbnail': thumbnail, + 'series': compilation, + 'episode': episode, + 'episode_number': episode_number, + 'thumbnails': thumbnails, 'description': description, 'duration': duration, - 'comment_count': comment_count, 'formats': formats, } From c6270b2ed5be9e83bcb8114511641f9c5ad9008d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 03:49:18 +0600 Subject: [PATCH 0287/1105] [ivi:compilation] Fix extraction --- youtube_dl/extractor/ivi.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 216c534b5..9ccfee173 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -146,8 +146,11 @@ class IviCompilationIE(InfoExtractor): }] def _extract_entries(self, html, compilation_id): - return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi') - for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)] + return [ + self.url_result( + 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key()) + for serie in re.findall( + r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -155,7 +158,8 @@ class IviCompilationIE(InfoExtractor): season_id = mobj.group('seasonid') if season_id is not None: # Season link - season_page = self._download_webpage(url, compilation_id, 'Downloading season %s web page' % season_id) + season_page = self._download_webpage( + url, compilation_id, 'Downloading season %s web page' % season_id) playlist_id = '%s/season%s' % (compilation_id, season_id) playlist_title = self._html_search_meta('title', season_page, 'title') entries = self._extract_entries(season_page, compilation_id) @@ -163,8 +167,9 @@ class IviCompilationIE(InfoExtractor): compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page') playlist_id = compilation_id playlist_title = self._html_search_meta('title', compilation_page, 'title') - seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page) - if len(seasons) == 0: # No seasons in this compilation + seasons = re.findall( + r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page) + if not seasons: # No seasons in this compilation entries = self._extract_entries(compilation_page, compilation_id) else: entries = [] From 1463c5b9ac16bd50d5e30d2145ec584d1e6e74ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 03:54:52 +0600 Subject: [PATCH 0288/1105] [ivi] Extract season info --- youtube_dl/extractor/ivi.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 9ccfee173..d0f00cdea 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -41,6 +41,8 @@ class IviIE(InfoExtractor): 'ext': 'mp4', 'title': 'Двое из ларца - Дело Гольдберга (1 часть)', 'series': 'Двое из ларца', + 'season': 'Сезон 1', + 'season_number': 1, 'episode': 'Дело Гольдберга (1 часть)', 'episode_number': 1, 'duration': 2655, @@ -105,6 +107,13 @@ class IviIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + season = self._search_regex( + r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)', + webpage, 'season', default=None) + season_number = int_or_none(self._search_regex( + r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"', + webpage, 'season number', default=None)) + episode_number = int_or_none(self._search_regex( r'<meta[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)', webpage, 'episode number', default=None)) @@ -116,6 +125,8 @@ class IviIE(InfoExtractor): 'id': video_id, 'title': title, 'series': compilation, + 'season': season, + 'season_number': season_number, 'episode': episode, 'episode_number': episode_number, 'thumbnails': thumbnails, From 896c7a23cd9fbfc589f26e9a7bbada38a325e2ad Mon Sep 17 00:00:00 2001 From: bpfoley <bpfoley> Date: Wed, 30 Dec 2015 21:50:26 +0000 Subject: [PATCH 0289/1105] [extractor/rte.py] Add support for RTE radio player While here, stop RteIE changing filename extensions to .mp4. The files saved are .flv containers with h264 video. --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/rte.py | 77 ++++++++++++++++++++++++++++---- 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb7151443..fe3c32874 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -556,7 +556,7 @@ from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE -from .rte import RteIE +from .rte import RteIE, RteRadioIE from .rtlnl import RtlNlIE from .rtl2 import RTL2IE from .rtp import RTPIE diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index d9cfbf180..25abcee92 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -5,16 +5,19 @@ from .common import InfoExtractor from ..utils import ( float_or_none, + unescapeHTML, ) class RteIE(InfoExtractor): + IE_NAME = 'rte' + IE_DESC = 'Raidió Teilifís Éireann TV' _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', 'info_dict': { 'id': '10478715', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Watch iWitness online', 'thumbnail': 're:^https?://.*\.jpg$', 'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.', @@ -44,13 +47,6 @@ class RteIE(InfoExtractor): # f4m_url = server + relative_url f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url'] f4m_formats = self._extract_f4m_formats(f4m_url, video_id) - f4m_formats = [{ - 'format_id': f['format_id'], - 'url': f['url'], - 'ext': 'mp4', - 'width': f['width'], - 'height': f['height'], - } for f in f4m_formats] return { 'id': video_id, @@ -60,3 +56,68 @@ class RteIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, } + + + +class RteRadioIE(InfoExtractor): + IE_NAME = 'rte:radio' + IE_DESC = 'Raidió Teilifís Éireann radio' + # Radioplayer URLs have the specifier #!rii=<channel_id>:<id>:<playable_item_id>:<date>: + # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. + # An <id> uniquely defines an individual recording, and is the only part we require. + _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', + 'info_dict': { + 'id': '10507902', + 'ext': 'flv', + 'title': 'Gloria', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Tim Thurston guides you through a millennium of sacred music featuring Gregorian chant, pure solo voices and choral masterpieces, framed around the glorious music of J.S. Bach.', + 'duration': 7230.0, + }, + 'params': { + 'skip_download': 'f4m fails with --test atm' + } + } + + def _real_extract(self, url): + item_id = self._match_id(url) + feeds_url = 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id + json_string = self._download_json(feeds_url, item_id) + + # NB the string values in the JSON are stored using XML escaping(!) + show = json_string['shows'][0] + title = unescapeHTML(show['title']) + description = unescapeHTML(show.get('description')) + thumbnail = show.get('thumbnail') + duration = float_or_none(show.get('duration'), 1000) + + mg = show['media:group'][0] + + formats = [] + + if mg.get('url') and not mg['url'].startswith('rtmpe:'): + formats.append({'url': mg.get('url')}) + + if mg.get('hls_server') and mg.get('hls_url'): + hls_url = mg['hls_server'] + mg['hls_url'] + hls_formats = self._extract_m3u8_formats( + hls_url, item_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(hls_formats) + + if mg.get('hds_server') and mg.get('hds_url'): + f4m_url = mg['hds_server'] + mg['hds_url'] + f4m_formats = self._extract_f4m_formats( + f4m_url, item_id, f4m_id='hds', fatal=False) + formats.extend(f4m_formats) + + return { + 'id': item_id, + 'title': title, + 'formats': formats, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + } From fb588f6a5663811da83cde68d17ff4eb041022ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 04:32:47 +0600 Subject: [PATCH 0290/1105] Credit @bpfoley for rte:radio (#8063) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 20f51009e..e39d07efe 100644 --- a/AUTHORS +++ b/AUTHORS @@ -150,3 +150,4 @@ reiv Muratcan Simsek Evan Lu flatgreen +Brian Foley From 0238451fc0f3c5f06827d5197a904981890a3d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 04:49:13 +0600 Subject: [PATCH 0291/1105] [rte] PEP 8 --- youtube_dl/extractor/rte.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index 25abcee92..673399f04 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -58,7 +58,6 @@ class RteIE(InfoExtractor): } - class RteRadioIE(InfoExtractor): IE_NAME = 'rte:radio' IE_DESC = 'Raidió Teilifís Éireann radio' @@ -102,15 +101,15 @@ class RteRadioIE(InfoExtractor): formats.append({'url': mg.get('url')}) if mg.get('hls_server') and mg.get('hls_url'): - hls_url = mg['hls_server'] + mg['hls_url'] + hls_url = mg['hls_server'] + mg['hls_url'] hls_formats = self._extract_m3u8_formats( - hls_url, item_id, 'mp4', m3u8_id='hls', fatal=False) + hls_url, item_id, 'mp4', m3u8_id='hls', fatal=False) formats.extend(hls_formats) if mg.get('hds_server') and mg.get('hds_url'): f4m_url = mg['hds_server'] + mg['hds_url'] f4m_formats = self._extract_f4m_formats( - f4m_url, item_id, f4m_id='hds', fatal=False) + f4m_url, item_id, f4m_id='hds', fatal=False) formats.extend(f4m_formats) return { From 9746f4314ad0fe55076eb35fd70413623b0c8ab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 05:01:32 +0600 Subject: [PATCH 0292/1105] [rte:radio] Simplify --- youtube_dl/extractor/rte.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index 673399f04..c17ef2cc2 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor - from ..utils import ( float_or_none, unescapeHTML, @@ -98,19 +97,19 @@ class RteRadioIE(InfoExtractor): formats = [] if mg.get('url') and not mg['url'].startswith('rtmpe:'): - formats.append({'url': mg.get('url')}) + formats.append({'url': mg['url']}) if mg.get('hls_server') and mg.get('hls_url'): - hls_url = mg['hls_server'] + mg['hls_url'] - hls_formats = self._extract_m3u8_formats( - hls_url, item_id, 'mp4', m3u8_id='hls', fatal=False) - formats.extend(hls_formats) + formats.extend(self._extract_m3u8_formats( + mg['hls_server'] + mg['hls_url'], item_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) if mg.get('hds_server') and mg.get('hds_url'): - f4m_url = mg['hds_server'] + mg['hds_url'] - f4m_formats = self._extract_f4m_formats( - f4m_url, item_id, f4m_id='hds', fatal=False) - formats.extend(f4m_formats) + formats.extend(self._extract_f4m_formats( + mg['hds_server'] + mg['hds_url'], item_id, + f4m_id='hds', fatal=False)) + + self._sort_formats(formats) return { 'id': item_id, From 9938a17f92ad8242d8841528a89df647ff759183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 4 Jan 2016 05:04:48 +0600 Subject: [PATCH 0293/1105] [rte:radio] Extract timestamp --- youtube_dl/extractor/rte.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py index c17ef2cc2..47c8331fe 100644 --- a/youtube_dl/extractor/rte.py +++ b/youtube_dl/extractor/rte.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( float_or_none, + parse_iso8601, unescapeHTML, ) @@ -69,10 +70,12 @@ class RteRadioIE(InfoExtractor): 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', 'info_dict': { 'id': '10507902', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Gloria', 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'Tim Thurston guides you through a millennium of sacred music featuring Gregorian chant, pure solo voices and choral masterpieces, framed around the glorious music of J.S. Bach.', + 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', + 'timestamp': 1451203200, + 'upload_date': '20151227', 'duration': 7230.0, }, 'params': { @@ -82,8 +85,10 @@ class RteRadioIE(InfoExtractor): def _real_extract(self, url): item_id = self._match_id(url) - feeds_url = 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id - json_string = self._download_json(feeds_url, item_id) + + json_string = self._download_json( + 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id, + item_id) # NB the string values in the JSON are stored using XML escaping(!) show = json_string['shows'][0] @@ -91,6 +96,7 @@ class RteRadioIE(InfoExtractor): description = unescapeHTML(show.get('description')) thumbnail = show.get('thumbnail') duration = float_or_none(show.get('duration'), 1000) + timestamp = parse_iso8601(show.get('published')) mg = show['media:group'][0] @@ -114,8 +120,9 @@ class RteRadioIE(InfoExtractor): return { 'id': item_id, 'title': title, - 'formats': formats, 'description': description, 'thumbnail': thumbnail, + 'timestamp': timestamp, 'duration': duration, + 'formats': formats, } From 3f17c357d9958bfae18f06676c89ffdb63553509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 4 Jan 2016 18:35:31 +0100 Subject: [PATCH 0294/1105] [downloader/hls] Don't let ffmpeg read from stdin (#8139) If you run 'while read aurl ; do youtube-dl "${aurl}"; done < path_to_batch_file' (batch_file contains one url per line that uses the hls downloader) each call to youtube-dl consumed some characters and 'read' would assing to 'aurl' a non valid url (This is the same problem that was fixed for the ffmpeg postprocessors in cffcbc02de504d84e1c2677bb525c00b03e04f53) --- youtube_dl/downloader/hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index b5a3e1167..d186d402d 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -46,7 +46,7 @@ class HlsFD(FileDownloader): self._debug_cmd(args) - retval = subprocess.call(args) + retval = subprocess.call(args, stdin=subprocess.PIPE) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) From fff79f1867c5ac50eb7f88201dd3f728996d52a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:05:37 +0600 Subject: [PATCH 0295/1105] [amp] Add missing subtitles to info dict --- youtube_dl/extractor/amp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py index 1035d1c48..69e6baff7 100644 --- a/youtube_dl/extractor/amp.py +++ b/youtube_dl/extractor/amp.py @@ -76,5 +76,6 @@ class AMPIE(InfoExtractor): 'thumbnails': thumbnails, 'timestamp': parse_iso8601(item.get('pubDate'), ' '), 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')), + 'subtitles': subtitles, 'formats': formats, } From 1dcc38b233f33112e00e1ddf0de6eb8ba1ef028e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:11:07 +0600 Subject: [PATCH 0296/1105] [dramafever] Improve subtitles extraction (Closes #8136) --- youtube_dl/extractor/dramafever.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index b3b21d65f..0e9e6f7ba 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -105,13 +105,16 @@ class DramaFeverIE(DramaFeverBaseIE): video_id, 'Downloading episode info JSON', fatal=False) if episode_info: value = episode_info.get('value') - if value: - subfile = value[0].get('subfile') or value[0].get('new_subfile') - if subfile and subfile != 'http://www.dramafever.com/st/': - info.setdefault('subtitles', {}).setdefault('English', []).append({ - 'ext': 'srt', - 'url': subfile, - }) + if isinstance(value, list): + for v in value: + if v.get('type') == 'Episode': + subfile = v.get('subfile') or v.get('new_subfile') + if subfile and subfile != 'http://www.dramafever.com/st/': + info.setdefault('subtitles', {}).setdefault('English', []).append({ + 'ext': 'srt', + 'url': subfile, + }) + break return info From 8f4c56f33412db909e92218f4933d5b08beb9f21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:17:33 +0600 Subject: [PATCH 0297/1105] [dramafever] Extract episode number --- youtube_dl/extractor/dramafever.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 0e9e6f7ba..8d71d6769 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -12,6 +12,7 @@ from ..compat import ( from ..utils import ( ExtractorError, clean_html, + int_or_none, sanitized_Request, ) @@ -114,6 +115,7 @@ class DramaFeverIE(DramaFeverBaseIE): 'ext': 'srt', 'url': subfile, }) + info['episode_number'] = int_or_none(v.get('number')) break return info From bd19aa0ed32ca4d041e691d47697ad7b82454b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:28:48 +0600 Subject: [PATCH 0298/1105] [dramafever] Extract episode --- youtube_dl/extractor/dramafever.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 8d71d6769..a14021a9e 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -115,7 +115,12 @@ class DramaFeverIE(DramaFeverBaseIE): 'ext': 'srt', 'url': subfile, }) - info['episode_number'] = int_or_none(v.get('number')) + episode_number = int_or_none(v.get('number')) + episode_fallback = 'Episode' + if episode_number: + episode_fallback += ' %d' % episode_number + info['episode'] = v.get('title', episode_fallback) + info['episode_number'] = episode_number break return info From a2e51e7b49a193b326291842d03aa49dd08326f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:36:38 +0600 Subject: [PATCH 0299/1105] [dramafever] Fix episode fallback --- youtube_dl/extractor/dramafever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index a14021a9e..9f5e82f56 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -119,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE): episode_fallback = 'Episode' if episode_number: episode_fallback += ' %d' % episode_number - info['episode'] = v.get('title', episode_fallback) + info['episode'] = v.get('title') or episode_fallback info['episode_number'] = episode_number break From 33cee6c7f6260dd33a9fbafd9f374c70454ff0f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:41:18 +0600 Subject: [PATCH 0300/1105] [dramafever] Add test for custom episode title --- youtube_dl/extractor/dramafever.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index 9f5e82f56..d35e88881 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -67,13 +67,15 @@ class DramaFeverBaseIE(AMPIE): class DramaFeverIE(DramaFeverBaseIE): IE_NAME = 'dramafever' _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)' - _TEST = { + _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', 'info_dict': { 'id': '4512.1', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Cooking with Shin 4512.1', 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', + 'episode': 'Episode 1', + 'episode_number': 1, 'thumbnail': 're:^https?://.*\.jpg', 'timestamp': 1404336058, 'upload_date': '20140702', @@ -83,7 +85,25 @@ class DramaFeverIE(DramaFeverBaseIE): # m3u8 download 'skip_download': True, }, - } + }, { + 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1', + 'info_dict': { + 'id': '4826.4', + 'ext': 'mp4', + 'title': 'Mnet Asian Music Awards 2015 4826.4', + 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', + 'episode': 'Mnet Asian Music Awards 2015 - Part 3', + 'episode_number': 4, + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1450213200, + 'upload_date': '20151215', + 'duration': 5602, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] def _real_extract(self, url): video_id = self._match_id(url).replace('/', '.') From 18c782ab26a53958a7d54d39e06eba0cd920178b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:58:25 +0600 Subject: [PATCH 0301/1105] [vrt] Extend _VALUD_URL --- youtube_dl/extractor/vrt.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index bbd3bbf7b..ee158b7b3 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -8,7 +8,7 @@ from ..utils import float_or_none class VRTIE(InfoExtractor): - _VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*' + _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*' _TESTS = [ # deredactie.be { @@ -52,6 +52,10 @@ class VRTIE(InfoExtractor): 'duration': 661, } }, + { + 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', + 'only_matching': True, + } ] def _real_extract(self, url): From 2f546d0a3cc9b9fa7f022df68574f7009d7d1ffb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 5 Jan 2016 01:59:45 +0600 Subject: [PATCH 0302/1105] [vrt] Prefix format ids --- youtube_dl/extractor/vrt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index ee158b7b3..01891ac4c 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -73,11 +73,11 @@ class VRTIE(InfoExtractor): if mobj: formats.extend(self._extract_m3u8_formats( '%s/%s' % (mobj.group('server'), mobj.group('path')), - video_id, 'mp4')) + video_id, 'mp4', m3u8_id='hls')) mobj = re.search(r'data-video-src="(?P<src>[^"]+)"', webpage) if mobj: formats.extend(self._extract_f4m_formats( - '%s/manifest.f4m' % mobj.group('src'), video_id)) + '%s/manifest.f4m' % mobj.group('src'), video_id, f4m_id='hds')) self._sort_formats(formats) title = self._og_search_title(webpage) From 40f796288afe634055c4600993b353da58e29a45 Mon Sep 17 00:00:00 2001 From: Sergey M <dstftw@gmail.com> Date: Tue, 5 Jan 2016 02:17:12 +0600 Subject: [PATCH 0303/1105] [README.md] Clarify cookies usage --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4fc83b8e3..75253199c 100644 --- a/README.md +++ b/README.md @@ -627,7 +627,7 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows, `LF` (`\n`) for Linux and `CR` (`\r`) for Mac OS. `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. -Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. +Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). ### Can you add support for this anime video site, or site which shows current movies for free? From 17b2d7ca772da5b709e00ca01e96e893807f7b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 Jan 2016 00:02:21 +0600 Subject: [PATCH 0304/1105] [udemy] Detect non free courses (Closes #8138) --- youtube_dl/extractor/udemy.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 1df636779..e19c1f762 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -11,6 +11,7 @@ from ..utils import ( float_or_none, int_or_none, sanitized_Request, + unescapeHTML, ) @@ -19,8 +20,6 @@ class UdemyIE(InfoExtractor): _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1' _ORIGIN_URL = 'https://www.udemy.com' - _SUCCESSFULLY_ENROLLED = '>You have enrolled in this course!<' - _ALREADY_ENROLLED = '>You are already taking this course.<' _NETRC_MACHINE = 'udemy' _TESTS = [{ @@ -37,15 +36,21 @@ class UdemyIE(InfoExtractor): }] def _enroll_course(self, webpage, course_id): - enroll_url = self._search_regex( + checkout_url = unescapeHTML(self._search_regex( + r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1', + webpage, 'checkout url', group='url', default=None)) + if checkout_url: + raise ExtractorError( + 'Course %s is not free. You have to pay for it before you can download.' + 'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True) + + enroll_url = unescapeHTML(self._search_regex( r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1', - webpage, 'enroll url', group='url', - default='https://www.udemy.com/course/subscribe/?courseId=%s' % course_id) - webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') - if self._SUCCESSFULLY_ENROLLED in webpage: - self.to_screen('%s: Successfully enrolled in' % course_id) - elif self._ALREADY_ENROLLED in webpage: - self.to_screen('%s: Already enrolled in' % course_id) + webpage, 'enroll url', group='url', default=None)) + if enroll_url: + webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') + if '>You have enrolled in' in webpage: + self.to_screen('%s: Successfully enrolled in the course' % course_id) def _download_lecture(self, course_id, lecture_id): return self._download_json( From f20756fb10ec560177282f032684327f600acc34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 6 Jan 2016 00:03:39 +0600 Subject: [PATCH 0305/1105] [udemy] Fix non free course message --- youtube_dl/extractor/udemy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index e19c1f762..f5b5e7fd6 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -41,7 +41,7 @@ class UdemyIE(InfoExtractor): webpage, 'checkout url', group='url', default=None)) if checkout_url: raise ExtractorError( - 'Course %s is not free. You have to pay for it before you can download.' + 'Course %s is not free. You have to pay for it before you can download. ' 'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True) enroll_url = unescapeHTML(self._search_regex( From 76048b23e8a4aac93a33a96356fe64a9bcf78421 Mon Sep 17 00:00:00 2001 From: kaspi <je326@hotmail.com> Date: Sat, 17 Oct 2015 23:27:03 -0400 Subject: [PATCH 0306/1105] [npr] Add extractor removed md5 from _TEST moved from xml data to json test changed _TEST url to one that will not expire, so tests would not be failing --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/npr.py | 71 ++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 youtube_dl/extractor/npr.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 19cea5e99..c46e19eae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -473,6 +473,7 @@ from .npo import ( VPROIE, WNLIE ) +from .npr import NprIE from .nrk import ( NRKIE, NRKPlaylistIE, diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py new file mode 100644 index 000000000..a823bc096 --- /dev/null +++ b/youtube_dl/extractor/npr.py @@ -0,0 +1,71 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os.path +import re + +from ..compat import compat_urllib_parse_unquote +from ..utils import url_basename +from .common import InfoExtractor + +class NprIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer.html?.*id=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205', + 'info_dict': { + 'id': '449974205', + 'ext': 'mp4', + 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More' + } +} + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage_url = 'http://www.npr.org/player/v2/mediaPlayer.html?id=' + video_id + webpage = self._download_webpage(webpage_url, video_id) + key = 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010' + xml_url = 'http://api.npr.org/query?id=%s&apiKey=%s' % (video_id, key) + json_url = 'http://api.npr.org/query?id=%s&apiKey=%s&format=json' % (video_id, key) + + formats = [] + entries = [] + + config = self._download_json(json_url, video_id) + + content = config["list"]["story"] + + album_title = config["list"]["story"][0]['song'][0]['album']['albumTitle'] + print album_title['$text'] + + for key in content: + if "audio" in key: + for x in key['audio']: + if x['type'] == 'standard': + playlist = True + song_duration = x["duration"]['$text'] + song_title = x["title"]["$text"] + song_id = x["id"] + + for k in x["format"]: + if type(x["format"][k]) is list: + for z in x["format"][k]: + formats.append({ 'format': z['type'], + 'url' : z['$text'] + }) + else: + formats.append({ 'format': k, + 'url' : x["format"][k]['$text'] + }) + + entries.append({ "title":song_title, + "id":song_id, + "duration": song_duration , + "formats":formats}) + formats = [] + + return { '_type': 'playlist', + 'id' : video_id, + 'title' : album_title, + 'entries': entries } From 51d3045de26fccbdcf0076c5cab8ab8152548bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 7 Jan 2016 01:57:36 +0600 Subject: [PATCH 0307/1105] [npr] Fix extractor (Closes #7218) --- youtube_dl/extractor/npr.py | 133 +++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py index a823bc096..125c7010b 100644 --- a/youtube_dl/extractor/npr.py +++ b/youtube_dl/extractor/npr.py @@ -1,71 +1,82 @@ -# coding: utf-8 from __future__ import unicode_literals -import os.path -import re - -from ..compat import compat_urllib_parse_unquote -from ..utils import url_basename from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import ( + int_or_none, + qualities, +) + class NprIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer.html?.*id=(?P<id>[0-9]+)' - _TEST = { - 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205', - 'info_dict': { - 'id': '449974205', - 'ext': 'mp4', - 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More' - } -} - + _VALID_URL = r'http://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205', + 'info_dict': { + 'id': '449974205', + 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More' + }, + 'playlist_count': 7, + }, { + 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?action=1&t=1&islist=false&id=446928052&m=446929930&live=1', + 'info_dict': { + 'id': '446928052', + 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'" + }, + 'playlist': [{ + 'md5': '12fa60cb2d3ed932f53609d4aeceabf1', + 'info_dict': { + 'id': '446929930', + 'ext': 'mp3', + 'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)', + 'duration': 402, + }, + }], + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - webpage_url = 'http://www.npr.org/player/v2/mediaPlayer.html?id=' + video_id - webpage = self._download_webpage(webpage_url, video_id) - key = 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010' - xml_url = 'http://api.npr.org/query?id=%s&apiKey=%s' % (video_id, key) - json_url = 'http://api.npr.org/query?id=%s&apiKey=%s&format=json' % (video_id, key) + playlist_id = self._match_id(url) + + config = self._download_json( + 'http://api.npr.org/query?%s' % compat_urllib_parse.urlencode({ + 'id': playlist_id, + 'fields': 'titles,audio,show', + 'format': 'json', + 'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010', + }), playlist_id) + + story = config['list']['story'][0] + + KNOWN_FORMATS = ('threegp', 'mp4', 'mp3') + quality = qualities(KNOWN_FORMATS) - formats = [] entries = [] + for audio in story.get('audio', []): + title = audio.get('title', {}).get('$text') + duration = int_or_none(audio.get('duration', {}).get('$text')) + formats = [] + for format_id, formats_entry in audio.get('format', {}).items(): + if not formats_entry: + continue + if isinstance(formats_entry, list): + formats_entry = formats_entry[0] + format_url = formats_entry.get('$text') + if not format_url: + continue + if format_id in KNOWN_FORMATS: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'ext': formats_entry.get('type'), + 'quality': quality(format_id), + }) + self._sort_formats(formats) + entries.append({ + 'id': audio['id'], + 'title': title, + 'duration': duration, + 'formats': formats, + }) - config = self._download_json(json_url, video_id) - - content = config["list"]["story"] - - album_title = config["list"]["story"][0]['song'][0]['album']['albumTitle'] - print album_title['$text'] - - for key in content: - if "audio" in key: - for x in key['audio']: - if x['type'] == 'standard': - playlist = True - song_duration = x["duration"]['$text'] - song_title = x["title"]["$text"] - song_id = x["id"] - - for k in x["format"]: - if type(x["format"][k]) is list: - for z in x["format"][k]: - formats.append({ 'format': z['type'], - 'url' : z['$text'] - }) - else: - formats.append({ 'format': k, - 'url' : x["format"][k]['$text'] - }) - - entries.append({ "title":song_title, - "id":song_id, - "duration": song_duration , - "formats":formats}) - formats = [] - - return { '_type': 'playlist', - 'id' : video_id, - 'title' : album_title, - 'entries': entries } + playlist_title = story.get('title', {}).get('$text') + return self.playlist_result(entries, playlist_id, playlist_title) From e4f49a87533bd6e3ebf11b43c3baf0e32db2f5ca Mon Sep 17 00:00:00 2001 From: oittaa <oittaa@users.noreply.github.com> Date: Thu, 7 Jan 2016 07:26:14 +0200 Subject: [PATCH 0308/1105] check video_play_path and use xpath_text "This check should take place earlier and should be more general if not video_url:. Same should be done for video_play_path. Also these fields better extracted with xpath_text." Suggestions by @dstftw --- youtube_dl/extractor/crunchyroll.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 00d943f77..785594df8 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -329,8 +329,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text streamdata_req, video_id, note='Downloading media info for %s' % video_format) stream_info = streamdata.find('./{default}preload/stream_info') - video_url = stream_info.find('./host').text - video_play_path = stream_info.find('./file').text + video_url = xpath_text(stream_info, './host') + video_play_path = xpath_text(stream_info, './file') + if not video_url or not video_play_path: + continue metadata = stream_info.find('./metadata') format_info = { 'format': video_format, From 2e02ecbccc9751af54ae2aab9ce973712c78af74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 7 Jan 2016 12:24:32 +0600 Subject: [PATCH 0309/1105] [ivideon] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ivideon.py | 75 ++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 youtube_dl/extractor/ivideon.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c46e19eae..a9d23b8f4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -299,6 +299,7 @@ from .ivi import ( IviIE, IviCompilationIE ) +from .ivideon import IvideonIE from .izlesene import IzleseneIE from .jadorecettepub import JadoreCettePubIE from .jeuxvideo import JeuxVideoIE diff --git a/youtube_dl/extractor/ivideon.py b/youtube_dl/extractor/ivideon.py new file mode 100644 index 000000000..aa05f5413 --- /dev/null +++ b/youtube_dl/extractor/ivideon.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urllib_parse +from ..utils import qualities + + +class IvideonIE(InfoExtractor): + IE_NAME = 'ivideon' + IE_DESC = 'Ivideon TV' + _VALID_URL = r'https?://(?:www\.)?ivideon\.com/tv/camera/(?P<id>\d+-[\da-f]+)/(?P<camera_id>\d+)' + _TESTS = [{ + 'url': 'https://www.ivideon.com/tv/camera/100-916ca13b5c4ad9f564266424a026386d/0/', + 'info_dict': { + 'id': '100-916ca13b5c4ad9f564266424a026386d', + 'ext': 'flv', + 'title': 're:^Касса [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Основное предназначение - запись действий кассиров. Плюс общий вид.', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://www.ivideon.com/tv/camera/100-c4ee4cb9ede885cf62dfbe93d7b53783/589824/?lang=ru', + 'only_matching': True, + }] + + _QUALITIES = ('low', 'mid', 'hi') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + server_id, camera_id = mobj.group('id'), mobj.group('camera_id') + camera_name, description = None, None + + webpage = self._download_webpage(url, server_id, fatal=False) + if webpage: + config_string = self._search_regex( + r'var\s+config\s*=\s*({.+?});', webpage, 'config', default=None) + if config_string: + config = self._parse_json(config_string, server_id, fatal=False) + camera_info = config.get('ivTvAppOptions', {}).get('currentCameraInfo') + if camera_info: + camera_name = camera_info.get('camera_name') + description = camera_info.get('misc', {}).get('description') + if not camera_name: + camera_name = self._html_search_meta( + 'name', webpage, 'camera name', default=None) or self._search_regex( + r'<h1[^>]+class="b-video-title"[^>]*>([^<]+)', webpage, 'camera name', default=None) + + quality = qualities(self._QUALITIES) + + formats = [{ + 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse.urlencode({ + 'server': server_id, + 'camera': camera_id, + 'sessionId': 'demo', + 'q': quality(format_id), + }), + 'format_id': format_id, + 'ext': 'flv', + 'quality': quality(format_id), + } for format_id in self._QUALITIES] + self._sort_formats(formats) + + return { + 'id': server_id, + 'title': self._live_title(camera_name or server_id), + 'description': description, + 'is_live': True, + 'formats': formats, + } From 23f13e97548cc4ff3d11408ee5bc77f682e642dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Jan 2016 00:52:55 +0600 Subject: [PATCH 0310/1105] [youtube] Support expanding alternative format of links in description (Closes #8164) --- youtube_dl/extractor/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4aac2cc03..897641e42 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1235,10 +1235,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_description = re.sub(r'''(?x) <a\s+ (?:[a-zA-Z-]+="[^"]+"\s+)*? - title="([^"]+)"\s+ + (?:title|href)="([^"]+)"\s+ (?:[a-zA-Z-]+="[^"]+"\s+)*? - class="yt-uix-redirect-link"\s*> - [^<]+ + class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)".*?> + [^<]+\.{3}\s* </a> ''', r'\1', video_description) video_description = clean_html(video_description) From 97afd99a18e4723e4ff588df456c7aec62967b35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Jan 2016 01:54:31 +0600 Subject: [PATCH 0311/1105] [soundcloud:likes] Adapt to API changes (Closes #8166) --- youtube_dl/extractor/soundcloud.py | 38 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 02e64e094..b2d5487ca 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -384,27 +384,24 @@ class SoundcloudUserIE(SoundcloudIE): resource = mobj.group('rsrc') or 'all' base_url = self._BASE_URL_MAP[resource] % user['id'] - next_href = None + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' + compat_urllib_parse.urlencode(query) entries = [] for i in itertools.count(): - if not next_href: - data = compat_urllib_parse.urlencode({ - 'offset': i * 50, - 'limit': 50, - 'client_id': self._CLIENT_ID, - 'linked_partitioning': '1', - 'representation': 'speedy', - }) - next_href = base_url + '?' + data - response = self._download_json( next_href, uploader, 'Downloading track page %s' % (i + 1)) collection = response['collection'] - if not collection: - self.to_screen('%s: End page received' % uploader) break def resolve_permalink_url(candidates): @@ -419,12 +416,15 @@ class SoundcloudUserIE(SoundcloudIE): if permalink_url: entries.append(self.url_result(permalink_url)) - if 'next_href' in response: - next_href = response['next_href'] - if not next_href: - break - else: - next_href = None + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse.urlencode(qs, True))) return { '_type': 'playlist', From 18e6c97c48f883911649d9b3d64127379a8b1df4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Jan 2016 03:19:47 +0600 Subject: [PATCH 0312/1105] [adultswim] Skip georestricted hls (Closes #8168) --- youtube_dl/extractor/adultswim.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index bf21a6887..8157da2cb 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -187,7 +187,8 @@ class AdultSwimIE(InfoExtractor): media_url = file_el.text if determine_ext(media_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( - media_url, segment_title, 'mp4', preference=0, m3u8_id='hls')) + media_url, segment_title, 'mp4', preference=0, + m3u8_id='hls', fatal=False)) else: formats.append({ 'format_id': '%s_%s' % (bitrate, ftype), From 4cf096a4a99c2cc29708f23244e4433d91c83b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Jan 2016 05:11:23 +0600 Subject: [PATCH 0313/1105] [ivideon] Add support for map bound URLs --- youtube_dl/extractor/ivideon.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ivideon.py b/youtube_dl/extractor/ivideon.py index aa05f5413..617dc8c07 100644 --- a/youtube_dl/extractor/ivideon.py +++ b/youtube_dl/extractor/ivideon.py @@ -4,14 +4,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import ( + compat_urllib_parse, + compat_urlparse, +) from ..utils import qualities class IvideonIE(InfoExtractor): IE_NAME = 'ivideon' IE_DESC = 'Ivideon TV' - _VALID_URL = r'https?://(?:www\.)?ivideon\.com/tv/camera/(?P<id>\d+-[\da-f]+)/(?P<camera_id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ivideon\.com/tv/(?:[^/]+/)*camera/(?P<id>\d+-[\da-f]+)/(?P<camera_id>\d+)' _TESTS = [{ 'url': 'https://www.ivideon.com/tv/camera/100-916ca13b5c4ad9f564266424a026386d/0/', 'info_dict': { @@ -27,6 +30,9 @@ class IvideonIE(InfoExtractor): }, { 'url': 'https://www.ivideon.com/tv/camera/100-c4ee4cb9ede885cf62dfbe93d7b53783/589824/?lang=ru', 'only_matching': True, + }, { + 'url': 'https://www.ivideon.com/tv/map/22.917923/-31.816406/16/camera/100-e7bc16c7d4b5bbd633fd5350b66dfa9a/0', + 'only_matching': True, }] _QUALITIES = ('low', 'mid', 'hi') @@ -35,8 +41,10 @@ class IvideonIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) server_id, camera_id = mobj.group('id'), mobj.group('camera_id') camera_name, description = None, None + camera_url = compat_urlparse.urljoin( + url, '/tv/camera/%s/%s/' % (server_id, camera_id)) - webpage = self._download_webpage(url, server_id, fatal=False) + webpage = self._download_webpage(camera_url, server_id, fatal=False) if webpage: config_string = self._search_regex( r'var\s+config\s*=\s*({.+?});', webpage, 'config', default=None) From 5dbe81a1d35ae704b5ea208698a6bb785923d71a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 8 Jan 2016 10:41:24 +0600 Subject: [PATCH 0314/1105] [vimeo] Automatically pickup full movie when rented (Closes #8171) --- youtube_dl/extractor/vimeo.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 7af699982..776e8cce4 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -217,7 +217,7 @@ class VimeoIE(VimeoBaseInfoExtractor): r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) if mobj: player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'Referer': url}) + surl = smuggle_url(player_url, {'http_headers': {'Referer': url}}) return surl # Look for embedded (swf embed) Vimeo player mobj = re.search( @@ -262,11 +262,11 @@ class VimeoIE(VimeoBaseInfoExtractor): self._login() def _real_extract(self, url): - url, data = unsmuggle_url(url) + url, data = unsmuggle_url(url, {}) headers = std_headers - if data is not None: + if 'http_headers' in data: headers = headers.copy() - headers.update(data) + headers.update(data['http_headers']) if 'Referer' not in headers: headers['Referer'] = url @@ -342,7 +342,7 @@ class VimeoIE(VimeoBaseInfoExtractor): raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None: - if data and '_video_password_verified' in data: + if '_video_password_verified' in data: raise ExtractorError('video password verification failed!') self._verify_video_password(url, video_id, webpage) return self._real_extract( @@ -354,6 +354,13 @@ class VimeoIE(VimeoBaseInfoExtractor): if config.get('view') == 4: config = self._verify_player_video_password(url, video_id) + if '>You rented this title.<' in webpage: + feature_id = config.get('video', {}).get('vod', {}).get('feature_id') + if feature_id and not data.get('force_feature_id', False): + return self.url_result(smuggle_url( + 'https://player.vimeo.com/player/%s' % feature_id, + {'force_feature_id': True}), 'Vimeo') + # Extract title video_title = config["video"]["title"] From 5777f5d386d14407a19bc86c31e0bf2b5ae4a87f Mon Sep 17 00:00:00 2001 From: atomic83 <atomic83@riseup.net> Date: Fri, 8 Jan 2016 12:58:05 +0100 Subject: [PATCH 0315/1105] Extract xHamster title fix --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 8938c0e45..261d323a7 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -65,7 +65,7 @@ class XHamsterIE(InfoExtractor): title = self._html_search_regex( [r'<title>(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)', - r'

([^<]+)

'], webpage, 'title') + r'([^<]+)'], webpage, 'title') # Only a few videos have an description mobj = re.search(r'Description: ([^<]+)', webpage) From 6609b3ce371d08c0464a3caa30c21b61de221508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 00:19:36 +0600 Subject: [PATCH 0316/1105] [xhamster] Improve title extraction --- youtube_dl/extractor/xhamster.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 261d323a7..97355d17f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -64,8 +64,9 @@ class XHamsterIE(InfoExtractor): webpage = self._download_webpage(mrss_url, video_id) title = self._html_search_regex( - [r'(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)', - r'([^<]+)'], webpage, 'title') + [r']*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)', + r']*>([^<]+)', + r']+itemprop=".*?caption.*?"[^>]+content="(.+?)"'], webpage, 'title') # Only a few videos have an description mobj = re.search(r'Description: ([^<]+)', webpage) From 4763b624a6655bc2333157031c73858f6b918f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 00:21:57 +0600 Subject: [PATCH 0317/1105] [xhamster] Fix upload date extraction --- youtube_dl/extractor/xhamster.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 97355d17f..560c38e26 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -72,10 +72,9 @@ class XHamsterIE(InfoExtractor): mobj = re.search(r'Description: ([^<]+)', webpage) description = mobj.group(1) if mobj else None - upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'', - webpage, 'upload date', fatal=False) - if upload_date: - upload_date = unified_strdate(upload_date) + upload_date = unified_strdate(self._search_regex( + r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}', + webpage, 'upload date', fatal=False)) uploader = self._html_search_regex( r"
]+>(?P[^<]+)", From 44731e308cdcc89fda1e613e094aa23de33f5c9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 00:26:37 +0600 Subject: [PATCH 0318/1105] [xhamster] Fix duration extraction --- youtube_dl/extractor/xhamster.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 560c38e26..2b9ac2419 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -4,10 +4,10 @@ import re from .common import InfoExtractor from ..utils import ( - unified_strdate, - str_to_int, + float_or_none, int_or_none, - parse_duration, + str_to_int, + unified_strdate, ) @@ -85,8 +85,9 @@ class XHamsterIE(InfoExtractor): r''']+poster=(?P["'])(?P.+?)(?P=q)[^>]*>'''], webpage, 'thumbnail', fatal=False, group='thumbnail') - duration = parse_duration(self._html_search_regex(r'Runtime: (\d+:\d+)
', - webpage, 'duration', fatal=False)) + duration = float_or_none(self._search_regex( + r'(["\'])duration\1\s*:\s*(["\'])(?P.+?)\2', + webpage, 'duration', fatal=False, group='duration')) view_count = self._html_search_regex(r'Views: ([^<]+)', webpage, 'view count', fatal=False) if view_count: From 6a16fd4a1ad1dbd9372f75ddffce1e9fe95b002c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 00:29:10 +0600 Subject: [PATCH 0319/1105] [xhamster] Fix view count extraction --- youtube_dl/extractor/xhamster.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 2b9ac2419..ccee77359 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -89,9 +89,9 @@ class XHamsterIE(InfoExtractor): r'(["\'])duration\1\s*:\s*(["\'])(?P.+?)\2', webpage, 'duration', fatal=False, group='duration')) - view_count = self._html_search_regex(r'Views: ([^<]+)', webpage, 'view count', fatal=False) - if view_count: - view_count = str_to_int(view_count) + view_count = int_or_none(self._search_regex( + r'content=["\']User(?:View|Play)s:(\d+)', + webpage, 'view count', fatal=False)) mobj = re.search(r"hint='(?P\d+) Likes / (?P\d+) Dislikes'", webpage) (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None) From 1a6d92847f4e5fe1d94f2f159f903a414d5ff62c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 00:31:24 +0600 Subject: [PATCH 0320/1105] [xhamster] Change title regex precedence --- youtube_dl/extractor/xhamster.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index ccee77359..c9108c345 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -64,9 +64,10 @@ class XHamsterIE(InfoExtractor): webpage = self._download_webpage(mrss_url, video_id) title = self._html_search_regex( - [r']*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)', - r']*>([^<]+)', - r']+itemprop=".*?caption.*?"[^>]+content="(.+?)"'], webpage, 'title') + [r']*>([^<]+)', + r']+itemprop=".*?caption.*?"[^>]+content="(.+?)"', + r']*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)'], + webpage, 'title') # Only a few videos have an description mobj = re.search(r'Description: ([^<]+)', webpage) From bcac2a071040fd89585a806ca66e086c109406c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 00:36:19 +0600 Subject: [PATCH 0321/1105] [xhamster] Fix uploader extraction --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index c9108c345..981ee284f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -78,7 +78,7 @@ class XHamsterIE(InfoExtractor): webpage, 'upload date', fatal=False)) uploader = self._html_search_regex( - r"]+>(?P[^<]+)", + r']+itemprop=["\']author[^>]+>]+href=["\'].+?xhamster\.com/user/[^>]+>(?P.+?)', webpage, 'uploader', default='anonymous') thumbnail = self._search_regex( From 14b4f038c0123138a642a25bf6a3851dd610d1d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 00:36:43 +0600 Subject: [PATCH 0322/1105] [xhamster] Update tests --- youtube_dl/extractor/xhamster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 981ee284f..066af184f 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -22,7 +22,7 @@ class XHamsterIE(InfoExtractor): 'title': 'FemaleAgent Shy beauty takes the bait', 'upload_date': '20121014', 'uploader': 'Ruseful2011', - 'duration': 893, + 'duration': 893.52, 'age_limit': 18, } }, @@ -34,7 +34,7 @@ class XHamsterIE(InfoExtractor): 'title': 'Britney Spears Sexy Booty', 'upload_date': '20130914', 'uploader': 'jojo747400', - 'duration': 200, + 'duration': 200.48, 'age_limit': 18, } }, From d5f071afb5ce302058d99a14989f3f6afb88b1a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 03:06:09 +0600 Subject: [PATCH 0323/1105] [vimeo] Check source file URL (Closes #8187) --- youtube_dl/extractor/vimeo.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 776e8cce4..5cb01907b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -11,6 +11,7 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, encode_dict, ExtractorError, InAdvancePagedList, @@ -419,16 +420,21 @@ class VimeoIE(VimeoBaseInfoExtractor): download_data = self._download_json(download_request, video_id, fatal=False) if download_data: source_file = download_data.get('source_file') - if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'): - formats.append({ - 'url': source_file['download_url'], - 'ext': source_file['extension'].lower(), - 'width': int_or_none(source_file.get('width')), - 'height': int_or_none(source_file.get('height')), - 'filesize': parse_filesize(source_file.get('size')), - 'format_id': source_file.get('public_name', 'Original'), - 'preference': 1, - }) + if isinstance(source_file, dict): + download_url = source_file.get('download_url') + if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): + source_name = source_file.get('public_name', 'Original') + if self._is_valid_url(download_url, video_id, '%s video' % source_name): + ext = source_file.get('extension', determine_ext(download_url)).lower(), + formats.append({ + 'url': download_url, + 'ext': ext, + 'width': int_or_none(source_file.get('width')), + 'height': int_or_none(source_file.get('height')), + 'filesize': parse_filesize(source_file.get('size')), + 'format_id': source_name, + 'preference': 1, + }) config_files = config['video'].get('files') or config['request'].get('files', {}) for f in config_files.get('progressive', []): video_url = f.get('url') From 16f1131a4d46c4437485d4b075e9d1243ca8d60c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jan 2016 03:07:29 +0600 Subject: [PATCH 0324/1105] [vimeo] Add test for #8187 --- youtube_dl/extractor/vimeo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 5cb01907b..76e681bc3 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -209,6 +209,11 @@ class VimeoIE(VimeoBaseInfoExtractor): 'url': 'https://vimeo.com/groups/travelhd/videos/22439234', 'only_matching': True, }, + { + # source file returns 403: Forbidden + 'url': 'https://vimeo.com/7809605', + 'only_matching': True, + }, ] @staticmethod From b374af6ebdf6298ab1593bee56dba5b0a1daf9b7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 9 Jan 2016 01:16:08 +0100 Subject: [PATCH 0325/1105] release 2016.01.09 --- docs/supportedsites.md | 10 +++++++--- youtube_dl/version.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 84c166805..8d0c7b97a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1,6 +1,7 @@ # Supported sites - **1tv**: Первый канал - **1up.com** + - **20min** - **220.ro** - **22tracks:genre** - **22tracks:track** @@ -255,6 +256,7 @@ - **Ir90Tv** - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations + - **ivideon**: Ivideon TV - **Izlesene** - **JadoreCettePub** - **JeuxVideo** @@ -386,13 +388,14 @@ - **nowness** - **nowness:playlist** - **nowness:series** - - **NowTV** + - **NowTV** (Currently broken) - **NowTVList** - **nowvideo**: NowVideo - **npo**: npo.nl and ntr.nl - **npo.nl:live** - **npo.nl:radio** - **npo.nl:radio:fragment** + - **Npr** - **NRK** - **NRKPlaylist** - **NRKTV**: NRK TV and NRK Radio @@ -464,11 +467,13 @@ - **RegioTV** - **Restudy** - **ReverbNation** + - **Revision3** - **RingTV** - **RottenTomatoes** - **Roxwel** - **RTBF** - - **Rte** + - **rte**: Raidió Teilifís Éireann TV + - **rte:radio**: Raidió Teilifís Éireann radio - **rtl.nl**: rtl.nl and rtlxl.nl - **RTL2** - **RTP** @@ -573,7 +578,6 @@ - **TeleMB** - **TeleTask** - **TenPlay** - - **TestTube** - **TF1** - **TheIntercept** - **TheOnion** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 790bd5b3b..7030903c0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.01.01' +__version__ = '2016.01.09' From 27783821af5a85fb20266834cff4f07eb0695cb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 9 Jan 2016 11:16:23 +0100 Subject: [PATCH 0326/1105] [xhamster] Remove unused import --- youtube_dl/extractor/xhamster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 066af184f..fd43e8854 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -6,7 +6,6 @@ from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, - str_to_int, unified_strdate, ) From 7a34302e95ee66f770b1eba720847831c40121a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Jan 2016 01:37:10 +0600 Subject: [PATCH 0327/1105] [canalc2] Fix extraction (Closes #8191) --- youtube_dl/extractor/canalc2.py | 46 +++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py index f6a1ff381..f1f128c45 100644 --- a/youtube_dl/extractor/canalc2.py +++ b/youtube_dl/extractor/canalc2.py @@ -9,9 +9,9 @@ from ..utils import parse_duration class Canalc2IE(InfoExtractor): IE_NAME = 'canalc2.tv' - _VALID_URL = r'https?://(?:www\.)?canalc2\.tv/video/(?P\d+)' + _VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.canalc2.tv/video/12163', 'md5': '060158428b650f896c542dfbb3d6487f', 'info_dict': { @@ -23,24 +23,36 @@ class Canalc2IE(InfoExtractor): 'params': { 'skip_download': True, # Requires rtmpdump } - } + }, { + 'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - video_url = self._search_regex( - r'jwplayer\((["\'])Player\1\)\.setup\({[^}]*file\s*:\s*(["\'])(?P.+?)\2', - webpage, 'video_url', group='file') - formats = [{'url': video_url}] - if video_url.startswith('rtmp://'): - rtmp = re.search(r'^(?Prtmp://[^/]+/(?P.+/))(?Pmp4:.+)$', video_url) - formats[0].update({ - 'url': rtmp.group('url'), - 'ext': 'flv', - 'app': rtmp.group('app'), - 'play_path': rtmp.group('play_path'), - 'page_url': url, - }) + + webpage = self._download_webpage( + 'http://www.canalc2.tv/video/%s' % video_id, video_id) + + formats = [] + for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage): + if video_url.startswith('rtmp://'): + rtmp = re.search( + r'^(?Prtmp://[^/]+/(?P.+/))(?Pmp4:.+)$', video_url) + formats.append({ + 'url': rtmp.group('url'), + 'format_id': 'rtmp', + 'ext': 'flv', + 'app': rtmp.group('app'), + 'play_path': rtmp.group('play_path'), + 'page_url': url, + }) + else: + formats.append({ + 'url': video_url, + 'format_id': 'http', + }) + self._sort_formats(formats) title = self._html_search_regex( r'(?s)class="[^"]*col_description[^"]*">.*?

(.*?)

', webpage, 'title') From 0a899a1448328648927e18e43c6e2274d2706396 Mon Sep 17 00:00:00 2001 From: Vignesh Venkat Date: Sat, 9 Jan 2016 15:31:50 -0800 Subject: [PATCH 0328/1105] [Bigflix] Add new extractor for bigflix.com Add an IE to support bigflix.com. It uses some sort of silverlight plugin whose video url is being populated using base64 encoded flashvars. So it is quite straightforward to extract. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/bigflix.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/bigflix.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a9d23b8f4..40c42d4d2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -61,6 +61,7 @@ from .beeg import BeegIE from .behindkink import BehindKinkIE from .beatportpro import BeatportProIE from .bet import BetIE +from .bigflix import BigflixIE from .bild import BildIE from .bilibili import BiliBiliIE from .bleacherreport import ( diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py new file mode 100644 index 000000000..aeea1a002 --- /dev/null +++ b/youtube_dl/extractor/bigflix.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from base64 import b64decode + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class BigflixIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.*/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537', + 'md5': 'ec76aa9b1129e2e5b301a474e54fab74', + 'info_dict': { + 'id': '16537', + 'ext': 'mp4', + 'title': 'Singham Returns', + 'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r']+class=["\']pagetitle["\'][^>]*>(.+?)', + webpage, 'title') + + video_url = b64decode(compat_urllib_parse_unquote(self._search_regex( + r'file=([^&]+)', webpage, 'video url')).encode('ascii')).decode('utf-8') + + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'description': description, + } From 15b1c6656fd8b384bd974f4d943a04b8bf8ca915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Jan 2016 10:03:56 +0600 Subject: [PATCH 0329/1105] Credit @vickyg3 for bigflix (#8194) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index e39d07efe..3d8bebbb0 100644 --- a/AUTHORS +++ b/AUTHORS @@ -151,3 +151,4 @@ Muratcan Simsek Evan Lu flatgreen Brian Foley +Vignesh Venkat From 6e99d5762a844b44cad4cae144045ed0537c084b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Jan 2016 10:31:36 +0600 Subject: [PATCH 0330/1105] [bigflix] Extract all formats --- youtube_dl/extractor/bigflix.py | 42 +++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index aeea1a002..b7e498436 100644 --- a/youtube_dl/extractor/bigflix.py +++ b/youtube_dl/extractor/bigflix.py @@ -1,15 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals -from base64 import b64decode +import base64 +import re from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote class BigflixIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.*/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P[0-9]+)' + _TESTS = [{ 'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537', 'md5': 'ec76aa9b1129e2e5b301a474e54fab74', 'info_dict': { @@ -18,7 +19,20 @@ class BigflixIE(InfoExtractor): 'title': 'Singham Returns', 'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d', } - } + }, { + # multiple formats + 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', + 'info_dict': { + 'id': '16070', + 'ext': 'mp4', + 'title': 'Madarasapatinam', + 'description': 'md5:63b9b8ed79189c6f0418c26d9a3452ca', + 'formats': 'mincount:2', + }, + 'params': { + 'skip_download': True, + } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -29,14 +43,28 @@ class BigflixIE(InfoExtractor): r']+class=["\']pagetitle["\'][^>]*>(.+?)', webpage, 'title') - video_url = b64decode(compat_urllib_parse_unquote(self._search_regex( - r'file=([^&]+)', webpage, 'video url')).encode('ascii')).decode('utf-8') + def decode_url(quoted_b64_url): + return base64.b64decode(compat_urllib_parse_unquote( + quoted_b64_url)).encode('ascii').decode('utf-8') + + formats = [{ + 'url': decode_url(encoded_url), + 'format_id': '%sp' % height, + 'height': int(height), + } for height, encoded_url in re.findall( + r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage)] + + if not formats: + formats.append({ + 'url': decode_url(self._search_regex( + r'file=([^&]+)', webpage, 'video url')), + }) description = self._html_search_meta('description', webpage) return { 'id': video_id, 'title': title, - 'url': video_url, 'description': description, + 'formats': formats } From a9bbd26f1d2bb45205f9fbd2626569522049e40e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Jan 2016 10:49:27 +0600 Subject: [PATCH 0331/1105] [bigflix] Improve formats extraction --- youtube_dl/extractor/bigflix.py | 41 ++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index b7e498436..24720de86 100644 --- a/youtube_dl/extractor/bigflix.py +++ b/youtube_dl/extractor/bigflix.py @@ -20,7 +20,7 @@ class BigflixIE(InfoExtractor): 'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d', } }, { - # multiple formats + # 2 formats 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', 'info_dict': { 'id': '16070', @@ -32,6 +32,10 @@ class BigflixIE(InfoExtractor): 'params': { 'skip_download': True, } + }, { + # multiple formats + 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', + 'only_matching': True, }] def _real_extract(self, url): @@ -45,20 +49,31 @@ class BigflixIE(InfoExtractor): def decode_url(quoted_b64_url): return base64.b64decode(compat_urllib_parse_unquote( - quoted_b64_url)).encode('ascii').decode('utf-8') + quoted_b64_url).encode('ascii')).decode('utf-8') - formats = [{ - 'url': decode_url(encoded_url), - 'format_id': '%sp' % height, - 'height': int(height), - } for height, encoded_url in re.findall( - r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage)] + formats = [] + for height, encoded_url in re.findall( + r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage): + video_url = decode_url(encoded_url) + f = { + 'url': video_url, + 'format_id': '%sp' % height, + 'height': int(height), + } + if video_url.startswith('rtmp'): + f['ext'] = 'flv' + formats.append(f) - if not formats: - formats.append({ - 'url': decode_url(self._search_regex( - r'file=([^&]+)', webpage, 'video url')), - }) + file_url = self._search_regex( + r'file=([^&]+)', webpage, 'video url', default=None) + if file_url: + video_url = decode_url(file_url) + if all(f['url'] != video_url for f in formats): + formats.append({ + 'url': decode_url(file_url), + }) + + self._sort_formats(formats) description = self._html_search_meta('description', webpage) From 3fc088f8c7f61026943a62fc28e051a7d3a6bdd5 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 10 Jan 2016 07:45:41 +0100 Subject: [PATCH 0332/1105] [dcn] extract video ids in season entries --- youtube_dl/extractor/dcn.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dcn.py b/youtube_dl/extractor/dcn.py index 8f48571de..15a1c40f7 100644 --- a/youtube_dl/extractor/dcn.py +++ b/youtube_dl/extractor/dcn.py @@ -5,7 +5,10 @@ import re import base64 from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import ( + compat_urllib_parse, + compat_str, +) from ..utils import ( int_or_none, parse_iso8601, @@ -186,7 +189,8 @@ class DCNSeasonIE(InfoExtractor): entries = [] for video in show['videos']: + video_id = compat_str(video['id']) entries.append(self.url_result( - 'http://www.dcndigital.ae/media/%s' % video['id'], 'DCNVideo')) + 'http://www.dcndigital.ae/media/%s' % video_id, 'DCNVideo', video_id)) return self.playlist_result(entries, season_id, title) From 2334762b03dec5da4d6788539e3e11192eb97010 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 10 Jan 2016 07:55:58 +0100 Subject: [PATCH 0333/1105] [shahid] raise ExtractorError if the video is DRM protected --- youtube_dl/extractor/shahid.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py index f76fb12c0..1178b7a27 100644 --- a/youtube_dl/extractor/shahid.py +++ b/youtube_dl/extractor/shahid.py @@ -73,6 +73,9 @@ class ShahidIE(InfoExtractor): 'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html' % (video_id, api_vars['type']), video_id, 'Downloading player JSON') + if player.get('drm'): + raise ExtractorError('This video is DRM protected.', expected=True) + formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') video = self._download_json( From 7e8a800f29d52cbc6057638595df05ac69f622bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Jan 2016 14:26:27 +0100 Subject: [PATCH 0334/1105] [bigflix] Use correct indentation to make flake8 happy --- youtube_dl/extractor/bigflix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index 24720de86..33762ad93 100644 --- a/youtube_dl/extractor/bigflix.py +++ b/youtube_dl/extractor/bigflix.py @@ -53,7 +53,7 @@ class BigflixIE(InfoExtractor): formats = [] for height, encoded_url in re.findall( - r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage): + r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage): video_url = decode_url(encoded_url) f = { 'url': video_url, From 3c91e41614a0f3bfab8710e861c895b78d93bce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Jan 2016 14:32:53 +0100 Subject: [PATCH 0335/1105] [downloader/fragment] Don't fail if the 'Content-Length' header is missing In some dailymotion videos (like http://www.dailymotion.com/video/x3k0dtv from #8156) the segments URLs don't have the 'Content-Length' header and HttpFD sets the 'totat_bytes' field to None, so we also use '0' in that case (since we do different math operations with it). --- youtube_dl/downloader/fragment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 5a64b29ee..3da554622 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -66,7 +66,7 @@ class FragmentFD(FileDownloader): if s['status'] not in ('downloading', 'finished'): return - frag_total_bytes = s.get('total_bytes', 0) + frag_total_bytes = s.get('total_bytes') or 0 if s['status'] == 'finished': state['downloaded_bytes'] += frag_total_bytes state['frag_index'] += 1 From dfb1b1468cef4ddc7ecc43776abce03763f8e426 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Sun, 10 Jan 2016 16:17:47 +0100 Subject: [PATCH 0336/1105] Fix typos Closes #8200. --- devscripts/gh-pages/update-copyright.py | 2 +- test/test_write_annotations.py | 2 +- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/extractor/common.py | 4 ++-- youtube_dl/extractor/facebook.py | 4 ++-- youtube_dl/extractor/generic.py | 4 ++-- youtube_dl/extractor/ivi.py | 2 +- youtube_dl/extractor/mdr.py | 2 +- youtube_dl/extractor/nbc.py | 2 +- youtube_dl/extractor/nhl.py | 2 +- youtube_dl/extractor/ora.py | 2 +- youtube_dl/extractor/testurl.py | 2 +- youtube_dl/extractor/tv4.py | 2 +- youtube_dl/extractor/videomore.py | 2 +- youtube_dl/swfinterp.py | 2 +- youtube_dl/utils.py | 4 ++-- 16 files changed, 20 insertions(+), 20 deletions(-) diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py index 3663c8afe..e6c3abc8d 100755 --- a/devscripts/gh-pages/update-copyright.py +++ b/devscripts/gh-pages/update-copyright.py @@ -5,7 +5,7 @@ from __future__ import with_statement, unicode_literals import datetime import glob -import io # For Python 2 compatibilty +import io # For Python 2 compatibility import os import re diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index 84b8f39e0..8de08f2d6 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -66,7 +66,7 @@ class TestAnnotations(unittest.TestCase): textTag = a.find('TEXT') text = textTag.text self.assertTrue(text in expected) # assertIn only added in python 2.7 - # remove the first occurance, there could be more than one annotation with the same text + # remove the first occurrence, there could be more than one annotation with the same text expected.remove(text) # We should have seen (and removed) all the expected annotation texts. self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3b2be3159..d50b7cfed 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1312,7 +1312,7 @@ class YoutubeDL(object): # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, - # wich can't be exported to json + # which can't be exported to json info_dict['formats'] = formats if self.params.get('listformats'): self.list_formats(info_dict) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0719c7bcd..b05b22a94 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -313,9 +313,9 @@ class InfoExtractor(object): except ExtractorError: raise except compat_http_client.IncompleteRead as e: - raise ExtractorError('A network error has occured.', cause=e, expected=True) + raise ExtractorError('A network error has occurred.', cause=e, expected=True) except (KeyError, StopIteration) as e: - raise ExtractorError('An extractor error has occured.', cause=e) + raise ExtractorError('An extractor error has occurred.', cause=e) def set_downloader(self, downloader): """Sets the downloader for this IE.""" diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 5e43f2359..ec699ba54 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -105,7 +105,7 @@ class FacebookIE(InfoExtractor): login_results, 'login error', default=None, group='error') if error: raise ExtractorError('Unable to login: %s' % error, expected=True) - self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') + self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.') return fb_dtsg = self._search_regex( @@ -126,7 +126,7 @@ class FacebookIE(InfoExtractor): check_response = self._download_webpage(check_req, None, note='Confirming login') if re.search(r'id="checkpointSubmitButton"', check_response) is not None: - self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.') + self._downloader.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err)) return diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d79e1adc9..b3f8efc80 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -487,7 +487,7 @@ class GenericIE(InfoExtractor): 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, - # Embeded Ustream video + # Embedded Ustream video { 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', 'md5': '27b99cdb639c9b12a79bca876a073417', @@ -1644,7 +1644,7 @@ class GenericIE(InfoExtractor): if myvi_url: return self.url_result(myvi_url) - # Look for embeded soundcloud player + # Look for embedded soundcloud player mobj = re.search( r'https?://(?:w\.)?soundcloud\.com/player[^"]+)"', webpage) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index d0f00cdea..472d72b4c 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -32,7 +32,7 @@ class IviIE(InfoExtractor): }, 'skip': 'Only works from Russia', }, - # Serial's serie + # Serial's series { 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549', 'md5': '221f56b35e3ed815fde2df71032f4b3e', diff --git a/youtube_dl/extractor/mdr.py b/youtube_dl/extractor/mdr.py index 88334889e..425fc9e2a 100644 --- a/youtube_dl/extractor/mdr.py +++ b/youtube_dl/extractor/mdr.py @@ -17,7 +17,7 @@ class MDRIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P\d+)(?:_.+?)?\.html' _TESTS = [{ - # MDR regularily deletes its videos + # MDR regularly deletes its videos 'url': 'http://www.mdr.de/fakt/video189002.html', 'only_matching': True, }, { diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 340c922bd..1dd54c2f1 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -100,7 +100,7 @@ class NBCSportsVPlayerIE(InfoExtractor): class NBCSportsIE(InfoExtractor): - # Does not include https becuase its certificate is invalid + # Does not include https because its certificate is invalid _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P[0-9a-z-]+)' _TEST = { diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index e98a5ef89..8d5ce46ad 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -223,7 +223,7 @@ class NHLVideocenterIE(NHLBaseInfoExtractor): response = self._download_webpage(request_url, playlist_title) response = self._fix_json(response) if not response.strip(): - self._downloader.report_warning('Got an empty reponse, trying ' + self._downloader.report_warning('Got an empty response, trying ' 'adding the "newvideos" parameter') response = self._download_webpage(request_url + '&newvideos=true', playlist_title) diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py index 9c4255a2d..02de1502a 100644 --- a/youtube_dl/extractor/ora.py +++ b/youtube_dl/extractor/ora.py @@ -37,7 +37,7 @@ class OraTVIE(InfoExtractor): formats = self._extract_m3u8_formats( m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - # simular to GameSpotIE + # similar to GameSpotIE m3u8_path = compat_urlparse.urlparse(m3u8_url).path QUALITIES_RE = r'((,[a-z]+\d+)+,?)' available_qualities = self._search_regex( diff --git a/youtube_dl/extractor/testurl.py b/youtube_dl/extractor/testurl.py index c7d559315..46918adb0 100644 --- a/youtube_dl/extractor/testurl.py +++ b/youtube_dl/extractor/testurl.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class TestURLIE(InfoExtractor): - """ Allows adressing of the test cases as test:yout.*be_1 """ + """ Allows addressing of the test cases as test:yout.*be_1 """ IE_DESC = False # Do not list _VALID_URL = r'test(?:url)?:(?P(?P.+?)(?:_(?P[0-9]+))?)$' diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py index 1c4b6d635..343edf206 100644 --- a/youtube_dl/extractor/tv4.py +++ b/youtube_dl/extractor/tv4.py @@ -67,7 +67,7 @@ class TV4IE(InfoExtractor): info = self._download_json( 'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') - # If is_geo_restricted is true, it doesn't neceserally mean we can't download it + # If is_geo_restricted is true, it doesn't necessarily mean we can't download it if info['is_geo_restricted']: self.report_warning('This content might not be available in your country due to licensing restrictions.') if info['requires_subscription']: diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py index a66d6de23..fcee940e6 100644 --- a/youtube_dl/extractor/videomore.py +++ b/youtube_dl/extractor/videomore.py @@ -170,7 +170,7 @@ class VideomoreVideoIE(InfoExtractor): 'skip_download': True, }, }, { - # season single serie with og:video:iframe + # season single series with og:video:iframe 'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya', 'only_matching': True, }, { diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index e60505ace..06c1d6cc1 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -689,7 +689,7 @@ class SWFInterpreter(object): elif mname in _builtin_classes: res = _builtin_classes[mname] else: - # Assume unitialized + # Assume uninitialized # TODO warn here res = undefined stack.append(res) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index da4ec7f20..9c1c0e0bd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -984,7 +984,7 @@ def date_from_str(date_str): if sign == '-': time = -time unit = match.group('unit') - # A bad aproximation? + # A bad approximation? if unit == 'month': unit = 'day' time *= 30 @@ -1307,7 +1307,7 @@ def parse_filesize(s): if s is None: return None - # The lower-case forms are of course incorrect and inofficial, + # The lower-case forms are of course incorrect and unofficial, # but we support those too _UNIT_TABLE = { 'B': 1, From 36a0e46c39ea4f211dea9944177976e8f8364736 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 10 Jan 2016 17:55:41 +0100 Subject: [PATCH 0337/1105] fix typos --- youtube_dl/YoutubeDL.py | 4 ++-- youtube_dl/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d50b7cfed..6afc1b730 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -241,7 +241,7 @@ class YoutubeDL(object): - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) source_address: (Experimental) Client-side IP address to bind to. - call_home: Boolean, true iff we are allowed to contact the + call_home: Boolean, true if we are allowed to contact the youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. listformats: Print an overview of available video formats and exit. @@ -590,7 +590,7 @@ class YoutubeDL(object): return None def _match_entry(self, info_dict, incomplete): - """ Returns None iff the file should be downloaded """ + """ Returns None if the file should be downloaded """ video_title = info_dict.get('title', info_dict.get('id', 'video')) if 'title' in info_dict: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9c1c0e0bd..e583299c5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1857,7 +1857,7 @@ def encode_data_uri(data, mime_type): def age_restricted(content_limit, age_limit): - """ Returns True iff the content should be blocked """ + """ Returns True if the content should be blocked """ if age_limit is None: # No limit set return False From 6ec6cb4e956abbd76a6ff42336821770a5fbbcc7 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 10 Jan 2016 19:27:22 +0100 Subject: [PATCH 0338/1105] Revert "fix typos" This reverts commit 36a0e46c39ea4f211dea9944177976e8f8364736. --- youtube_dl/YoutubeDL.py | 4 ++-- youtube_dl/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 6afc1b730..d50b7cfed 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -241,7 +241,7 @@ class YoutubeDL(object): - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) source_address: (Experimental) Client-side IP address to bind to. - call_home: Boolean, true if we are allowed to contact the + call_home: Boolean, true iff we are allowed to contact the youtube-dl servers for debugging. sleep_interval: Number of seconds to sleep before each download. listformats: Print an overview of available video formats and exit. @@ -590,7 +590,7 @@ class YoutubeDL(object): return None def _match_entry(self, info_dict, incomplete): - """ Returns None if the file should be downloaded """ + """ Returns None iff the file should be downloaded """ video_title = info_dict.get('title', info_dict.get('id', 'video')) if 'title' in info_dict: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e583299c5..9c1c0e0bd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1857,7 +1857,7 @@ def encode_data_uri(data, mime_type): def age_restricted(content_limit, age_limit): - """ Returns True if the content should be blocked """ + """ Returns True iff the content should be blocked """ if age_limit is None: # No limit set return False From 192b9a571cd50b0fd924e9ecb926b28633e8c19c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 Jan 2016 21:56:19 +0600 Subject: [PATCH 0339/1105] [videomega] Mark broken --- youtube_dl/extractor/videomega.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py index 87aca327b..5e2e7cbac 100644 --- a/youtube_dl/extractor/videomega.py +++ b/youtube_dl/extractor/videomega.py @@ -8,6 +8,7 @@ from ..utils import sanitized_Request class VideoMegaIE(InfoExtractor): + _WORKING = False _VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA', From b924bfad684ac7e9209e7165df3e13af6545596d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 Jan 2016 21:58:32 +0600 Subject: [PATCH 0340/1105] [videott] Mark broken --- youtube_dl/extractor/videott.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index 591024ead..2cd36508a 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -11,6 +11,7 @@ from ..utils import ( class VideoTtIE(InfoExtractor): + _WORKING = False ID_NAME = 'video.tt' IE_DESC = 'video.tt - Your True Tube' _VALID_URL = r'http://(?:www\.)?video\.tt/(?:(?:video|embed)/|watch_video\.php\?v=)(?P[\da-zA-Z]{9})' From be27283ef68e96d2461875a2bfb8c9ce962cfe61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 11 Jan 2016 22:00:17 +0600 Subject: [PATCH 0341/1105] [iprima] Mark broken --- youtube_dl/extractor/iprima.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index 36baf3245..073777f34 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -14,6 +14,7 @@ from ..utils import ( class IPrimaIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://play\.iprima\.cz/(?:[^/]+/)*(?P[^?#]+)' _TESTS = [{ From 9cb1a06b6c8fbeb6cfdbf0533ea60a6624fa1246 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 12 Jan 2016 22:09:38 +0600 Subject: [PATCH 0342/1105] [downloader/fragment] Remove unused code and fix zero division error --- youtube_dl/downloader/fragment.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 3da554622..d236ac737 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -78,17 +78,10 @@ class FragmentFD(FileDownloader): state['total_bytes_estimate'] = estimated_size state['elapsed'] = time_now - start - if s['status'] == 'finished': - progress = self.calc_percent(state['frag_index'], total_frags) - else: - frag_downloaded_bytes = s['downloaded_bytes'] - frag_progress = self.calc_percent(frag_downloaded_bytes, - frag_total_bytes) - progress = self.calc_percent(state['frag_index'], total_frags) - progress += frag_progress / float(total_frags) - + if s['status'] != 'finished': state['eta'] = self.calc_eta( - start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) + start, time_now, estimated_size, + state['downloaded_bytes'] + s['downloaded_bytes']) state['speed'] = s.get('speed') self._hook_progress(state) From 709185a2648c991cc143272dda67e4b42e89c03b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 12 Jan 2016 23:18:38 +0600 Subject: [PATCH 0343/1105] [downloader/fragment] More smooth calculations `downloaded_bytes` is now updated on each fragment progress hook invocation --- youtube_dl/downloader/fragment.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index d236ac737..b2597f1e5 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -58,6 +58,11 @@ class FragmentFD(FileDownloader): 'frag_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], + # Total complete fragments downloaded so far in bytes + '_complete_frags_downloaded_bytes': 0, + # Amount of fragment's bytes downloaded by the time of the previous + # frag progress hook invocation + '_prev_frag_downloaded_bytes': 0, } start = time.time() ctx['started'] = start @@ -67,22 +72,27 @@ class FragmentFD(FileDownloader): return frag_total_bytes = s.get('total_bytes') or 0 - if s['status'] == 'finished': - state['downloaded_bytes'] += frag_total_bytes - state['frag_index'] += 1 estimated_size = ( - (state['downloaded_bytes'] + frag_total_bytes) / + (state['_complete_frags_downloaded_bytes'] + frag_total_bytes) / (state['frag_index'] + 1) * total_frags) time_now = time.time() state['total_bytes_estimate'] = estimated_size state['elapsed'] = time_now - start - if s['status'] != 'finished': + if s['status'] == 'finished': + state['frag_index'] += 1 + state['downloaded_bytes'] += frag_total_bytes - state['_prev_frag_downloaded_bytes'] + state['_complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + state['_prev_frag_downloaded_bytes'] = 0 + else: + frag_downloaded_bytes = s['downloaded_bytes'] + state['downloaded_bytes'] += frag_downloaded_bytes - state['_prev_frag_downloaded_bytes'] state['eta'] = self.calc_eta( start, time_now, estimated_size, - state['downloaded_bytes'] + s['downloaded_bytes']) + state['downloaded_bytes']) state['speed'] = s.get('speed') + state['_prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state) ctx['dl'].add_progress_hook(frag_progress_hook) From 16a348475cb7ab90e80c31a01eec1280f4fc4fcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 12 Jan 2016 23:23:39 +0600 Subject: [PATCH 0344/1105] [dailymotion] Prefer direct links (Closes #8156) --- youtube_dl/extractor/dailymotion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 439fd42e8..b687ec4d6 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -149,14 +149,15 @@ class DailymotionIE(DailymotionBaseInfoExtractor): ext = determine_ext(media_url) if type_ == 'application/x-mpegURL' or ext == 'm3u8': formats.extend(self._extract_m3u8_formats( - media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + media_url, video_id, 'mp4', preference=-1, + m3u8_id='hls', fatal=False)) elif type_ == 'application/f4m' or ext == 'f4m': formats.extend(self._extract_f4m_formats( media_url, video_id, preference=-1, f4m_id='hds', fatal=False)) else: f = { 'url': media_url, - 'format_id': quality, + 'format_id': 'http-%s' % quality, } m = re.search(r'H264-(?P\d+)x(?P\d+)', media_url) if m: From b83b782dc4afeabbd356a329fd302b4410afa626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 Jan 2016 00:00:31 +0600 Subject: [PATCH 0345/1105] [downloader/fragment] Move helper data to context dict --- youtube_dl/downloader/fragment.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index b2597f1e5..0c9113d0f 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -58,14 +58,17 @@ class FragmentFD(FileDownloader): 'frag_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], + } + + start = time.time() + ctx.update({ + 'started': start, # Total complete fragments downloaded so far in bytes - '_complete_frags_downloaded_bytes': 0, + 'complete_frags_downloaded_bytes': 0, # Amount of fragment's bytes downloaded by the time of the previous # frag progress hook invocation - '_prev_frag_downloaded_bytes': 0, - } - start = time.time() - ctx['started'] = start + 'prev_frag_downloaded_bytes': 0, + }) def frag_progress_hook(s): if s['status'] not in ('downloading', 'finished'): @@ -74,7 +77,7 @@ class FragmentFD(FileDownloader): frag_total_bytes = s.get('total_bytes') or 0 estimated_size = ( - (state['_complete_frags_downloaded_bytes'] + frag_total_bytes) / + (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / (state['frag_index'] + 1) * total_frags) time_now = time.time() state['total_bytes_estimate'] = estimated_size @@ -82,17 +85,17 @@ class FragmentFD(FileDownloader): if s['status'] == 'finished': state['frag_index'] += 1 - state['downloaded_bytes'] += frag_total_bytes - state['_prev_frag_downloaded_bytes'] - state['_complete_frags_downloaded_bytes'] = state['downloaded_bytes'] - state['_prev_frag_downloaded_bytes'] = 0 + state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] + ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + ctx['prev_frag_downloaded_bytes'] = 0 else: frag_downloaded_bytes = s['downloaded_bytes'] - state['downloaded_bytes'] += frag_downloaded_bytes - state['_prev_frag_downloaded_bytes'] + state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] state['eta'] = self.calc_eta( start, time_now, estimated_size, state['downloaded_bytes']) state['speed'] = s.get('speed') - state['_prev_frag_downloaded_bytes'] = frag_downloaded_bytes + ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state) ctx['dl'].add_progress_hook(frag_progress_hook) From bc0550c262db0bc756bb7e08fb945f3285fb196e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 13 Jan 2016 08:18:37 +0600 Subject: [PATCH 0346/1105] [pluralsight] Fix new player (Closes #8215) --- youtube_dl/extractor/pluralsight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py index 55c11b3bf..12e1c2862 100644 --- a/youtube_dl/extractor/pluralsight.py +++ b/youtube_dl/extractor/pluralsight.py @@ -232,7 +232,7 @@ class PluralsightIE(PluralsightBaseIE): # { a = author, cn = clip_id, lc = end, m = name } return { - 'id': clip['clipName'], + 'id': clip.get('clipName') or clip['name'], 'title': '%s - %s' % (module['title'], clip['title']), 'duration': int_or_none(clip.get('duration')) or parse_duration(clip.get('formattedDuration')), 'creator': author, From cc28492d31556b55bbd8fc574bf5d890305b22d2 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 13 Jan 2016 17:05:38 +0800 Subject: [PATCH 0347/1105] [youtube] Fix acodec and vcodec order In RFC6381, there's no rule stating that the first part of codecs should be video and the second part should be audio, while it seems the case for data reported by YouTube. --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 897641e42..e4f227f19 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1487,7 +1487,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if codecs: codecs = codecs.split(',') if len(codecs) == 2: - acodec, vcodec = codecs[0], codecs[1] + acodec, vcodec = codecs[1], codecs[0] else: acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0]) dct.update({ From 40cf7fcbd2e30747065ca7b8bf4467a9582a4aa9 Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 13 Jan 2016 13:29:00 +0100 Subject: [PATCH 0348/1105] [tudou] Add support for Albums and Playlists and extract more metadata --- youtube_dl/extractor/__init__.py | 6 +- youtube_dl/extractor/tudou.py | 94 ++++++++++++++++++++++++++------ 2 files changed, 81 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 40c42d4d2..e4ae9332d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -723,7 +723,11 @@ from .trilulilu import TriluliluIE from .trutube import TruTubeIE from .tube8 import Tube8IE from .tubitv import TubiTvIE -from .tudou import TudouIE +from .tudou import ( + TudouIE, + TudouPlaylistIE, + TudouAlbumIE, +) from .tumblr import TumblrIE from .tunein import ( TuneInClipIE, diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py index 5f7ac4b35..da3cd76f7 100644 --- a/youtube_dl/extractor/tudou.py +++ b/youtube_dl/extractor/tudou.py @@ -4,10 +4,16 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import compat_str +from ..utils import ( + int_or_none, + float_or_none, + unescapeHTML, +) class TudouIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:listplay|programs(?:/view)?|albumplay)/([^/]+/)*(?P[^/?#]+?)(?:\.html)?/?(?:$|[?#])' + IE_NAME = 'tudou' + _VALID_URL = r'https?://(?:www\.)?tudou\.com/(?:(?:programs|wlplay)/view|(?:listplay|albumplay)/[\w-]{11})/(?P[\w-]{11})' _TESTS = [{ 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html', 'md5': '140a49ed444bd22f93330985d8475fcb', @@ -16,6 +22,11 @@ class TudouIE(InfoExtractor): 'ext': 'f4v', 'title': '卡马乔国足开大脚长传冲吊集锦', 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1372113489000, + 'description': '卡马乔卡家军,开大脚先进战术不完全集锦!', + 'duration': 289.04, + 'view_count': int, + 'filesize': int, } }, { 'url': 'http://www.tudou.com/programs/view/ajX3gyhL0pc/', @@ -24,10 +35,12 @@ class TudouIE(InfoExtractor): 'ext': 'f4v', 'title': 'La Sylphide-Bolshoi-Ekaterina Krysanova & Vyacheslav Lopatin 2012', 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1349207518000, + 'description': 'md5:294612423894260f2dcd5c6c04fe248b', + 'duration': 5478.33, + 'view_count': int, + 'filesize': int, } - }, { - 'url': 'http://www.tudou.com/albumplay/cJAHGih4yYg.html', - 'only_matching': True, }] _PLAYER_URL = 'http://js.tudouui.com/bin/lingtong/PortalPlayer_177.swf' @@ -42,24 +55,20 @@ class TudouIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + item_data = self._download_json( + 'http://www.tudou.com/tvp/getItemInfo.action?ic=%s' % video_id, video_id) - youku_vcode = self._search_regex( - r'vcode\s*:\s*[\'"]([^\'"]*)[\'"]', webpage, 'youku vcode', default=None) + youku_vcode = item_data.get('vcode') if youku_vcode: return self.url_result('youku:' + youku_vcode, ie='Youku') - title = self._search_regex( - r',kw\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'title') - thumbnail_url = self._search_regex( - r',pic\s*:\s*[\'"]([^\'"]+)[\'"]', webpage, 'thumbnail URL', fatal=False) + title = unescapeHTML(item_data['kw']) + description = item_data.get('desc') + thumbnail_url = item_data.get('pic') + view_count = int_or_none(item_data.get('playTimes')) + timestamp = int_or_none(item_data.get('pt')) - player_url = self._search_regex( - r'playerUrl\s*:\s*[\'"]([^\'"]+\.swf)[\'"]', - webpage, 'player URL', default=self._PLAYER_URL) - - segments = self._parse_json(self._search_regex( - r'segs: \'([^\']+)\'', webpage, 'segments'), video_id) + segments = self._parse_json(item_data['itemSegs'], video_id) # It looks like the keys are the arguments that have to be passed as # the hd field in the request url, we pick the higher # Also, filter non-number qualities (see issue #3643). @@ -80,8 +89,13 @@ class TudouIE(InfoExtractor): 'ext': ext, 'title': title, 'thumbnail': thumbnail_url, + 'description': description, + 'view_count': view_count, + 'timestamp': timestamp, + 'duration': float_or_none(part.get('seconds'), 1000), + 'filesize': int_or_none(part.get('size')), 'http_headers': { - 'Referer': player_url, + 'Referer': self._PLAYER_URL, }, } result.append(part_info) @@ -92,3 +106,47 @@ class TudouIE(InfoExtractor): 'id': video_id, 'title': title, } + + +class TudouPlaylistIE(InfoExtractor): + IE_NAME = 'tudou:playlist' + _VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P[\w-]{11})\.html' + _TESTS = [{ + 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html', + 'info_dict': { + 'id': 'zzdE77v6Mmo', + }, + 'playlist_mincount': 209, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + playlist_data = self._download_json( + 'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id, playlist_id) + entries = [self.url_result( + 'http://www.tudou.com/programs/view/%s' % item['icode'], + 'Tudou', item['icode'], + item['kw']) for item in playlist_data['items']] + return self.playlist_result(entries, playlist_id) + + +class TudouAlbumIE(InfoExtractor): + IE_NAME = 'tudou:album' + _VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P[\w-]{11})' + _TESTS = [{ + 'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html', + 'info_dict': { + 'id': 'v5qckFJvNJg', + }, + 'playlist_mincount': 45, + }] + + def _real_extract(self, url): + album_id = self._match_id(url) + album_data = self._download_json( + 'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id, album_id) + entries = [self.url_result( + 'http://www.tudou.com/programs/view/%s' % item['icode'], + 'Tudou', item['icode'], + item['kw']) for item in album_data['items']] + return self.playlist_result(entries, album_id) From e37afbe0b8a1222cb214ad0bec9a53bb7953531d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 00:16:23 +0100 Subject: [PATCH 0349/1105] [YoutubeDL] urlopen: disable the 'file:' protocol (#8227) If someone is running youtube-dl on a server to deliver files, the user could input 'file:///some/important/file' and youtube-dl would save that file as a video giving access to sensitive information to the user. 'file:' urls can be filtered, but the user can use an URL to a crafted m3u8 manifest like: #EXTM3U #EXT-X-MEDIA-SEQUENCE:0 #EXTINF:10.0 file:///etc/passwd #EXT-X-ENDLIST With this patch 'file:' URLs raise URLError like for unknown protocols. --- test/test_YoutubeDL.py | 7 ++++++- youtube_dl/YoutubeDL.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 0388c0bf3..0caa43843 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -12,7 +12,7 @@ import copy from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL -from youtube_dl.compat import compat_str +from youtube_dl.compat import compat_str, compat_urllib_error from youtube_dl.extractor import YoutubeIE from youtube_dl.postprocessor.common import PostProcessor from youtube_dl.utils import ExtractorError, match_filter_func @@ -631,6 +631,11 @@ class TestYoutubeDL(unittest.TestCase): result = get_ids({'playlist_items': '10'}) self.assertEqual(result, []) + def test_urlopen_no_file_protocol(self): + # see https://github.com/rg3/youtube-dl/issues/8227 + ydl = YDL() + self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd') + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d50b7cfed..e8ce58604 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1986,8 +1986,14 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() - opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler) + unknown_handler = compat_urllib_request.UnknownHandler() + handlers = (proxy_handler, https_handler, cookie_processor, ydlh, data_handler, unknown_handler) + # we don't use build_opener because it automatically adds FileHandler, + # which can be used for malicious purposes (see + # https://github.com/rg3/youtube-dl/issues/8227) + opener = compat_urllib_request.OpenerDirector() + for handler in handlers: + opener.add_handler(handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play From 6240b0a278781a3b584a9dd6d57191b2472c0fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 08:14:01 +0100 Subject: [PATCH 0350/1105] [YoutubeDL] urlopen: use build_opener again Otherwise we would need to manually add handlers like HTTPRedirectHandler, instead we add a customized FileHandler instance that raises an error. --- youtube_dl/YoutubeDL.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e8ce58604..ccad5f2ea 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1986,14 +1986,19 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) data_handler = compat_urllib_request_DataHandler() - unknown_handler = compat_urllib_request.UnknownHandler() - handlers = (proxy_handler, https_handler, cookie_processor, ydlh, data_handler, unknown_handler) - # we don't use build_opener because it automatically adds FileHandler, - # which can be used for malicious purposes (see + + # When passing our own FileHandler instance, build_opener won't add the + # default FileHandler and allows us to disable the file protocol, which + # can be used for malicious purposes (see # https://github.com/rg3/youtube-dl/issues/8227) - opener = compat_urllib_request.OpenerDirector() - for handler in handlers: - opener.add_handler(handler) + file_handler = compat_urllib_request.FileHandler() + + def file_open(*args, **kwargs): + raise compat_urllib_error.URLError('file protocol is disabled') + file_handler.file_open = file_open + + opener = compat_urllib_request.build_opener( + proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play From 4240d504963bb6d1c7bd7c288a7874f9d8dc042b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 14:07:54 +0100 Subject: [PATCH 0351/1105] [YoutubeDL] improve error message for file:/// URLs --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ccad5f2ea..4915fbd45 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1994,7 +1994,7 @@ class YoutubeDL(object): file_handler = compat_urllib_request.FileHandler() def file_open(*args, **kwargs): - raise compat_urllib_error.URLError('file protocol is disabled') + raise compat_urllib_error.URLError('file:/// protocol is explicitly disabled in youtube-dl for security reasons') file_handler.file_open = file_open opener = compat_urllib_request.build_opener( From 4511c1976d0a06394a000333a020a4d3668072fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 19:57:20 +0600 Subject: [PATCH 0352/1105] [beeg] Fix extraction (Closes #8225) --- youtube_dl/extractor/beeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c8d921daf..d0174b818 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -60,7 +60,7 @@ class BeegIE(InfoExtractor): def decrypt_url(encrypted_url): encrypted_url = self._proto_relative_url( - encrypted_url.replace('{DATA_MARKERS}', ''), 'http:') + encrypted_url.replace('{DATA_MARKERS}', ''), 'https:') key = self._search_regex( r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None) if not key: From abb893e6e45b0b0c6ec0e3a1d29dbd1746cbee96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 19:57:56 +0600 Subject: [PATCH 0353/1105] [beeg] Update API URL --- youtube_dl/extractor/beeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index d0174b818..34c2a756f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -34,7 +34,7 @@ class BeegIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://beeg.com/api/v5/video/%s' % video_id, video_id) + 'https://api.beeg.com/api/v5/video/%s' % video_id, video_id) def split(o, e): def cut(s, x): From 11c60089a8772a2d12288f0ff382866e516f9a4b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 14 Jan 2016 15:43:21 +0100 Subject: [PATCH 0354/1105] release 2016.01.14 --- docs/supportedsites.md | 11 +++++++---- youtube_dl/version.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8d0c7b97a..eb160bd2f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -65,6 +65,7 @@ - **Beeg** - **BehindKink** - **Bet** + - **Bigflix** - **Bild**: Bild.de - **BiliBili** - **BleacherReport** @@ -251,7 +252,7 @@ - **Instagram** - **instagram:user**: Instagram user profile - **InternetVideoArchive** - - **IPrima** + - **IPrima** (Currently broken) - **iqiyi**: 爱奇艺 - **Ir90Tv** - **ivi**: ivi.ru @@ -602,7 +603,9 @@ - **TruTube** - **Tube8** - **TubiTv** - - **Tudou** + - **tudou** + - **tudou:album** + - **tudou:playlist** - **Tumblr** - **tunein:clip** - **tunein:program** @@ -655,12 +658,12 @@ - **video.mit.edu** - **VideoDetective** - **videofy.me** - - **VideoMega** + - **VideoMega** (Currently broken) - **videomore** - **videomore:season** - **videomore:video** - **VideoPremium** - - **VideoTt**: video.tt - Your True Tube + - **VideoTt**: video.tt - Your True Tube (Currently broken) - **videoweed**: VideoWeed - **Vidme** - **Vidzi** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7030903c0..4d433b667 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.01.09' +__version__ = '2016.01.14' From 30e2f2d76f6dd52803effce14fa14f3a8051c84a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 14 Jan 2016 16:28:46 +0100 Subject: [PATCH 0355/1105] [YoutubeDL] use a more correct terminology in the error message for file:// URLs --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4915fbd45..6b73b8e06 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1994,7 +1994,7 @@ class YoutubeDL(object): file_handler = compat_urllib_request.FileHandler() def file_open(*args, **kwargs): - raise compat_urllib_error.URLError('file:/// protocol is explicitly disabled in youtube-dl for security reasons') + raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons') file_handler.file_open = file_open opener = compat_urllib_request.build_opener( From fbd90643cb123011a224da58b4ff1c4ba1c4f8f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 21:48:08 +0600 Subject: [PATCH 0356/1105] [vodlocker] Fix extraction (Closes #8231) --- youtube_dl/extractor/vodlocker.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index 357594a11..a97995a6d 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -5,12 +5,13 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse from ..utils import ( ExtractorError, + NO_DEFAULT, sanitized_Request, ) class VodlockerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?:embed-)?(?P[0-9a-zA-Z]+)(?:\..*?)?' + _VALID_URL = r'https?://(?:www\.)?vodlocker\.(?:com|city)/(?:embed-)?(?P[0-9a-zA-Z]+)(?:\..*?)?' _TESTS = [{ 'url': 'http://vodlocker.com/e8wvyzz4sl42', @@ -43,16 +44,31 @@ class VodlockerIE(InfoExtractor): webpage = self._download_webpage( req, video_id, 'Downloading video page') + def extract_file_url(html, default=NO_DEFAULT): + return self._search_regex( + r'file:\s*"(http[^\"]+)",', html, 'file url', default=default) + + video_url = extract_file_url(webpage, default=None) + + if not video_url: + embed_url = self._search_regex( + r']+src=(["\'])(?P(?:https?://)?vodlocker\.(?:com|city)/embed-.+?)\1', + webpage, 'embed url', group='url') + embed_webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed webpage') + video_url = extract_file_url(embed_webpage) + thumbnail_webpage = embed_webpage + else: + thumbnail_webpage = webpage + title = self._search_regex( r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title') thumbnail = self._search_regex( - r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') - url = self._search_regex( - r'file:\s*"(http[^\"]+)",', webpage, 'file url') + r'image:\s*"(http[^\"]+)",', thumbnail_webpage, 'thumbnail', fatal=False) formats = [{ 'format_id': 'sd', - 'url': url, + 'url': video_url, }] return { From 5cc9c5dfa8f731b6582b092e06f78cccbaefc3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 21:53:24 +0600 Subject: [PATCH 0357/1105] [unistra] Fix extraction --- youtube_dl/extractor/unistra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index f70978299..594bee4f9 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -38,7 +38,7 @@ class UnistraIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - files = set(re.findall(r'file\s*:\s*"([^"]+)"', webpage)) + files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage)) quality = qualities(['SD', 'HD']) formats = [] From 163e8369b0d2f6b8cc59dd1e93b20a980590648f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:05:04 +0600 Subject: [PATCH 0358/1105] [ntvde] Fix extraction --- youtube_dl/extractor/ntvde.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index d2cfe0961..8268eace7 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, js_to_json, @@ -42,18 +43,24 @@ class NTVDeIE(InfoExtractor): webpage, 'player data'), video_id, transform_source=js_to_json) duration = parse_duration(vdata.get('duration')) - formats = [{ - 'format_id': 'flash', - 'url': 'rtmp://fms.n-tv.de/' + vdata['video'], - }, { - 'format_id': 'mobile', - 'url': 'http://video.n-tv.de' + vdata['videoMp4'], - 'tbr': 400, # estimation - }] - m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8'] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', - entry_protocol='m3u8_native', preference=0)) + + formats = [] + if vdata.get('video'): + formats.append({ + 'format_id': 'flash', + 'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'], + }) + if vdata.get('videoMp4'): + formats.append({ + 'format_id': 'mobile', + 'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']), + 'tbr': 400, # estimation + }) + if vdata.get('videoM3u8'): + m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8']) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + preference=0, m3u8_id='hls', fatal=False)) self._sort_formats(formats) return { From 4654c1d01613e26d782c95b13ce60e5fdd84892a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:07:42 +0600 Subject: [PATCH 0359/1105] [orf:fm4] Extend _VALID_URL (Closes #8234) --- youtube_dl/extractor/orf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 2e6c9872b..da598e7f7 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -170,7 +170,7 @@ class ORFOE1IE(InfoExtractor): class ORFFM4IE(InfoExtractor): IE_NAME = 'orf:fm4' IE_DESC = 'radio FM4' - _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P[0-9]+)/(?P\w+)' + _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P[0-9]+)/(?P\w+)' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 986986064ec102b0d97b4ab008ae38ede6358796 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:11:33 +0600 Subject: [PATCH 0360/1105] [orf:fm4] Add test --- youtube_dl/extractor/orf.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index da598e7f7..c54775d54 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -172,6 +172,20 @@ class ORFFM4IE(InfoExtractor): IE_DESC = 'radio FM4' _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P[0-9]+)/(?P\w+)' + _TEST = { + 'url': 'http://fm4.orf.at/player/20160110/IS/', + 'md5': '01e736e8f1cef7e13246e880a59ad298', + 'info_dict': { + 'id': '2016-01-10_2100_tl_54_7DaysSun13_11244', + 'ext': 'mp3', + 'title': 'Im Sumpf', + 'description': 'md5:384c543f866c4e422a55f66a62d669cd', + 'duration': 7173, + 'timestamp': 1452456073, + 'upload_date': '20160110', + }, + } + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) show_date = mobj.group('date') From 6b559c2fbcf70158bd84b3b5892ecd5fc4b03e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 14 Jan 2016 22:12:24 +0600 Subject: [PATCH 0361/1105] [ntvde] Improve regex --- youtube_dl/extractor/ntvde.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index 8268eace7..a83e85cb8 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -35,7 +35,7 @@ class NTVDeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) info = self._parse_json(self._search_regex( - r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'), + r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'), video_id, transform_source=js_to_json) timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp')) vdata = self._parse_json(self._search_regex( From 0baedd1851692a4b9f94c08b3eae5d57acf07f09 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 13 Jan 2016 16:11:49 +0100 Subject: [PATCH 0362/1105] [prosiebensat1] add support for 7tv.de --- youtube_dl/extractor/prosiebensat1.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index baa54a3af..953df3efc 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -20,7 +20,7 @@ from ..utils import ( class ProSiebenSat1IE(InfoExtractor): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P.+)' _TESTS = [ { @@ -172,6 +172,20 @@ class ProSiebenSat1IE(InfoExtractor): }, 'playlist_count': 2, }, + { + 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', + 'info_dict': { + 'id': '4187506', + 'ext': 'flv', + 'title': 'Best of Circus HalliGalli', + 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', + 'upload_date': '20151229', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, ] _CLIPID_REGEXES = [ @@ -186,12 +200,14 @@ class ProSiebenSat1IE(InfoExtractor): r'\s*

(.+?)

', r'

\s*(.+?)

', r'
\s*

([^<]+)

\s*
', + r'

\s*(.+?)

', ] _DESCRIPTION_REGEXES = [ r'

\s*(.+?)

', r'
\s*

Beschreibung: (.+?)

', r'
\s*
\s*\s*(.+?)\s*