From 7b4137c351222a94f46f854bf490a299e4124acc Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 9 Sep 2015 10:42:47 +0100 Subject: [PATCH 01/97] [fktv] fix info extraction --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/fktv.py | 78 ++++++++------------------------ 2 files changed, 20 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 57f55b479..f8d4c8462 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -170,10 +170,7 @@ from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE from .fivetv import FiveTVIE -from .fktv import ( - FKTVIE, - FKTVPosteckeIE, -) +from .fktv import FKTVIE from .flickr import FlickrIE from .folketinget import FolketingetIE from .footyroom import FootyRoomIE diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 190d9f9ad..c2aa23aa2 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -1,13 +1,11 @@ from __future__ import unicode_literals import re -import random -import json from .common import InfoExtractor from ..utils import ( - get_element_by_id, clean_html, + determine_ext, ) @@ -17,66 +15,28 @@ class FKTVIE(InfoExtractor): _TEST = { 'url': 'http://fernsehkritik.tv/folge-1', + 'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79', 'info_dict': { - 'id': '00011', - 'ext': 'flv', + 'id': '1', + 'ext': 'mp4', 'title': 'Folge 1 vom 10. April 2007', - 'description': 'md5:fb4818139c7cfe6907d4b83412a6864f', }, } def _real_extract(self, url): - episode = int(self._match_id(url)) + episode = self._match_id(url) - video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode - start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode, - episode) - playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, - 'playlist', flags=re.DOTALL) - files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) - - videos = [] - for i, _ in enumerate(files, 1): - video_id = '%04d%d' % (episode, i) - video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i) - videos.append({ - 'ext': 'flv', - 'id': video_id, - 'url': video_url, - 'title': clean_html(get_element_by_id('eptitle', start_webpage)), - 'description': clean_html(get_element_by_id('contentlist', start_webpage)), - 'thumbnail': video_thumbnail - }) - return { - '_type': 'multi_video', - 'entries': videos, - 'id': 'folge-%s' % episode, - } - - -class FKTVPosteckeIE(InfoExtractor): - IE_NAME = 'fernsehkritik.tv:postecke' - _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P[0-9]+)(&|$)' - _TEST = { - 'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120', - 'md5': '262f0adbac80317412f7e57b4808e5c4', - 'info_dict': { - 'id': '0120', - 'ext': 'flv', - 'title': 'Postecke 120', - } - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - episode = int(mobj.group('ep')) - - server = random.randint(2, 4) - video_id = '%04d' % episode - video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode) - video_title = 'Postecke %d' % episode - return { - 'id': video_id, - 'url': video_url, - 'title': video_title, - } + webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode) + title = clean_html(self._html_search_regex('

([^<]+?)

', webpage, 'title')) + matchs = re.search(r'(?s)]*poster="([^"]+)"[^>]*>(.*?)', webpage) + if matchs: + poster, sources = matchs.groups() + urls = re.findall(r'(?s)]*src="([^"]+)"[^>]*>', sources) + if sources: + formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls] + return { + 'id': episode, + 'title': title, + 'formats': formats, + 'thumbnail': poster, + } From 6c91a5a7f5408cf666f3ee40b53c0d9e42521b88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 11:16:12 +0600 Subject: [PATCH 02/97] [extractor/generic] Fix following redirect in Refresh HTTP header on python 2 --- youtube_dl/extractor/generic.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ec748ed9f..8881a8a23 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import os import re +import sys from .common import InfoExtractor from .youtube import YoutubeIE @@ -230,6 +231,22 @@ class GenericIE(InfoExtractor): 'skip_download': False, } }, + { + # redirect in Refresh HTTP header + 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1', + 'info_dict': { + 'id': 'pO8h3EaFRdo', + 'ext': 'mp4', + 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set', + 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5', + 'upload_date': '20150917', + 'uploader_id': 'brtvofficial', + 'uploader': 'Boiler Room', + }, + 'params': { + 'skip_download': False, + }, + }, { 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', 'md5': '85b90ccc9d73b4acd9138d3af4c27f89', @@ -1808,6 +1825,9 @@ class GenericIE(InfoExtractor): # Look also in Refresh HTTP header refresh_header = head_response.headers.get('Refresh') if refresh_header: + # In python 2 response HTTP headers are bytestrings + if sys.version_info < (3, 0) and isinstance(refresh_header, str): + refresh_header = refresh_header.decode('iso-8859-1') found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) From f817adc4689a2064fcab733d6aebf83fd0e2cff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 11:31:23 +0600 Subject: [PATCH 03/97] [youtube:history] Disable extractor until #6893 is investigated Wiped out part of my history as well --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c16e9e3d2..b93156232 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -808,7 +808,7 @@ from .youtube import ( YoutubeIE, YoutubeChannelIE, YoutubeFavouritesIE, - YoutubeHistoryIE, + #YoutubeHistoryIE, YoutubePlaylistIE, YoutubeRecommendedIE, YoutubeSearchDateIE, From 393ca8c94d1adb1490b23265370ce69043b92546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 11:45:19 +0600 Subject: [PATCH 04/97] [arte:+7] Look for json vp url in iframe (Closes #6895) --- youtube_dl/extractor/arte.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 76de24477..9ecb6786c 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -4,6 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) from ..utils import ( find_xpath_attr, unified_strdate, @@ -77,7 +81,13 @@ class ArteTVPlus7IE(InfoExtractor): def _extract_from_webpage(self, webpage, video_id, lang): json_url = self._html_search_regex( [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'], - webpage, 'json vp url') + webpage, 'json vp url', default=None) + if not json_url: + iframe_url = self._html_search_regex( + r']+src=(["\'])(?P.+\bjson_url=.+?)\1', + webpage, 'iframe url', group='iframe url') + json_url = compat_parse_qs( + compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] return self._extract_from_json_url(json_url, video_id, lang) def _extract_from_json_url(self, json_url, video_id, lang): From 5e39123b3b6fd740d133fd45824b49b4bc674fa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 11:47:03 +0600 Subject: [PATCH 05/97] [arte:+7] Fix typo --- youtube_dl/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 9ecb6786c..2a00da3ee 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -85,7 +85,7 @@ class ArteTVPlus7IE(InfoExtractor): if not json_url: iframe_url = self._html_search_regex( r']+src=(["\'])(?P.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='iframe url') + webpage, 'iframe url', group='url') json_url = compat_parse_qs( compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] return self._extract_from_json_url(json_url, video_id, lang) From f005f96ea594a82969885ecd124b094c7e09949a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 20 Sep 2015 12:23:13 +0200 Subject: [PATCH 06/97] [youtube:history] Explain why it has disabled and skip test --- test/test_all_urls.py | 2 +- youtube_dl/extractor/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index a9db42b30..a929afd15 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -99,7 +99,7 @@ class TestAllURLsMatching(unittest.TestCase): def test_keywords(self): self.assertMatch(':ytsubs', ['youtube:subscriptions']) self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) - self.assertMatch(':ythistory', ['youtube:history']) + # self.assertMatch(':ythistory', ['youtube:history']) self.assertMatch(':thedailyshow', ['ComedyCentralShows']) self.assertMatch(':tds', ['ComedyCentralShows']) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b93156232..2529d8657 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -808,6 +808,8 @@ from .youtube import ( YoutubeIE, YoutubeChannelIE, YoutubeFavouritesIE, + # disabled because it can wipe the watch history (see #6893) + # remember to uncumment test in test/test_all_urls when it's fixed #YoutubeHistoryIE, YoutubePlaylistIE, YoutubeRecommendedIE, From 5a1a2e94548f25b5fd540090af3f32a1d875f6b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 21:08:29 +0600 Subject: [PATCH 07/97] [utils] Fix kwargs on old python 2 (Closes #6905) --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 206dd56bc..1dc3153fd 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -619,7 +619,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): # expected HTTP responses to meet HTTP/1.0 or later (see also # https://github.com/rg3/youtube-dl/issues/6727) if sys.version_info < (3, 0): - kwargs['strict'] = True + kwargs[b'strict'] = True hc = http_class(*args, **kwargs) source_address = ydl_handler._params.get('source_address') if source_address is not None: From 5b4c54631a8888d75fa766c0bd6ec1822e6caec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 23:12:40 +0600 Subject: [PATCH 08/97] [nfl] Add team domains (#6907) --- youtube_dl/extractor/nfl.py | 132 +++++++++++++++++++++++------------- 1 file changed, 86 insertions(+), 46 deletions(-) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index dc54634a5..fe143ef88 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -16,53 +16,93 @@ from ..utils import ( class NFLIE(InfoExtractor): IE_NAME = 'nfl.com' - _VALID_URL = r'''(?x)https?:// - (?P(?:www\.)?(?:nfl\.com|.*?\.clubs\.nfl\.com))/ - (?:.+?/)* - (?P(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12}))''' - _TESTS = [ - { - 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', - 'md5': '394ef771ddcd1354f665b471d78ec4c6', - 'info_dict': { - 'id': '0ap3000000398478', - 'ext': 'mp4', - 'title': 'Week 3: Redskins vs. Eagles highlights', - 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', - 'upload_date': '20140921', - 'timestamp': 1411337580, - 'thumbnail': 're:^https?://.*\.jpg$', - } - }, - { - 'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', - 'md5': 'cf85bdb4bc49f6e9d3816d130c78279c', - 'info_dict': { - 'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266', - 'ext': 'mp4', - 'title': 'LIVE: Post Game vs. Browns', - 'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', - 'upload_date': '20131229', - 'timestamp': 1388354455, - 'thumbnail': 're:^https?://.*\.jpg$', - } - }, - { - 'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish', - 'info_dict': { - 'id': '0ap3000000467607', - 'ext': 'mp4', - 'title': 'Frustrations flare on the field', - 'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.', - 'timestamp': 1422850320, - 'upload_date': '20150202', - }, - }, - { - 'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood', - 'only_matching': True, + _VALID_URL = r'''(?x) + https?:// + (?P + (?:www\.)? + (?: + (?: + nfl| + buffalobills| + miamidolphins| + patriots| + newyorkjets| + baltimoreravens| + bengals| + clevelandbrowns| + steelers| + houstontexans| + colts| + jaguars| + titansonline| + denverbroncos| + kcchiefs| + raiders| + chargers| + dallascowboys| + giants| + philadelphiaeagles| + redskins| + chicagobears| + detroitlions| + packers| + vikings| + atlantafalcons| + panthers| + neworleanssaints| + buccaneers| + azcardinals| + stlouisrams| + 49ers| + seahawks + )\.com| + .+?\.clubs\.nfl\.com + ) + )/ + (?:.+?/)* + (?P(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12})) + ''' + _TESTS = [{ + 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', + 'md5': '394ef771ddcd1354f665b471d78ec4c6', + 'info_dict': { + 'id': '0ap3000000398478', + 'ext': 'mp4', + 'title': 'Week 3: Redskins vs. Eagles highlights', + 'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478', + 'upload_date': '20140921', + 'timestamp': 1411337580, + 'thumbnail': 're:^https?://.*\.jpg$', } - ] + }, { + 'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266', + 'md5': 'cf85bdb4bc49f6e9d3816d130c78279c', + 'info_dict': { + 'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266', + 'ext': 'mp4', + 'title': 'LIVE: Post Game vs. Browns', + 'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8', + 'upload_date': '20131229', + 'timestamp': 1388354455, + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish', + 'info_dict': { + 'id': '0ap3000000467607', + 'ext': 'mp4', + 'title': 'Frustrations flare on the field', + 'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.', + 'timestamp': 1422850320, + 'upload_date': '20150202', + }, + }, { + 'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood', + 'only_matching': True, + }, { + 'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a', + 'only_matching': True, + }] @staticmethod def prepend_host(host, url): From 4423eba49b1ca5e9cfb78370ab0eee8f84fb72bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 23:45:01 +0600 Subject: [PATCH 09/97] [nfl] Add support for URLs without id (Closes #6907) --- youtube_dl/extractor/nfl.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py index fe143ef88..55dc6107d 100644 --- a/youtube_dl/extractor/nfl.py +++ b/youtube_dl/extractor/nfl.py @@ -60,7 +60,7 @@ class NFLIE(InfoExtractor): ) )/ (?:.+?/)* - (?P(?:[a-z0-9]{16}|\w{8}\-(?:\w{4}\-){3}\w{12})) + (?P[^/#?&]+) ''' _TESTS = [{ 'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights', @@ -96,6 +96,17 @@ class NFLIE(InfoExtractor): 'timestamp': 1422850320, 'upload_date': '20150202', }, + }, { + 'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette', + 'md5': '4c319e2f625ffd0b481b4382c6fc124c', + 'info_dict': { + 'id': 'n-238346', + 'ext': 'mp4', + 'title': '10 Days at Gillette', + 'description': 'md5:8cd9cd48fac16de596eadc0b24add951', + 'timestamp': 1442618809, + 'upload_date': '20150918', + }, }, { 'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood', 'only_matching': True, @@ -135,13 +146,14 @@ class NFLIE(InfoExtractor): webpage = self._download_webpage(url, video_id) config_url = NFLIE.prepend_host(host, self._search_regex( - r'(?:config|configURL)\s*:\s*"([^"]+)"', webpage, 'config URL', - default='static/content/static/config/video/config.json')) + r'(?:(?:config|configURL)\s*:\s*|]+data-config\s*=\s*)(["\'])(?P.+?)\1', + webpage, 'config URL', default='static/content/static/config/video/config.json', + group='config')) # For articles, the id in the url is not the video id video_id = self._search_regex( - r'contentId\s*:\s*"([^"]+)"', webpage, 'video id', default=video_id) - config = self._download_json(config_url, video_id, - note='Downloading player config') + r'(?:]+data-contentId\s*=\s*|contentId\s*:\s*)(["\'])(?P.+?)\1', + webpage, 'video id', default=video_id, group='id') + config = self._download_json(config_url, video_id, 'Downloading player config') url_template = NFLIE.prepend_host( host, '{contentURLTemplate:}'.format(**config)) video_data = self._download_json( From 82c06a40acb41df77ee55acf8979eb5f0cfba4c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 20 Sep 2015 23:54:05 +0600 Subject: [PATCH 10/97] Fix typo --- youtube_dl/extractor/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2529d8657..35d7f0dd2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -809,8 +809,8 @@ from .youtube import ( YoutubeChannelIE, YoutubeFavouritesIE, # disabled because it can wipe the watch history (see #6893) - # remember to uncumment test in test/test_all_urls when it's fixed - #YoutubeHistoryIE, + # remember to uncomment test in test/test_all_urls when it's fixed + YoutubeHistoryIE, YoutubePlaylistIE, YoutubeRecommendedIE, YoutubeSearchDateIE, From 9f5e8d16b3c7a1a2731cbab9661a920fbe04e09f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Sep 2015 01:53:28 +0600 Subject: [PATCH 11/97] [youtube:history] Disable exractor --- youtube_dl/extractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 35d7f0dd2..030c00790 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -810,7 +810,7 @@ from .youtube import ( YoutubeFavouritesIE, # disabled because it can wipe the watch history (see #6893) # remember to uncomment test in test/test_all_urls when it's fixed - YoutubeHistoryIE, + #YoutubeHistoryIE, YoutubePlaylistIE, YoutubeRecommendedIE, YoutubeSearchDateIE, From c6aa838b51484ad3d036db21ba4e9e834720a693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 21 Sep 2015 21:28:02 +0600 Subject: [PATCH 12/97] [youtube:history] Enable exractor --- test/test_all_urls.py | 2 +- youtube_dl/extractor/__init__.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index a929afd15..a9db42b30 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -99,7 +99,7 @@ class TestAllURLsMatching(unittest.TestCase): def test_keywords(self): self.assertMatch(':ytsubs', ['youtube:subscriptions']) self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) - # self.assertMatch(':ythistory', ['youtube:history']) + self.assertMatch(':ythistory', ['youtube:history']) self.assertMatch(':thedailyshow', ['ComedyCentralShows']) self.assertMatch(':tds', ['ComedyCentralShows']) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 030c00790..c16e9e3d2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -808,9 +808,7 @@ from .youtube import ( YoutubeIE, YoutubeChannelIE, YoutubeFavouritesIE, - # disabled because it can wipe the watch history (see #6893) - # remember to uncomment test in test/test_all_urls when it's fixed - #YoutubeHistoryIE, + YoutubeHistoryIE, YoutubePlaylistIE, YoutubeRecommendedIE, YoutubeSearchDateIE, From 92085e7099d3607ff512f241454d6f6f4535b05a Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 20 Sep 2015 22:26:23 +0100 Subject: [PATCH 13/97] [viewster] accept https links and fix api_token extraction and extract mp4 video link(fixes #6787) --- youtube_dl/extractor/viewster.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index cda02ba24..c68a38167 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -16,7 +16,7 @@ from ..utils import ( class ViewsterIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?viewster\.com/(?:serie|movie)/(?P\d+-\d+-\d+)' + _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P\d+-\d+-\d+)' _TESTS = [{ # movie, Type=Movie 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/', @@ -74,8 +74,8 @@ class ViewsterIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) # Get 'api_token' cookie - self._request_webpage(HEADRequest(url), video_id) - cookies = self._get_cookies(url) + self._request_webpage(HEADRequest('http://www.viewster.com/'), video_id) + cookies = self._get_cookies('http://www.viewster.com/') self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value) info = self._download_json( @@ -98,7 +98,7 @@ class ViewsterIE(InfoExtractor): return self.playlist_result(entries, video_id, title, description) formats = [] - for media_type in ('application/f4m+xml', 'application/x-mpegURL'): + for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'): media = self._download_json( 'https://public-api.viewster.com/movies/%s/video?mediaType=%s' % (entry_id, compat_urllib_parse.quote(media_type)), @@ -122,6 +122,8 @@ class ViewsterIE(InfoExtractor): else: formats.append({ 'url': video_url, + 'height': int_or_none(media.get('Height')), + 'width': int_or_none(media.get('Width')), }) self._sort_formats(formats) From 8e97596b7b62fb35ccfeb37c0ce5cb008b217d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2015 21:47:56 +0600 Subject: [PATCH 14/97] [viewster] Extract height from bitrate and prefer mp4 videos --- youtube_dl/extractor/viewster.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index c68a38167..c6aa73c0e 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -120,11 +120,18 @@ class ViewsterIE(InfoExtractor): fatal=False # m3u8 sometimes fail )) else: - formats.append({ + format_id = media.get('Bitrate') + f = { 'url': video_url, + 'format_id': 'mp4-%s' % format_id, 'height': int_or_none(media.get('Height')), 'width': int_or_none(media.get('Width')), - }) + 'preference': 1, + } + if format_id and not f['height']: + f['height'] = int_or_none(self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None)) + formats.append(f) self._sort_formats(formats) synopsis = info.get('Synopsis', {}) From cb4e421901ce5fe26b457c33c141267c5c18a237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2015 21:49:29 +0600 Subject: [PATCH 15/97] [voewster] Update tests --- youtube_dl/extractor/viewster.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index c6aa73c0e..8defc1800 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -20,10 +20,10 @@ class ViewsterIE(InfoExtractor): _TESTS = [{ # movie, Type=Movie 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/', - 'md5': '14d3cfffe66d57b41ae2d9c873416f01', + 'md5': 'e642d1b27fcf3a4ffa79f194f5adde36', 'info_dict': { 'id': '1140-11855-000', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'The listening Project', 'description': 'md5:bac720244afd1a8ea279864e67baa071', 'timestamp': 1214870400, @@ -33,10 +33,10 @@ class ViewsterIE(InfoExtractor): }, { # series episode, Type=Episode 'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/', - 'md5': 'd5434c80fcfdb61651cc2199a88d6ba3', + 'md5': '9243079a8531809efe1b089db102c069', 'info_dict': { 'id': '1284-19427-001', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'The World and a Wall', 'description': 'md5:24814cf74d3453fdf5bfef9716d073e3', 'timestamp': 1428192000, From c430802e32932868e24a6e43d0845df963320829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2015 21:50:20 +0600 Subject: [PATCH 16/97] [extractor/common] Add raise_geo_restricted --- youtube_dl/extractor/common.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d694e818e..1e7db8a9b 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -516,6 +516,12 @@ class InfoExtractor(object): '%s. Use --username and --password or --netrc to provide account credentials.' % msg, expected=True) + @staticmethod + def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'): + raise ExtractorError( + '%s. You might want to use --proxy to workaround.' % msg, + expected=True) + # Methods for following #608 @staticmethod def url_result(url, ie=None, video_id=None, video_title=None): From cccedc1aa43f94ce4f8fa6f807a5301250c2213c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2015 21:52:41 +0600 Subject: [PATCH 17/97] [voewster] Detect series geo restriction --- youtube_dl/extractor/viewster.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 8defc1800..f8f4143c6 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -3,12 +3,14 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( + compat_HTTPError, compat_urllib_request, compat_urllib_parse, compat_urllib_parse_unquote, ) from ..utils import ( determine_ext, + ExtractorError, int_or_none, parse_iso8601, HEADRequest, @@ -86,9 +88,15 @@ class ViewsterIE(InfoExtractor): # unfinished serie has no Type if info.get('Type') in ['Serie', None]: - episodes = self._download_json( - 'https://public-api.viewster.com/series/%s/episodes' % entry_id, - video_id, 'Downloading series JSON') + try: + episodes = self._download_json( + 'https://public-api.viewster.com/series/%s/episodes' % entry_id, + video_id, 'Downloading series JSON') + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + self.raise_geo_restricted() + else: + raise entries = [ self.url_result( 'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster') From 9612f2339909f958f0c7ee7ccc370151ec28358d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2015 21:54:32 +0600 Subject: [PATCH 18/97] [viewster] Detect video geo restriction --- youtube_dl/extractor/viewster.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index f8f4143c6..7c4328efb 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -140,6 +140,10 @@ class ViewsterIE(InfoExtractor): f['height'] = int_or_none(self._search_regex( r'^(\d+)[pP]$', format_id, 'height', default=None)) formats.append(f) + + if not formats and not info.get('LanguageSets') and not info.get('VODSettings'): + self.raise_geo_restricted() + self._sort_formats(formats) synopsis = info.get('Synopsis', {}) From 7ce50a355c3a4cbd61f18fb16d8175a97f14d510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2015 21:55:04 +0600 Subject: [PATCH 19/97] [viewster] Add geo restricted tests --- youtube_dl/extractor/viewster.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 7c4328efb..487f2d744 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -63,6 +63,14 @@ class ViewsterIE(InfoExtractor): 'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1', }, 'playlist_mincount': 16, + }, { + # geo restricted series + 'url': 'https://www.viewster.com/serie/1280-18794-002/', + 'only_matching': True, + }, { + # geo restricted video + 'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/', + 'only_matching': True, }] _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01' From d0fed4ac028baa40b18082fd813ed93cd658ebe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 22 Sep 2015 22:00:50 +0600 Subject: [PATCH 20/97] [viewster] Use tuple --- youtube_dl/extractor/viewster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py index 487f2d744..632e57fb4 100644 --- a/youtube_dl/extractor/viewster.py +++ b/youtube_dl/extractor/viewster.py @@ -95,7 +95,7 @@ class ViewsterIE(InfoExtractor): entry_id = info.get('Id') or info['id'] # unfinished serie has no Type - if info.get('Type') in ['Serie', None]: + if info.get('Type') in ('Serie', None): try: episodes = self._download_json( 'https://public-api.viewster.com/series/%s/episodes' % entry_id, From de3fc356e1597b31c8f0a55a7bf7e201ae436c66 Mon Sep 17 00:00:00 2001 From: remitamine Date: Mon, 21 Sep 2015 14:01:12 +0100 Subject: [PATCH 21/97] [ninegag] fix _VALID_URL regex and handle the use of other external providers --- youtube_dl/extractor/ninegag.py | 43 ++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 7f842b5c2..eee65873a 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -9,17 +9,12 @@ from ..utils import str_to_int class NineGagIE(InfoExtractor): IE_NAME = '9gag' - _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/ - (?: - v/(?P[0-9]+)| - p/(?P[a-zA-Z0-9]+)/(?P[^?#/]+) - ) - ''' + _VALID_URL = r'https?://(?:www\.)?9gag\.com/tv/p/(?P[a-zA-Z0-9]+)/(?P[^?#/]+)' _TESTS = [{ - "url": "http://9gag.tv/v/1912", + "url": "http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome", "info_dict": { - "id": "1912", + "id": "Kk2X5", "ext": "mp4", "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", "title": "\"People Are Awesome 2013\" Is Absolutely Awesome", @@ -31,7 +26,7 @@ class NineGagIE(InfoExtractor): }, 'add_ie': ['Youtube'] }, { - 'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', + 'url': 'http://9gag.com/tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', 'info_dict': { 'id': 'KklwM', 'ext': 'mp4', @@ -42,19 +37,39 @@ class NineGagIE(InfoExtractor): 'upload_date': '20140401', 'uploader_id': 'krishnashenoi93', }, + 'add_ie': ['Youtube'] }] + _EXTERNAL_VIDEO_PROVIDER = { + '1': { + 'url': '%s', + 'ie_key': 'Youtube', + }, + '2': { + 'url': 'http://player.vimeo.com/video/%s', + 'ie_key': 'Vimeo', + }, + '3': { + 'url': 'http://instagram.com/p/%s', + 'ie_key': 'Instagram', + }, + '4': { + 'url': 'http://vine.co/v/%s', + 'ie_key': 'Vine', + }, + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('numid') or mobj.group('id') - display_id = mobj.group('display_id') or video_id + video_id = mobj.group('id') + display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id) post_view = json.loads(self._html_search_regex( r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) - youtube_id = post_view['videoExternalId'] + external_video_id = post_view['videoExternalId'] + external_video_provider = post_view['videoExternalProvider'] title = post_view['title'] description = post_view['description'] view_count = str_to_int(post_view['externalView']) @@ -62,8 +77,8 @@ class NineGagIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': youtube_id, - 'ie_key': 'Youtube', + 'url': self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id, + 'ie_key': self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key'], 'id': video_id, 'display_id': display_id, 'title': title, From 6b8ce312e3cb7d81e949cd5c64ad2a824d27830b Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Sep 2015 19:20:18 +0100 Subject: [PATCH 22/97] [ninegag] extract source url --- youtube_dl/extractor/ninegag.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index eee65873a..00bf98ef5 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -68,8 +68,13 @@ class NineGagIE(InfoExtractor): post_view = json.loads(self._html_search_regex( r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) - external_video_id = post_view['videoExternalId'] - external_video_provider = post_view['videoExternalProvider'] + ie_key = None + source_url = post_view.get('sourceUrl') + if not source_url or source_url == '': + external_video_id = post_view['videoExternalId'] + external_video_provider = post_view['videoExternalProvider'] + source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id + ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key'] title = post_view['title'] description = post_view['description'] view_count = str_to_int(post_view['externalView']) @@ -77,8 +82,8 @@ class NineGagIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id, - 'ie_key': self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key'], + 'url': source_url, + 'ie_key': ie_key, 'id': video_id, 'display_id': display_id, 'title': title, From da9f18083596d0132d12652acd0bd8983c70c058 Mon Sep 17 00:00:00 2001 From: remitamine Date: Tue, 22 Sep 2015 20:28:00 +0100 Subject: [PATCH 23/97] [ninegag] remove unnecessary condition --- youtube_dl/extractor/ninegag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 00bf98ef5..0a2725c65 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -70,7 +70,7 @@ class NineGagIE(InfoExtractor): ie_key = None source_url = post_view.get('sourceUrl') - if not source_url or source_url == '': + if not source_url: external_video_id = post_view['videoExternalId'] external_video_provider = post_view['videoExternalProvider'] source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id From e28c794699596912092635014b041d0af888fd08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:40:06 +0600 Subject: [PATCH 24/97] [9gag] Make display_id optional --- youtube_dl/extractor/ninegag.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 0a2725c65..6103c7517 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -9,7 +9,7 @@ from ..utils import str_to_int class NineGagIE(InfoExtractor): IE_NAME = '9gag' - _VALID_URL = r'https?://(?:www\.)?9gag\.com/tv/p/(?P[a-zA-Z0-9]+)/(?P[^?#/]+)' + _VALID_URL = r'https?://(?:www\.)?9gag\.com/tv/p/(?P[a-zA-Z0-9]+)(?:/(?P[^?#/]+))?' _TESTS = [{ "url": "http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome", @@ -61,7 +61,7 @@ class NineGagIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or video_id webpage = self._download_webpage(url, display_id) From c3a4e2ec40f9a90d4ae8991e6764641c744ec8fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:41:44 +0600 Subject: [PATCH 25/97] [9gag] Remove redundant test --- youtube_dl/extractor/ninegag.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 6103c7517..34879d1c8 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -26,18 +26,8 @@ class NineGagIE(InfoExtractor): }, 'add_ie': ['Youtube'] }, { - 'url': 'http://9gag.com/tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar', - 'info_dict': { - 'id': 'KklwM', - 'ext': 'mp4', - 'display_id': 'alternate-banned-opening-scene-of-gravity', - "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.", - 'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie", - 'uploader': 'Krishna Shenoi', - 'upload_date': '20140401', - 'uploader_id': 'krishnashenoi93', - }, - 'add_ie': ['Youtube'] + 'url': 'http://9gag.com/tv/p/KklwM', + 'only_matching': True, }] _EXTERNAL_VIDEO_PROVIDER = { '1': { From 6400f8ec0f20011a7e39da62e0a3e55e0fd2759a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:43:26 +0600 Subject: [PATCH 26/97] [9gag] Allow old .tv domain There are still references to it in webpage's source --- youtube_dl/extractor/ninegag.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 34879d1c8..d157b0d10 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -9,7 +9,7 @@ from ..utils import str_to_int class NineGagIE(InfoExtractor): IE_NAME = '9gag' - _VALID_URL = r'https?://(?:www\.)?9gag\.com/tv/p/(?P[a-zA-Z0-9]+)(?:/(?P[^?#/]+))?' + _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/p/(?P[a-zA-Z0-9]+)(?:/(?P[^?#/]+))?' _TESTS = [{ "url": "http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome", @@ -28,6 +28,9 @@ class NineGagIE(InfoExtractor): }, { 'url': 'http://9gag.com/tv/p/KklwM', 'only_matching': True, + }, { + 'url': 'http://9gag.tv/p/Kk2X5', + 'only_matching': True, }] _EXTERNAL_VIDEO_PROVIDER = { '1': { From 5600e214c3b63e3e2a0862bea230026c02073d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:44:38 +0600 Subject: [PATCH 27/97] [9gag] Make post view regex more robust --- youtube_dl/extractor/ninegag.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index d157b0d10..692cc3368 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -59,7 +59,8 @@ class NineGagIE(InfoExtractor): webpage = self._download_webpage(url, display_id) post_view = json.loads(self._html_search_regex( - r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) + r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost', + webpage, 'post view')) ie_key = None source_url = post_view.get('sourceUrl') From 8ca2e93e1ab3485946df19d56a87e56699b2a712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:46:40 +0600 Subject: [PATCH 28/97] [9gag] Relax optional fields --- youtube_dl/extractor/ninegag.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 692cc3368..6daae7318 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -70,8 +70,8 @@ class NineGagIE(InfoExtractor): source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key'] title = post_view['title'] - description = post_view['description'] - view_count = str_to_int(post_view['externalView']) + description = post_view.get('description') + view_count = str_to_int(post_view.get('externalView')) thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w') return { From c659022b5cabebaba3275df2d0f4ae97b468eac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:48:13 +0600 Subject: [PATCH 29/97] [9gag] Modernize --- youtube_dl/extractor/ninegag.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 6daae7318..d8e103189 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -58,9 +58,11 @@ class NineGagIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - post_view = json.loads(self._html_search_regex( - r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost', - webpage, 'post view')) + post_view = self._parse_json( + self._search_regex( + r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost', + webpage, 'post view'), + display_id) ie_key = None source_url = post_view.get('sourceUrl') From 8ea6bd2802a0dbb860187bb5aaf687585ff7c1c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:55:16 +0600 Subject: [PATCH 30/97] [9gag] Add vimeo test --- youtube_dl/extractor/ninegag.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index d8e103189..0ef6a6c65 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..utils import str_to_int @@ -22,9 +21,21 @@ class NineGagIE(InfoExtractor): 'uploader': 'CompilationChannel', 'upload_date': '20131110', "view_count": int, - "thumbnail": "re:^https?://", }, - 'add_ie': ['Youtube'] + 'add_ie': ['Youtube'], + }, { + 'url': 'http://9gag.com/tv/p/aKolP3', + 'info_dict': { + 'id': 'aKolP3', + 'ext': 'mp4', + 'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video', + 'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!", + 'uploader_id': 'rickmereki', + 'uploader': 'Rick Mereki', + 'upload_date': '20110803', + 'view_count': int, + }, + 'add_ie': ['Vimeo'], }, { 'url': 'http://9gag.com/tv/p/KklwM', 'only_matching': True, From d8fef8faacad4f3b9d13e20df4ee9344ada3c68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:56:26 +0600 Subject: [PATCH 31/97] [9gag] Quotes consistency --- youtube_dl/extractor/ninegag.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 0ef6a6c65..4adff197e 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -11,16 +11,16 @@ class NineGagIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/p/(?P[a-zA-Z0-9]+)(?:/(?P[^?#/]+))?' _TESTS = [{ - "url": "http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome", - "info_dict": { - "id": "Kk2X5", - "ext": "mp4", - "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)", - "title": "\"People Are Awesome 2013\" Is Absolutely Awesome", + 'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome', + 'info_dict': { + 'id': 'Kk2X5', + 'ext': 'mp4', + 'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)', + 'title': '\"People Are Awesome 2013\" Is Absolutely Awesome', 'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA', 'uploader': 'CompilationChannel', 'upload_date': '20131110', - "view_count": int, + 'view_count': int, }, 'add_ie': ['Youtube'], }, { From 78f9fb902b36c2a12dfe50d9724ac470b03c87d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 01:58:47 +0600 Subject: [PATCH 32/97] [9gag] Support embed URLs --- youtube_dl/extractor/ninegag.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index 4adff197e..a06d38afd 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -8,7 +8,7 @@ from ..utils import str_to_int class NineGagIE(InfoExtractor): IE_NAME = '9gag' - _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/p/(?P[a-zA-Z0-9]+)(?:/(?P[^?#/]+))?' + _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P[a-zA-Z0-9]+)(?:/(?P[^?#/]+))?' _TESTS = [{ 'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome', @@ -42,7 +42,11 @@ class NineGagIE(InfoExtractor): }, { 'url': 'http://9gag.tv/p/Kk2X5', 'only_matching': True, + }, { + 'url': 'http://9gag.com/tv/embed/a5Dmvl', + 'only_matching': True, }] + _EXTERNAL_VIDEO_PROVIDER = { '1': { 'url': '%s', From b942db3dc3d51db0f24ac98ada861d8e2b3451db Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Sep 2015 22:41:53 +0200 Subject: [PATCH 33/97] release 2015.09.22 --- docs/supportedsites.md | 17 ++++++++++------- youtube_dl/version.py | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 66091e6be..ab153af6b 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -122,7 +122,6 @@ - **defense.gouv.fr** - **DHM**: Filmarchiv - Deutsches Historisches Museum - **Discovery** - - **divxstage**: DivxStage - **Dotsub** - **DouyuTV**: 斗鱼 - **dramafever** @@ -286,7 +285,7 @@ - **Minhateca** - **MinistryGrid** - **miomio.tv** - - **mitele.es** + - **MiTele**: mitele.es - **mixcloud** - **MLB** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net @@ -317,7 +316,6 @@ - **Myvi** - **myvideo** - **MyVidster** - - **N-JOY** - **n-tv.de** - **NationalGeographic** - **Naver** @@ -326,7 +324,9 @@ - **NBCNews** - **NBCSports** - **NBCSportsVPlayer** - - **ndr**: NDR.de - Mediathek + - **ndr**: NDR.de - Norddeutscher Rundfunk + - **ndr:embed** + - **ndr:embed:base** - **NDTV** - **NerdCubedFeed** - **Nerdist** @@ -349,12 +349,16 @@ - **nhl.com:videocenter**: NHL videocenter category - **niconico**: ニコニコ動画 - **NiconicoPlaylist** + - **njoy**: N-JOY + - **njoy:embed** - **Noco** - **Normalboots** - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **novamov**: NovaMov - - **Nowness** + - **nowness** + - **nowness:playlist** + - **nowness:series** - **NowTV** - **nowvideo**: NowVideo - **npo**: npo.nl and ntr.nl @@ -375,7 +379,6 @@ - **OnionStudios** - **Ooyala** - **OoyalaExternal** - - **OpenFilm** - **orf:fm4**: radio FM4 - **orf:iptv**: iptv.ORF.at - **orf:oe1**: Radio Österreich 1 @@ -530,7 +533,7 @@ - **techtv.mit.edu** - **ted** - **TeleBruxelles** - - **telecinco.es** + - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** - **TeleMB** - **TeleTask** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0cc7411f2..7ef4f2755 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.09.09' +__version__ = '2015.09.22' From f1028194636e7acafb29fb38244cc7b1347d9313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 02:46:24 +0600 Subject: [PATCH 34/97] [downloader/hls] Pass http headers to downloader --- youtube_dl/downloader/hls.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 7743e176a..a62d2047b 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -28,9 +28,18 @@ class HlsFD(FileDownloader): return False ffpp.check_version() - args = [ - encodeArgument(opt) - for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] + args = [ffpp.executable, '-y'] + + if info_dict['http_headers']: + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. + args += [ + '-headers', + ''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items())] + + args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc'] + + args = [encodeArgument(opt) for opt in args] args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) From eb11cbe8674705647d6bd2947a44e08543663633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 23 Sep 2015 19:54:40 +0600 Subject: [PATCH 35/97] [soundcloud] Update client id (Closes #6930) --- youtube_dl/extractor/soundcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index ed5dcc0d3..2b60d354a 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -113,7 +113,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28' + _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' def report_resolve(self, video_id): From 57565375c85ff6adb11beb961fd61d6bdd023ec1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 23 Sep 2015 22:22:04 +0800 Subject: [PATCH 36/97] [iqiyi] Fix extraction (fixes #6878) --- youtube_dl/extractor/iqiyi.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 393e67e35..2ce6627a4 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -95,6 +95,10 @@ class IqiyiIE(InfoExtractor): ('10', 'h1'), ] + @staticmethod + def md5_text(text): + return hashlib.md5(text.encode('utf-8')).hexdigest() + def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): a = y % 3 @@ -179,6 +183,7 @@ class IqiyiIE(InfoExtractor): def get_raw_data(self, tvid, video_id, enc_key, _uuid): tm = str(int(time.time())) + tail = tm + tvid param = { 'key': 'fvip', 'src': hashlib.md5(b'youtube-dl').hexdigest(), @@ -186,13 +191,11 @@ class IqiyiIE(InfoExtractor): 'vid': video_id, 'vinfo': 1, 'tm': tm, - 'enc': hashlib.md5( - (enc_key + tm + tvid).encode('utf8')).hexdigest(), + 'enc': self.md5_text((enc_key + tail)[1:64:2] + tail), 'qyid': _uuid, 'tn': random.random(), 'um': 0, - 'authkey': hashlib.md5( - (tm + tvid).encode('utf8')).hexdigest() + 'authkey': self.md5_text(self.md5_text('') + tail), } api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ @@ -201,7 +204,8 @@ class IqiyiIE(InfoExtractor): return raw_data def get_enc_key(self, swf_url, video_id): - enc_key = '3601ba290e4f4662848c710e2122007e' # last update at 2015-08-10 for Zombie + # TODO: automatic key extraction + enc_key = 'eac64f22daf001da6ba9aa8da4d501508bbe90a4d4091fea3b0582a85b38c2cc' # last update at 2015-09-23-23 for Zombie::bite return enc_key def _real_extract(self, url): From 19f93d906e29e9a505d4bf5d286d75224c342c37 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 23 Sep 2015 22:25:16 +0800 Subject: [PATCH 37/97] [iqiyi] Use md5_text for all MD5 calls --- youtube_dl/extractor/iqiyi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 2ce6627a4..ce1ab3820 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -125,7 +125,7 @@ class IqiyiIE(InfoExtractor): note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) )['t'] t = str(int(math.floor(int(tm) / (600.0)))) - return hashlib.md5((t + mg + x).encode('utf8')).hexdigest() + return self.md5_text(t + mg + x) video_urls_dict = {} for format_item in data['vp']['tkl'][0]['vs']: @@ -186,7 +186,7 @@ class IqiyiIE(InfoExtractor): tail = tm + tvid param = { 'key': 'fvip', - 'src': hashlib.md5(b'youtube-dl').hexdigest(), + 'src': self.md5_text('youtube-dl'), 'tvId': tvid, 'vid': video_id, 'vinfo': 1, From 4395ca2e04f38c110259270fa4dfc4d9814aa926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Sep 2015 19:56:54 +0600 Subject: [PATCH 38/97] [xhamster] Fix title extraction (Closes #6944) --- youtube_dl/extractor/xhamster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index 97315750f..f12fe13b1 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -63,7 +63,9 @@ class XHamsterIE(InfoExtractor): mrss_url = '%s://xhamster.com/movies/%s/%s.html' % (proto, video_id, seo) webpage = self._download_webpage(mrss_url, video_id) - title = self._html_search_regex(r'(?P<title>.+?) - xHamster\.com', webpage, 'title') + title = self._html_search_regex( + [r'(?P<title>.+?)(?:, Free Porn: xHamster| - xHamster\.com)', + r'

([^<]+)

'], webpage, 'title') # Only a few videos have an description mobj = re.search(r'Description: ([^<]+)', webpage) From 05b476a27087a45b8d418607123d8db62bf1770f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Sep 2015 23:38:53 +0600 Subject: [PATCH 39/97] [vidme] Prefer non clip (Closes #6924) --- youtube_dl/extractor/vidme.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py index 9a794e609..078d283b2 100644 --- a/youtube_dl/extractor/vidme.py +++ b/youtube_dl/extractor/vidme.py @@ -119,6 +119,7 @@ class VidmeIE(InfoExtractor): 'url': f['uri'], 'width': int_or_none(f.get('width')), 'height': int_or_none(f.get('height')), + 'preference': 0 if f.get('type', '').endswith('clip') else 1, } for f in video.get('formats', []) if f.get('uri')] self._sort_formats(formats) From 9fbd4b35a27a83055c3e170ab32f2b3e56f9616e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Sep 2015 23:48:23 +0600 Subject: [PATCH 40/97] [nhl] Add support for embedded URLs --- youtube_dl/extractor/nhl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 279b18386..970a11f7c 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -72,7 +72,7 @@ class NHLBaseInfoExtractor(InfoExtractor): class NHLIE(NHLBaseInfoExtractor): IE_NAME = 'nhl.com' - _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/(?:console)?(?:\?(?:.*?[?&])?)(?:id|hlg)=(?P[-0-9a-zA-Z,]+)' + _VALID_URL = r'https?://video(?P\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P[-0-9a-zA-Z,]+)' _TESTS = [{ 'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614', @@ -136,6 +136,9 @@ class NHLIE(NHLBaseInfoExtractor): 'params': { 'skip_download': True, # Requires rtmpdump } + }, { + 'url': 'http://video.nhl.com/videocenter/embed?playlist=836127', + 'only_matching': True, }] def _real_extract(self, url): From 9c58885c70af75655288933220a99b4c4215ab4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 24 Sep 2015 23:54:16 +0600 Subject: [PATCH 41/97] [nhl:news] Add support for iframe embeds (Closes #6941) --- youtube_dl/extractor/nhl.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py index 970a11f7c..e98a5ef89 100644 --- a/youtube_dl/extractor/nhl.py +++ b/youtube_dl/extractor/nhl.py @@ -149,9 +149,9 @@ class NHLIE(NHLBaseInfoExtractor): class NHLNewsIE(NHLBaseInfoExtractor): IE_NAME = 'nhl.com:news' IE_DESC = 'NHL news' - _VALID_URL = r'https?://(?:www\.)?nhl\.com/ice/news\.html?(?:\?(?:.*?[?&])?)id=(?P[-0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P[-0-9a-zA-Z]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.nhl.com/ice/news.htm?id=750727', 'md5': '4b3d1262e177687a3009937bd9ec0be8', 'info_dict': { @@ -162,13 +162,26 @@ class NHLNewsIE(NHLBaseInfoExtractor): 'duration': 37, 'upload_date': '20150128', }, - } + }, { + # iframe embed + 'url': 'http://sabres.nhl.com/club/news.htm?id=780189', + 'md5': '9f663d1c006c90ac9fb82777d4294e12', + 'info_dict': { + 'id': '836127', + 'ext': 'mp4', + 'title': 'Morning Skate: OTT vs. BUF (9/23/15)', + 'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.", + 'duration': 93, + 'upload_date': '20150923', + }, + }] def _real_extract(self, url): news_id = self._match_id(url) webpage = self._download_webpage(url, news_id) video_id = self._search_regex( - [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'"], + [r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'", + r']+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'], webpage, 'video id') return self._real_extract_video(video_id) From 47024eb564fd4047e362680f0a68304d1df79495 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 24 Sep 2015 19:49:10 +0100 Subject: [PATCH 42/97] [hostingbulk] remove extractor --- youtube_dl/extractor/__init__.py | 1 - youtube_dl/extractor/hostingbulk.py | 80 ----------------------------- 2 files changed, 81 deletions(-) delete mode 100644 youtube_dl/extractor/hostingbulk.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c16e9e3d2..7272859db 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -228,7 +228,6 @@ from .historicfilms import HistoricFilmsIE from .history import HistoryIE from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE -from .hostingbulk import HostingBulkIE from .hotnewhiphop import HotNewHipHopIE from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py deleted file mode 100644 index a3154cfde..000000000 --- a/youtube_dl/extractor/hostingbulk.py +++ /dev/null @@ -1,80 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..compat import ( - compat_urllib_request, -) -from ..utils import ( - ExtractorError, - int_or_none, - urlencode_postdata, -) - - -class HostingBulkIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?://(?:www\.)?hostingbulk\.com/ - (?:embed-)?(?P[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html''' - _FILE_DELETED_REGEX = r'File Not Found' - _TEST = { - 'url': 'http://hostingbulk.com/n0ulw1hv20fm.html', - 'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f', - 'info_dict': { - 'id': 'n0ulw1hv20fm', - 'ext': 'mp4', - 'title': 'md5:5afeba33f48ec87219c269e054afd622', - 'filesize': 6816081, - 'thumbnail': 're:^http://.*\.jpg$', - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - url = 'http://hostingbulk.com/{0:}.html'.format(video_id) - - # Custom request with cookie to set language to English, so our file - # deleted regex would work. - request = compat_urllib_request.Request( - url, headers={'Cookie': 'lang=english'}) - webpage = self._download_webpage(request, video_id) - - if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError('Video %s does not exist' % video_id, - expected=True) - - title = self._html_search_regex(r'

(.*?)

', webpage, 'title') - filesize = int_or_none( - self._search_regex( - r'\((\d+)\sbytes?\)', - webpage, - 'filesize', - fatal=False - ) - ) - thumbnail = self._search_regex( - r' Date: Thu, 24 Sep 2015 23:05:32 +0200 Subject: [PATCH 43/97] More title extraction fixing. --- youtube_dl/extractor/xhamster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py index f12fe13b1..8938c0e45 100644 --- a/youtube_dl/extractor/xhamster.py +++ b/youtube_dl/extractor/xhamster.py @@ -64,7 +64,7 @@ class XHamsterIE(InfoExtractor): webpage = self._download_webpage(mrss_url, video_id) title = self._html_search_regex( - [r'(?P<title>.+?)(?:, Free Porn: xHamster| - xHamster\.com)', + [r'(?P<title>.+?)(?:, (?:[^,]+? )?Porn: xHamster| - xHamster\.com)', r'

([^<]+)

'], webpage, 'title') # Only a few videos have an description From 857421024daf810e92036149cc02bcf1c337da5c Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 24 Sep 2015 21:55:44 +0100 Subject: [PATCH 44/97] [iconosquare] fix info extraction --- youtube_dl/extractor/iconosquare.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py index 70e4c0d41..4fff8c0b3 100644 --- a/youtube_dl/extractor/iconosquare.py +++ b/youtube_dl/extractor/iconosquare.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + get_element_by_id, +) class IconosquareIE(InfoExtractor): @@ -12,7 +15,7 @@ class IconosquareIE(InfoExtractor): 'info_dict': { 'id': '522207370455279102_24101272', 'ext': 'mp4', - 'title': 'Instagram media by @aguynamedpatrick (Patrick Janelle)', + 'title': 'A little over a year ago, I posted my first #dailycortado, a drink introduced to...', 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', 'timestamp': 1376471991, 'upload_date': '20130814', @@ -29,8 +32,7 @@ class IconosquareIE(InfoExtractor): webpage = self._download_webpage(url, video_id) media = self._parse_json( - self._search_regex( - r'window\.media\s*=\s*({.+?});\n', webpage, 'media'), + get_element_by_id('mediaJson', webpage), video_id) formats = [{ @@ -42,7 +44,7 @@ class IconosquareIE(InfoExtractor): self._sort_formats(formats) title = self._html_search_regex( - r'(.+?)(?: *\(Videos?\))? \| (?:Iconosquare|Statigram)', + r'(.+?)', webpage, 'title') timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time')) From d4364f30bd39528b4da487799380737c330e88c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Sep 2015 04:44:52 +0600 Subject: [PATCH 45/97] [iconosquare] Revert title (Closes #6954) --- youtube_dl/extractor/iconosquare.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py index 4fff8c0b3..bb69c7a0f 100644 --- a/youtube_dl/extractor/iconosquare.py +++ b/youtube_dl/extractor/iconosquare.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, get_element_by_id, + remove_end, ) @@ -15,7 +16,7 @@ class IconosquareIE(InfoExtractor): 'info_dict': { 'id': '522207370455279102_24101272', 'ext': 'mp4', - 'title': 'A little over a year ago, I posted my first #dailycortado, a drink introduced to...', + 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d', 'timestamp': 1376471991, 'upload_date': '20130814', @@ -43,9 +44,7 @@ class IconosquareIE(InfoExtractor): } for format_id, f in media['videos'].items()] self._sort_formats(formats) - title = self._html_search_regex( - r'(.+?)', - webpage, 'title') + title = remove_end(self._og_search_title(webpage), ' - via Iconosquare') timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time')) description = media.get('caption', {}).get('text') From 9b166fc1f8039fe3f3632c40848ce590ade9f3fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Sep 2015 04:45:31 +0600 Subject: [PATCH 46/97] [iconosquare] Extract comments --- youtube_dl/extractor/iconosquare.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py index bb69c7a0f..a39f422e9 100644 --- a/youtube_dl/extractor/iconosquare.py +++ b/youtube_dl/extractor/iconosquare.py @@ -62,6 +62,14 @@ class IconosquareIE(InfoExtractor): 'height': int_or_none(t.get('height')) } for thumbnail_id, t in media.get('images', {}).items()] + comments = [{ + 'id': comment.get('id'), + 'text': comment['text'], + 'timestamp': int_or_none(comment.get('created_time')), + 'author': comment.get('from', {}).get('full_name'), + 'author_id': comment.get('from', {}).get('username'), + } for comment in media.get('comments', {}).get('data', []) if 'text' in comment] + return { 'id': video_id, 'title': title, @@ -73,4 +81,5 @@ class IconosquareIE(InfoExtractor): 'comment_count': comment_count, 'like_count': like_count, 'formats': formats, + 'comments': comments, } From 882fc9052e310b5ac6675488bba767c43ca2185e Mon Sep 17 00:00:00 2001 From: remitamine Date: Wed, 23 Sep 2015 17:48:39 +0100 Subject: [PATCH 47/97] [condenast] fix extraction and add support for other sites --- youtube_dl/extractor/condenast.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 3db4db4e4..22c66da26 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..compat import ( @@ -24,13 +23,25 @@ class CondeNastIE(InfoExtractor): # The keys are the supported sites and the values are the name to be shown # to the user and in the extractor description. _SITES = { - 'wired': 'WIRED', - 'gq': 'GQ', - 'vogue': 'Vogue', - 'glamour': 'Glamour', - 'wmagazine': 'W Magazine', - 'vanityfair': 'Vanity Fair', + 'allure': 'Allure', + 'architecturaldigest': 'Architectural Digest', + 'arstechnica': 'Ars Technica', + 'bonappetit': 'Bon Appetit', + 'brides': 'Brides', 'cnevids': 'Condé Nast', + 'cntraveler': 'Condé Nast Traveler', + 'details': 'Details', + 'epicurious': 'Epicurious', + 'glamour': 'Glamour', + 'golfdigest': 'Golf Digest', + 'gq': 'GQ', + 'newyorker': 'The New Yorker', + 'self': 'SELF', + 'teenvogue': 'Teen Vogue', + 'vanityfair': 'Vanity Fair', + 'vogue': 'Vogue', + 'wired': 'WIRED', + 'wmagazine': 'W Magazine', } _VALID_URL = r'http://(video|www|player)\.(?P%s)\.com/(?Pwatch|series|video|embed)/(?P[^/?#]+)' % '|'.join(_SITES.keys()) @@ -86,8 +97,8 @@ class CondeNastIE(InfoExtractor): info_url = base_info_url + data info_page = self._download_webpage(info_url, video_id, 'Downloading video info') - video_info = self._search_regex(r'var video = ({.+?});', info_page, 'video info') - video_info = json.loads(video_info) + video_info = self._search_regex(r'var\s*video\s*=\s*({.+?});', info_page, 'video info') + video_info = self._parse_json(video_info, video_id) formats = [{ 'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']), From 2949a6cda9c39b3ff32891bdf0d6b48c46973f82 Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 24 Sep 2015 15:54:23 +0100 Subject: [PATCH 48/97] [condenast] fix video info regex --- youtube_dl/extractor/condenast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index 22c66da26..d1380ac8d 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -97,7 +97,7 @@ class CondeNastIE(InfoExtractor): info_url = base_info_url + data info_page = self._download_webpage(info_url, video_id, 'Downloading video info') - video_info = self._search_regex(r'var\s*video\s*=\s*({.+?});', info_page, 'video info') + video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info') video_info = self._parse_json(video_info, video_id) formats = [{ From 42ca72dff38c6cb23724dd91b39550e805bd8d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Sep 2015 05:15:21 +0600 Subject: [PATCH 49/97] [condenast] Keep acute accent --- youtube_dl/extractor/condenast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index d1380ac8d..ef1d28091 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -26,7 +26,7 @@ class CondeNastIE(InfoExtractor): 'allure': 'Allure', 'architecturaldigest': 'Architectural Digest', 'arstechnica': 'Ars Technica', - 'bonappetit': 'Bon Appetit', + 'bonappetit': 'Bon Appétit', 'brides': 'Brides', 'cnevids': 'Condé Nast', 'cntraveler': 'Condé Nast Traveler', From 0940c5b4c6a068d4919fd29a8db2a85ab3bbf703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 25 Sep 2015 05:18:45 +0600 Subject: [PATCH 50/97] [condenast] Do not capture unused group in _VALID_URL --- youtube_dl/extractor/condenast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py index ef1d28091..d6949ca28 100644 --- a/youtube_dl/extractor/condenast.py +++ b/youtube_dl/extractor/condenast.py @@ -44,7 +44,7 @@ class CondeNastIE(InfoExtractor): 'wmagazine': 'W Magazine', } - _VALID_URL = r'http://(video|www|player)\.(?P%s)\.com/(?Pwatch|series|video|embed)/(?P[^/?#]+)' % '|'.join(_SITES.keys()) + _VALID_URL = r'http://(?:video|www|player)\.(?P%s)\.com/(?Pwatch|series|video|embed)/(?P[^/?#]+)' % '|'.join(_SITES.keys()) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) EMBED_URL = r'(?:https?:)?//player\.(?P%s)\.com/(?Pembed)/.+?' % '|'.join(_SITES.keys()) From 2e40a12225fbe64b84b3975b3063a676df0f4522 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Sep 2015 17:24:35 +0800 Subject: [PATCH 51/97] [fktv] Correct spellings --- youtube_dl/extractor/fktv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index c2aa23aa2..93c4fd641 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -28,9 +28,9 @@ class FKTVIE(InfoExtractor): webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode) title = clean_html(self._html_search_regex('

([^<]+?)

', webpage, 'title')) - matchs = re.search(r'(?s)]*poster="([^"]+)"[^>]*>(.*?)', webpage) - if matchs: - poster, sources = matchs.groups() + matches = re.search(r'(?s)]*poster="([^"]+)"[^>]*>(.*?)', webpage) + if matches: + poster, sources = matches.groups() urls = re.findall(r'(?s)]*src="([^"]+)"[^>]*>', sources) if sources: formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls] From 8ddf48d59f2d04b7411202eb6bf02c6eaa387035 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Sep 2015 17:48:51 +0800 Subject: [PATCH 52/97] [fktv] Raise an error is no videos found --- youtube_dl/extractor/fktv.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 93c4fd641..74c6cf866 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..utils import ( clean_html, determine_ext, + ExtractorError, ) @@ -29,14 +30,15 @@ class FKTVIE(InfoExtractor): webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode) title = clean_html(self._html_search_regex('

([^<]+?)

', webpage, 'title')) matches = re.search(r'(?s)]*poster="([^"]+)"[^>]*>(.*?)', webpage) - if matches: - poster, sources = matches.groups() - urls = re.findall(r'(?s)]*src="([^"]+)"[^>]*>', sources) - if sources: - formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls] - return { - 'id': episode, - 'title': title, - 'formats': formats, - 'thumbnail': poster, - } + if matches is None: + raise ExtractorError('Unable to extract the video') + + poster, sources = matches.groups() + urls = re.findall(r'(?s)]*src="([^"]+)"[^>]*>', sources) + formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls] + return { + 'id': episode, + 'title': title, + 'formats': formats, + 'thumbnail': poster, + } From 140359fc2cfe7e9cbfecfd2fd625c6407232fe0f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Sep 2015 17:51:48 +0800 Subject: [PATCH 53/97] [fktv] Correct and improve some regexs --- youtube_dl/extractor/fktv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 74c6cf866..b081eb535 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -28,13 +28,13 @@ class FKTVIE(InfoExtractor): episode = self._match_id(url) webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode) - title = clean_html(self._html_search_regex('

([^<]+?)

', webpage, 'title')) - matches = re.search(r'(?s)]*poster="([^"]+)"[^>]*>(.*?)', webpage) + title = clean_html(self._html_search_regex('

([^<]+)

', webpage, 'title')) + matches = re.search(r'(?s)]+poster="([^"]+)"[^>]*>(.*)', webpage) if matches is None: raise ExtractorError('Unable to extract the video') poster, sources = matches.groups() - urls = re.findall(r'(?s)]*src="([^"]+)"[^>]*>', sources) + urls = re.findall(r']+src="([^"]+)"', sources) formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls] return { 'id': episode, From 577380396171ba096240bfb3101f8151e32b587a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Sep 2015 17:58:44 +0800 Subject: [PATCH 54/97] [fktv] Correct thumbnail extraction and add the test --- youtube_dl/extractor/fktv.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index b081eb535..289cbb8c8 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -21,6 +21,7 @@ class FKTVIE(InfoExtractor): 'id': '1', 'ext': 'mp4', 'title': 'Folge 1 vom 10. April 2007', + 'thumbnail': 're:^https?://.*\.jpg$', }, } @@ -29,11 +30,14 @@ class FKTVIE(InfoExtractor): webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode) title = clean_html(self._html_search_regex('

([^<]+)

', webpage, 'title')) - matches = re.search(r'(?s)]+poster="([^"]+)"[^>]*>(.*)', webpage) + matches = re.search(r'(?s)]+(?:poster="([^"]+)")?[^>]*>(.*)', webpage) if matches is None: raise ExtractorError('Unable to extract the video') poster, sources = matches.groups() + if poster is None: + self.report_warning('unable to extract thumbnail') + urls = re.findall(r']+src="([^"]+)"', sources) formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls] return { From 711762f0b70b65f2f28f2cd8023497f50fccd81a Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Sep 2015 18:01:08 +0800 Subject: [PATCH 55/97] [fktv] Coding style --- youtube_dl/extractor/fktv.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 289cbb8c8..0c14834f9 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -28,9 +28,12 @@ class FKTVIE(InfoExtractor): def _real_extract(self, url): episode = self._match_id(url) - webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode) - title = clean_html(self._html_search_regex('

([^<]+)

', webpage, 'title')) - matches = re.search(r'(?s)]+(?:poster="([^"]+)")?[^>]*>(.*)', webpage) + webpage = self._download_webpage( + 'http://fernsehkritik.tv/folge-%s/play' % episode, episode) + title = clean_html(self._html_search_regex( + '

([^<]+)

', webpage, 'title')) + matches = re.search( + r'(?s)]+(?:poster="([^"]+)")?[^>]*>(.*)', webpage) if matches is None: raise ExtractorError('Unable to extract the video') @@ -39,7 +42,10 @@ class FKTVIE(InfoExtractor): self.report_warning('unable to extract thumbnail') urls = re.findall(r']+src="([^"]+)"', sources) - formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls] + formats = [{ + 'url': url, + 'format_id': determine_ext(url), + } for url in urls] return { 'id': episode, 'title': title, From 8de28761c41ca0451e3bbe4b9ee236d6651cca44 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 25 Sep 2015 18:17:48 +0800 Subject: [PATCH 56/97] [fktv] Fix a regex --- youtube_dl/extractor/fktv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index 0c14834f9..d9fc9952d 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -33,7 +33,8 @@ class FKTVIE(InfoExtractor): title = clean_html(self._html_search_regex( '

([^<]+)

', webpage, 'title')) matches = re.search( - r'(?s)]+(?:poster="([^"]+)")?[^>]*>(.*)', webpage) + r'(?s)])+(?:poster="([^"]+)")?[^>]*>(.*)', + webpage) if matches is None: raise ExtractorError('Unable to extract the video') From c44c7895b8774fb819b0b664bfcf64a7ebeea4e8 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 25 Sep 2015 11:28:26 +0100 Subject: [PATCH 57/97] [kuwo] fix title extraction and update test --- youtube_dl/extractor/kuwo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index fa233377d..51137a982 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE): 'upload_date': '20080122', 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' }, + 'skip': 'this song has been offline because of copyright issues', }, { 'url': 'http://www.kuwo.cn/yinyue/6446136/', 'info_dict': { @@ -78,7 +79,7 @@ class KuwoIE(KuwoBaseIE): errnote='Unable to get song detail info') song_name = self._html_search_regex( - r']+title="([^"]+)">', webpage, 'song name') + r'(?s)class="[^"]*title[^"]*".*?]+title="([^"]+)"', webpage, 'song name') singer_name = self._html_search_regex( r']+class="s_img">\s*]+title="([^>]+)"', webpage, 'singer name', fatal=False) From 3d09aa4c82100649279d979f9910a8c84ba301ff Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 25 Sep 2015 11:40:32 +0100 Subject: [PATCH 58/97] [kuwo] extract title inside element with class title exactly --- youtube_dl/extractor/kuwo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 51137a982..a3c260838 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -79,7 +79,7 @@ class KuwoIE(KuwoBaseIE): errnote='Unable to get song detail info') song_name = self._html_search_regex( - r'(?s)class="[^"]*title[^"]*".*?]+title="([^"]+)"', webpage, 'song name') + r'(?s)class="(?:[^" ]+ +)*title(?: +[^" ]+)*".*?]+title="([^"]+)"', webpage, 'song name') singer_name = self._html_search_regex( r']+class="s_img">\s*]+title="([^>]+)"', webpage, 'singer name', fatal=False) From 4866b72eb2bf5747cde4654e75b1c1be0d456456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 25 Sep 2015 21:58:45 +0200 Subject: [PATCH 59/97] [fktv] Don't redefine 'url' in list comprehension Detected with flake8. --- youtube_dl/extractor/fktv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index d9fc9952d..fb9739b49 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -44,9 +44,9 @@ class FKTVIE(InfoExtractor): urls = re.findall(r']+src="([^"]+)"', sources) formats = [{ - 'url': url, + 'url': furl, 'format_id': determine_ext(url), - } for url in urls] + } for furl in urls] return { 'id': episode, 'title': title, From 4c917d0314f0442fd17d6f8ec8e583252167512c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 25 Sep 2015 22:02:48 +0200 Subject: [PATCH 60/97] [README.md] Document the 'duration' field in the output template (#6929) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2ed751791..80152071d 100644 --- a/README.md +++ b/README.md @@ -281,6 +281,7 @@ The `-o` option allows users to indicate a template for the output file names. T - `playlist`: The sequence will be replaced by the name or the id of the playlist that contains the video. - `playlist_index`: The sequence will be replaced by the index of the video in the playlist padded with leading zeros according to the total length of the playlist. - `format_id`: The sequence will be replaced by the format code specified by `--format`. + - `duration`: The sequence will be replaced by the length of the video in seconds. The current default template is `%(title)s-%(id)s.%(ext)s`. From 08bea4adde5d5beb5f2a5cc875216b4205b90d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 25 Sep 2015 22:34:02 +0200 Subject: [PATCH 61/97] Also run tests with python 3.5 --- .travis.yml | 1 + tox.ini | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e78a2fa76..cc21fae8f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ python: - "3.2" - "3.3" - "3.4" + - "3.5" sudo: false script: nosetests test --verbose notifications: diff --git a/tox.ini b/tox.ini index cd805fe8a..48504329f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26,py27,py33,py34 +envlist = py26,py27,py33,py34,py35 [testenv] deps = nose From 3706fb5dc8a38163689a29c7ed2b64f3b3d093e3 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 07:51:11 +0100 Subject: [PATCH 62/97] [fktv] get format_id from video file ext --- youtube_dl/extractor/fktv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py index fb9739b49..40ea27895 100644 --- a/youtube_dl/extractor/fktv.py +++ b/youtube_dl/extractor/fktv.py @@ -45,7 +45,7 @@ class FKTVIE(InfoExtractor): urls = re.findall(r']+src="([^"]+)"', sources) formats = [{ 'url': furl, - 'format_id': determine_ext(url), + 'format_id': determine_ext(furl), } for furl in urls] return { 'id': episode, From 5db34f680f93917ef79ba59d501b9f82e6d44330 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 10:31:32 +0100 Subject: [PATCH 63/97] [kuwo] check for the offline error page --- youtube_dl/extractor/kuwo.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index a3c260838..36cb265ab 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -77,9 +77,11 @@ class KuwoIE(KuwoBaseIE): webpage = self._download_webpage( url, song_id, note='Download song detail info', errnote='Unable to get song detail info') + if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: + raise ExtractorError('this song has been offline because of copyright issues') song_name = self._html_search_regex( - r'(?s)class="(?:[^" ]+ +)*title(?: +[^" ]+)*".*?]+title="([^"]+)"', webpage, 'song name') + r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?]+title="([^"]+)"', webpage, 'song name') singer_name = self._html_search_regex( r']+class="s_img">\s*]+title="([^>]+)"', webpage, 'singer name', fatal=False) From 7193650641f3fd84872b231e48263752d10b37b7 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 11:44:35 +0100 Subject: [PATCH 64/97] [kuwo] treat the offline error as an expected ExtractorError --- youtube_dl/extractor/kuwo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 36cb265ab..0c8ed5d07 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -78,7 +78,7 @@ class KuwoIE(KuwoBaseIE): url, song_id, note='Download song detail info', errnote='Unable to get song detail info') if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: - raise ExtractorError('this song has been offline because of copyright issues') + raise ExtractorError('this song has been offline because of copyright issues', expected=True) song_name = self._html_search_regex( r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?]+title="([^"]+)"', webpage, 'song name') From fc42bc6ec9ab4858ec6bf1c8fe348ec47126375a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Sep 2015 19:45:43 +0600 Subject: [PATCH 65/97] [mtv] Look for sm4:video:embed (Closes #6936, closes #6970) --- youtube_dl/extractor/mtv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index a597714e9..4020cc2a9 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -200,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid is None or ':' not in mgid: mgid = self._search_regex( [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], - webpage, 'mgid') + webpage, 'mgid', default=None) + + if not mgid: + sm4_embed = self._html_search_meta( + 'sm4:video:embed', webpage, 'sm4 embed', default='') + mgid = self._search_regex( + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') videos_info = self._get_videos_info(mgid) return videos_info From fe1d858e35dde4c3068b9bec8a1bac7bd2cafadb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Sep 2015 19:46:42 +0600 Subject: [PATCH 66/97] [mtvservices:embedded] Add _extract_url --- youtube_dl/extractor/mtv.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 4020cc2a9..302c9bf35 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -228,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): }, } + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage) + if mobj: + return mobj.group('url') + def _get_feed_url(self, uri): video_id = self._id_from_uri(uri) site_id = uri.replace(video_id, '') From 46fde8a1a2e72bca3c22b6de4db06490fb90b59a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 26 Sep 2015 19:47:20 +0600 Subject: [PATCH 67/97] [extractor/generic] Use _extract_url for mtvservices --- youtube_dl/extractor/generic.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8881a8a23..22ec78a02 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -50,6 +50,7 @@ from .dailymotion import DailymotionCloudIE from .onionstudios import OnionStudiosIE from .snagfilms import SnagFilmsEmbedIE from .screenwavemedia import ScreenwaveMediaIE +from .mtv import MTVServicesEmbeddedIE class GenericIE(InfoExtractor): @@ -1611,12 +1612,9 @@ class GenericIE(InfoExtractor): return self.url_result(url, ie='Vulture') # Look for embedded mtvservices player - mobj = re.search( - r'