From f04a83da42526851bf21951be545159c8f49cc41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Thu, 29 Sep 2016 16:20:50 +0200 Subject: [PATCH] Fix tests and rely on `_match_id` for some extractors --- youtube_dl/extractor/anysex.py | 4 +--- youtube_dl/extractor/byutv.py | 12 +++++------- youtube_dl/extractor/clubic.py | 11 +++-------- youtube_dl/extractor/criterion.py | 11 ++++------- youtube_dl/extractor/dreisat.py | 5 +---- youtube_dl/extractor/dropbox.py | 3 +-- youtube_dl/extractor/freesound.py | 4 ++-- youtube_dl/extractor/ina.py | 8 ++------ youtube_dl/extractor/moviezine.py | 8 ++------ youtube_dl/extractor/reverbnation.py | 23 +++++++++++++---------- youtube_dl/extractor/slutload.py | 7 ++----- youtube_dl/extractor/techtalks.py | 7 +++---- youtube_dl/extractor/unistra.py | 5 +---- 13 files changed, 40 insertions(+), 68 deletions(-) diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py index ad86d6e58..07b20d1e0 100644 --- a/youtube_dl/extractor/anysex.py +++ b/youtube_dl/extractor/anysex.py @@ -26,9 +26,7 @@ class AnySexIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL') diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3aec601f8..4ee69b83a 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -8,7 +7,7 @@ from ..utils import ExtractorError class BYUtvIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' + _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<id>[^/?#]+)' _TEST = { 'url': 
'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'md5': '05850eb8c749e2ee05ad5a1c34668493', @@ -27,15 +26,14 @@ class BYUtvIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) episode_code = self._search_regex( r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information') - episode_json = re.sub( - r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code) - ep = json.loads(episode_json) + ep = self._parse_json(re.sub( + r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', + episode_code), video_id) if ep['providerType'] == 'Ooyala': return { diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py index 2fba93543..f7ee3a8f8 100644 --- a/youtube_dl/extractor/clubic.py +++ b/youtube_dl/extractor/clubic.py @@ -1,9 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor from ..utils import ( clean_html, @@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id player_page = self._download_webpage(player_url, video_id) - config_json = self._search_regex( + config = self._parse_json(self._search_regex( r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, - 'configuration') - config = json.loads(config_json) + 'configuration'), video_id) video_info = config['videoInfo'] sources = config['sources'] diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index ad32673a8..7a6b23279 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import 
unicode_literals -import re - from .common import InfoExtractor @@ -20,16 +18,15 @@ class CriterionIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) final_url = self._search_regex( - r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') title = self._og_search_title(webpage) description = self._html_search_meta('description', webpage) thumbnail = self._search_regex( - r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;', webpage, 'thumbnail url') return { diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 908c9e514..75cfc0b19 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .zdf import ZDFIE @@ -32,7 +30,6 @@ class DreiSatIE(ZDFIE): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id return self.extract_from_xml_url(video_id, details_url) diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py index 14b6c00b0..463d0fd29 100644 --- a/youtube_dl/extractor/dropbox.py +++ b/youtube_dl/extractor/dropbox.py @@ -26,8 +26,7 @@ class DropboxIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) fn = compat_urllib_parse_unquote(url_basename(url)) title = os.path.splitext(fn)[0] video_url = re.sub(r'[?&]dl=0', '', url) diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py index 5ff62af2a..49adeb04f 100644 --- 
a/youtube_dl/extractor/freesound.py +++ b/youtube_dl/extractor/freesound.py @@ -20,8 +20,8 @@ class FreesoundIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - music_id = mobj.group('id') + music_id = self._match_id(url) + webpage = self._download_webpage(url, music_id) title = self._html_search_regex( r'
 .*?(.+?)', diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py index 65712abc2..50ce1923a 100644 --- a/youtube_dl/extractor/ina.py +++ b/youtube_dl/extractor/ina.py @@ -1,8 +1,6 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -19,9 +17,7 @@ class InaIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - - video_id = mobj.group('id') + video_id = self._match_id(url) mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id info_doc = self._download_xml(mrss_url, video_id) diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index aa091a62c..143e40c4d 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -1,14 +1,11 @@ -# -*- coding: utf-8 -*- +# coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor class MoviezineIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)' - _TEST = { 'url': 'http://www.moviezine.se/video/205866', 'info_dict': { @@ -21,8 +18,7 @@ class MoviezineIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player') diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 3c6725aeb..b4481230b 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import str_or_none @@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _TESTS = [{ 'url': 
'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', - 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'info_dict': { 'id': '16965047', 'ext': 'mp3', 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', - 'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$' + 'thumbnail': 're:^https?://.*\.jpg', }, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - song_id = mobj.group('id') + song_id = self._match_id(url) api_res = self._download_json( 'https://api.reverbnation.com/song/%s' % song_id, @@ -31,14 +28,20 @@ class ReverbNationIE(InfoExtractor): note='Downloading information of song %s' % song_id ) + thumbnails = [{ + 'url': api_res.get('image'), + }, { + 'url': api_res.get('thumbnail'), + 'preference': -2, + }] + return { 'id': song_id, - 'title': api_res.get('name'), - 'url': api_res.get('url'), + 'title': api_res['name'], + 'url': api_res['url'], 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), - 'thumbnail': self._proto_relative_url( - api_res.get('image', api_res.get('thumbnail'))), + 'thumbnails': thumbnails, 'ext': 'mp3', 'vcodec': 'none', } diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index 7efb29f65..0464b0198 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor): _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' _TEST = { 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', - 'md5': '0cf531ae8006b530bd9df947a6a0df77', + 'md5': '868309628ba00fd488cf516a113fd717', 'info_dict': { 'id': 'TD73btpBqSxc', 'ext': 'mp4', @@ -20,8 +18,7 @@ class SlutloadIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, 
url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py index 16e945d8e..f38337803 100644 --- a/youtube_dl/extractor/techtalks.py +++ b/youtube_dl/extractor/techtalks.py @@ -4,7 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( - get_element_by_attribute, + get_element_by_class, clean_html, ) @@ -41,15 +41,14 @@ class TechTalksIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - talk_id = mobj.group('id') + talk_id = self._match_id(url) webpage = self._download_webpage(url, talk_id) rtmp_url = self._search_regex( r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url') play_path = self._search_regex( r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', webpage, 'presenter play path') - title = clean_html(get_element_by_attribute('class', 'title', webpage)) + title = clean_html(get_element_by_class('title', webpage)) video_info = { 'id': talk_id, 'title': title, diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py index a724cdbef..2675d3eea 100644 --- a/youtube_dl/extractor/unistra.py +++ b/youtube_dl/extractor/unistra.py @@ -8,7 +8,6 @@ from ..utils import qualities class UnistraIE(InfoExtractor): _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)' - _TESTS = [ { 'url': 'http://utv.unistra.fr/video.php?id_video=154', @@ -33,9 +32,7 @@ class UnistraIE(InfoExtractor): ] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))