From f04a83da42526851bf21951be545159c8f49cc41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A9stin=20Reed?= <trox1972@users.noreply.github.com>
Date: Thu, 29 Sep 2016 16:20:50 +0200
Subject: [PATCH] Fix tests and rely on `_match_id` for some extractors

---
 youtube_dl/extractor/anysex.py       |  4 +---
 youtube_dl/extractor/byutv.py        | 12 +++++-------
 youtube_dl/extractor/clubic.py       | 11 +++--------
 youtube_dl/extractor/criterion.py    | 11 ++++-------
 youtube_dl/extractor/dreisat.py      |  5 +----
 youtube_dl/extractor/dropbox.py      |  3 +--
 youtube_dl/extractor/freesound.py    |  4 ++--
 youtube_dl/extractor/ina.py          |  8 ++------
 youtube_dl/extractor/moviezine.py    |  8 ++------
 youtube_dl/extractor/reverbnation.py | 23 +++++++++++++----------
 youtube_dl/extractor/slutload.py     |  7 ++-----
 youtube_dl/extractor/techtalks.py    |  7 +++----
 youtube_dl/extractor/unistra.py      |  5 +----
 13 files changed, 40 insertions(+), 68 deletions(-)
diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py
index ad86d6e58..07b20d1e0 100644
--- a/youtube_dl/extractor/anysex.py
+++ b/youtube_dl/extractor/anysex.py
@@ -26,9 +26,7 @@ class AnySexIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 3aec601f8..4ee69b83a 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import json
 import re
 
 from .common import InfoExtractor
@@ -8,7 +7,7 @@ from ..utils import ExtractorError
 
 
 class BYUtvIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
+    _VALID_URL = r'^https?://(?:www\.)?byutv\.org/watch/[0-9a-f-]+/(?P<id>[^/?#]+)'
     _TEST = {
         'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
         'md5': '05850eb8c749e2ee05ad5a1c34668493',
@@ -27,15 +26,14 @@ class BYUtvIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         episode_code = self._search_regex(
             r'(?s)episode:(.*?\}),\s*\n', webpage, 'episode information')
-        episode_json = re.sub(
-            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"', episode_code)
-        ep = json.loads(episode_json)
+        ep = self._parse_json(re.sub(
+            r'(\n\s+)([a-zA-Z]+):\s+\'(.*?)\'', r'\1"\2": "\3"',
+            episode_code), video_id)
 
         if ep['providerType'] == 'Ooyala':
             return {
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py
index 2fba93543..f7ee3a8f8 100644
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dl/extractor/clubic.py
@@ -1,9 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
@@ -30,16 +27,14 @@ class ClubicIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id
         player_page = self._download_webpage(player_url, video_id)
 
-        config_json = self._search_regex(
+        config = self._parse_json(self._search_regex(
             r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page,
-            'configuration')
-        config = json.loads(config_json)
+            'configuration'), video_id)
 
         video_info = config['videoInfo']
         sources = config['sources']
diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py
index ad32673a8..7a6b23279 100644
--- a/youtube_dl/extractor/criterion.py
+++ b/youtube_dl/extractor/criterion.py
@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -20,16 +18,15 @@ class CriterionIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         final_url = self._search_regex(
-            r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
+            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
         title = self._og_search_title(webpage)
         description = self._html_search_meta('description', webpage)
         thumbnail = self._search_regex(
-            r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
+            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
             webpage, 'thumbnail url')
 
         return {
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 908c9e514..75cfc0b19 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .zdf import ZDFIE
 
 
@@ -32,7 +30,6 @@ class DreiSatIE(ZDFIE):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
         return self.extract_from_xml_url(video_id, details_url)
diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py
index 14b6c00b0..463d0fd29 100644
--- a/youtube_dl/extractor/dropbox.py
+++ b/youtube_dl/extractor/dropbox.py
@@ -26,8 +26,7 @@ class DropboxIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         fn = compat_urllib_parse_unquote(url_basename(url))
         title = os.path.splitext(fn)[0]
         video_url = re.sub(r'[?&]dl=0', '', url)
diff --git a/youtube_dl/extractor/freesound.py b/youtube_dl/extractor/freesound.py
index 5ff62af2a..49adeb04f 100644
--- a/youtube_dl/extractor/freesound.py
+++ b/youtube_dl/extractor/freesound.py
@@ -20,8 +20,8 @@ class FreesoundIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        music_id = mobj.group('id')
+        music_id = self._match_id(url)
+
         webpage = self._download_webpage(url, music_id)
         title = self._html_search_regex(
             r'<div id="single_sample_header">.*?<a href="#">(.+?)</a>',
diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py
index 65712abc2..50ce1923a 100644
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@@ -1,8 +1,6 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -19,9 +17,7 @@ class InaIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
         info_doc = self._download_xml(mrss_url, video_id)
 
diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py
index aa091a62c..143e40c4d 100644
--- a/youtube_dl/extractor/moviezine.py
+++ b/youtube_dl/extractor/moviezine.py
@@ -1,14 +1,11 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
 class MoviezineIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)'
-
     _TEST = {
         'url': 'http://www.moviezine.se/video/205866',
         'info_dict': {
@@ -21,8 +18,7 @@ class MoviezineIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
         jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player')
diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py
index 3c6725aeb..b4481230b 100644
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dl/extractor/reverbnation.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import str_or_none
 
@@ -10,20 +8,19 @@ class ReverbNationIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
     _TESTS = [{
         'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
-        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+        'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
         'info_dict': {
             'id': '16965047',
             'ext': 'mp3',
             'title': 'MONA LISA',
             'uploader': 'ALKILADOS',
             'uploader_id': '216429',
-            'thumbnail': 're:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$'
+            'thumbnail': 're:^https?://.*\.jpg',
         },
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        song_id = mobj.group('id')
+        song_id = self._match_id(url)
 
         api_res = self._download_json(
             'https://api.reverbnation.com/song/%s' % song_id,
@@ -31,14 +28,20 @@ class ReverbNationIE(InfoExtractor):
             note='Downloading information of song %s' % song_id
         )
 
+        # Drop missing URLs so no thumbnail entry carries a None 'url'.
+        thumbnails = [{
+            'url': self._proto_relative_url(thumb_url), 'preference': pref,
+        } for thumb_url, pref in (
+            (api_res.get('image'), None),
+            (api_res.get('thumbnail'), -2)) if thumb_url]
+
         return {
             'id': song_id,
-            'title': api_res.get('name'),
-            'url': api_res.get('url'),
+            'title': api_res['name'],
+            'url': api_res['url'],
             'uploader': api_res.get('artist', {}).get('name'),
             'uploader_id': str_or_none(api_res.get('artist', {}).get('id')),
-            'thumbnail': self._proto_relative_url(
-                api_res.get('image', api_res.get('thumbnail'))),
+            'thumbnails': thumbnails,
             'ext': 'mp3',
             'vcodec': 'none',
         }
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py
index 7efb29f65..0464b0198 100644
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -1,7 +1,5 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
@@ -9,7 +7,7 @@ class SlutloadIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
     _TEST = {
         'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
-        'md5': '0cf531ae8006b530bd9df947a6a0df77',
+        'md5': '868309628ba00fd488cf516a113fd717',
         'info_dict': {
             'id': 'TD73btpBqSxc',
             'ext': 'mp4',
@@ -20,8 +18,7 @@ class SlutloadIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dl/extractor/techtalks.py
index 16e945d8e..f38337803 100644
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dl/extractor/techtalks.py
@@ -4,7 +4,7 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
-    get_element_by_attribute,
+    get_element_by_class,
     clean_html,
 )
 
@@ -41,15 +41,14 @@ class TechTalksIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        talk_id = mobj.group('id')
+        talk_id = self._match_id(url)
         webpage = self._download_webpage(url, talk_id)
         rtmp_url = self._search_regex(
             r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
         play_path = self._search_regex(
             r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
             webpage, 'presenter play path')
-        title = clean_html(get_element_by_attribute('class', 'title', webpage))
+        title = clean_html(get_element_by_class('title', webpage))
         video_info = {
             'id': talk_id,
             'title': title,
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dl/extractor/unistra.py
index a724cdbef..2675d3eea 100644
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dl/extractor/unistra.py
@@ -8,7 +8,6 @@ from ..utils import qualities
 
 class UnistraIE(InfoExtractor):
     _VALID_URL = r'https?://utv\.unistra\.fr/(?:index|video)\.php\?id_video\=(?P<id>\d+)'
-
     _TESTS = [
         {
             'url': 'http://utv.unistra.fr/video.php?id_video=154',
@@ -33,9 +32,7 @@ class UnistraIE(InfoExtractor):
     ]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-
+        video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         files = set(re.findall(r'file\s*:\s*"(/[^"]+)"', webpage))