From e0bd46eedc18113512da9e3f3e1a9238bf640b0d Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" <akari@dbc.1gb.ua> Date: Wed, 2 Dec 2015 06:00:47 +0200 Subject: [PATCH 1/6] [trollvids] Added a basic trollvids.com extractor. Supports only the nuevo player for now (most common). --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/trollvids.py | 52 +++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/trollvids.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 947b83683..bbf656090 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -666,6 +666,7 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .trollvids import TrollvidsIE from .trutube import TruTubeIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py new file mode 100644 index 000000000..beb93862b --- /dev/null +++ b/youtube_dl/extractor/trollvids.py @@ -0,0 +1,52 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_urllib_parse_unquote +) + +import re +import xml.etree.ElementTree + +class TrollvidsIE(InfoExtractor): + _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)" + IE_NAME = 'trollvids' + + def _real_extract(self, url): + match = re.match(self._VALID_URL, url) + + video_id = match.group('id') + raw_video_title = match.group('title') + video_title = compat_urllib_parse_unquote(raw_video_title) + url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title) + + info = { + "id": video_id, + "title": video_title, + "webpage_url": url, + "age_limit": 18 + } + + sdformats = [] + hdformats = [] + + tree = self._download_xml("http://trollvids.com/nuevo/player/config.php?v=%s" 
% video_id, video_id) + + for child in tree: + tag, val = child.tag, child.text + + if tag == "file": + sdformats.append({"url": val}) + elif tag == "filehd": + hdformats.append({"url": val}) + elif tag == "duration": + info["duration"] = float(val) + elif tag == "image": + info["thumbnail"] = val + elif tag == "title": + info["title"] = val + + info["formats"] = sdformats + hdformats + return info From 16afcbaa5feb05f652d533ae832d6f4276fd08c5 Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" <akari@dbc.1gb.ua> Date: Wed, 2 Dec 2015 06:06:48 +0200 Subject: [PATCH 2/6] [trollvids] convert duration to an int --- youtube_dl/extractor/trollvids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index beb93862b..6781e7346 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -42,7 +42,7 @@ class TrollvidsIE(InfoExtractor): elif tag == "filehd": hdformats.append({"url": val}) elif tag == "duration": - info["duration"] = float(val) + info["duration"] = int(float(val)) elif tag == "image": info["thumbnail"] = val elif tag == "title": From 0116e2efad073a59981f5ce1815a49139315d95c Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" <akari@dbc.1gb.ua> Date: Wed, 2 Dec 2015 06:15:01 +0200 Subject: [PATCH 3/6] [trollvids] added a test --- youtube_dl/extractor/trollvids.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index 6781e7346..0519e9898 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -50,3 +50,18 @@ class TrollvidsIE(InfoExtractor): info["formats"] = sdformats + hdformats return info + + _TESTS = [ + { + 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 
'info_dict': { + 'id': '2349002', + 'ext': 'mp4', + 'title': "【MMD R-18】ガールフレンド carry_me_off", + 'age_limit': 18, + 'duration': 216, + }, + }, + ] + From 6ba1b6cb67b1ec3d3ae6eea5f63dc7a80410005e Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" <akari@dbc.1gb.ua> Date: Wed, 2 Dec 2015 06:17:49 +0200 Subject: [PATCH 4/6] [trollvids] made flake8 shut up --- youtube_dl/extractor/trollvids.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index 0519e9898..2b817639a 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -8,7 +8,7 @@ from ..compat import ( ) import re -import xml.etree.ElementTree + class TrollvidsIE(InfoExtractor): _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)" @@ -64,4 +64,3 @@ class TrollvidsIE(InfoExtractor): }, }, ] - From 0dc78520808cb65596c5f13d68fa933b822346c3 Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" <akari@dbc.1gb.ua> Date: Tue, 8 Dec 2015 23:18:48 +0200 Subject: [PATCH 5/6] Generalized the Nuevo extractor Affects: anitube, trollvids, trutube --- youtube_dl/extractor/anitube.py | 34 ++++--------------------------- youtube_dl/extractor/nuevo.py | 33 ++++++++++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 30 +++++---------------------- youtube_dl/extractor/trutube.py | 21 ++++--------------- 4 files changed, 46 insertions(+), 72 deletions(-) create mode 100644 youtube_dl/extractor/nuevo.py diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 23f942ae2..73690df82 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -2,10 +2,10 @@ from __future__ import unicode_literals import re -from .common import InfoExtractor +from .nuevo import NuevoBaseIE -class AnitubeIE(InfoExtractor): +class AnitubeIE(NuevoBaseIE): IE_NAME = 'anitube.se' _VALID_URL = 
r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' @@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor): key = self._search_regex( r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') - config_xml = self._download_xml( - 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) - - video_title = config_xml.find('title').text - thumbnail = config_xml.find('image').text - duration = float(config_xml.find('duration').text) - - formats = [] - video_url = config_xml.find('file') - if video_url is not None: - formats.append({ - 'format_id': 'sd', - 'url': video_url.text, - }) - video_url = config_xml.find('filehd') - if video_url is not None: - formats.append({ - 'format_id': 'hd', - 'url': video_url.text, - }) - - return { - 'id': video_id, - 'title': video_title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats - } + config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key + return self._extract_nuevo(config_url, video_id) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py new file mode 100644 index 000000000..d79bd601d --- /dev/null +++ b/youtube_dl/extractor/nuevo.py @@ -0,0 +1,33 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import float_or_none + +class NuevoBaseIE(InfoExtractor): + def _extract_nuevo(self, config_url, video_id, info=None, ignore_hd=False): + if info is None: + info = {} + + sdformats, hdformats = [], [] + tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) + + for child in tree: + tag, val = child.tag, child.text + + if tag == "file": + sdformats.append({"url": val}) + elif tag == "filehd" and not ignore_hd: + hdformats.append({"url": val}) + elif tag == "duration": + info["duration"] = float_or_none(val) + elif tag == "image": + info["thumbnail"] = val + elif tag == "title": + info["title"] = val.strip() + + info["id"] = video_id + info["formats"] = sdformats + hdformats + 
+ return info diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index 2b817639a..c4863cac0 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -1,7 +1,7 @@ # encoding: utf-8 from __future__ import unicode_literals -from .common import InfoExtractor +from .nuevo import NuevoBaseIE from ..compat import ( compat_urllib_parse_unquote @@ -10,7 +10,7 @@ from ..compat import ( import re -class TrollvidsIE(InfoExtractor): +class TrollvidsIE(NuevoBaseIE): _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)" IE_NAME = 'trollvids' @@ -21,35 +21,15 @@ class TrollvidsIE(InfoExtractor): raw_video_title = match.group('title') video_title = compat_urllib_parse_unquote(raw_video_title) url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title) + config_url = "http://trollvids.com/nuevo/player/config.php?v=%s" % video_id info = { - "id": video_id, "title": video_title, "webpage_url": url, "age_limit": 18 } - sdformats = [] - hdformats = [] - - tree = self._download_xml("http://trollvids.com/nuevo/player/config.php?v=%s" % video_id, video_id) - - for child in tree: - tag, val = child.tag, child.text - - if tag == "file": - sdformats.append({"url": val}) - elif tag == "filehd": - hdformats.append({"url": val}) - elif tag == "duration": - info["duration"] = int(float(val)) - elif tag == "image": - info["thumbnail"] = val - elif tag == "title": - info["title"] = val - - info["formats"] = sdformats + hdformats - return info + return self._extract_nuevo(config_url, video_id, info) _TESTS = [ { @@ -60,7 +40,7 @@ class TrollvidsIE(InfoExtractor): 'ext': 'mp4', 'title': "【MMD R-18】ガールフレンド carry_me_off", 'age_limit': 18, - 'duration': 216, + 'duration': 216.78, }, }, ] diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index e7b79243a..e2f5a4a2c 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -1,10 +1,9 @@ 
from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import xpath_text +from .nuevo import NuevoBaseIE -class TruTubeIE(InfoExtractor): +class TruTubeIE(NuevoBaseIE): _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', @@ -22,19 +21,7 @@ class TruTubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - - config = self._download_xml( - 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, - video_id, transform_source=lambda s: s.strip()) + config_url = "https://trutube.tv/nuevo/player/config.php?v=%s" % video_id # filehd is always 404 - video_url = xpath_text(config, './file', 'video URL', fatal=True) - title = xpath_text(config, './title', 'title').strip() - thumbnail = xpath_text(config, './image', ' thumbnail') - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - } + return self._extract_nuevo(config_url, video_id, ignore_hd=True) From 310bcd73008032cb7c310f2f765335f84708ef00 Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" <akari@dbc.1gb.ua> Date: Sat, 12 Dec 2015 09:40:30 +0200 Subject: [PATCH 6/6] [nuevo] Complied with the code comments. 
--- youtube_dl/extractor/nuevo.py | 46 +++++++++++++++++-------------- youtube_dl/extractor/trollvids.py | 23 +++++++++------- youtube_dl/extractor/trutube.py | 10 +++++-- 3 files changed, 45 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index d79bd601d..ccc697e4f 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -3,31 +3,35 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + float_or_none, + xpath_text +) + class NuevoBaseIE(InfoExtractor): - def _extract_nuevo(self, config_url, video_id, info=None, ignore_hd=False): - if info is None: - info = {} - - sdformats, hdformats = [], [] + def _extract_nuevo(self, config_url, video_id): tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) - for child in tree: - tag, val = child.tag, child.text + title = xpath_text(tree, './title') + if title: + title = title.strip() - if tag == "file": - sdformats.append({"url": val}) - elif tag == "filehd" and not ignore_hd: - hdformats.append({"url": val}) - elif tag == "duration": - info["duration"] = float_or_none(val) - elif tag == "image": - info["thumbnail"] = val - elif tag == "title": - info["title"] = val.strip() + thumbnail = xpath_text(tree, './image') + duration = float_or_none(xpath_text(tree, './duration')) - info["id"] = video_id - info["formats"] = sdformats + hdformats + formats = [] + for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')): + video_url = tree.find(element_name) + video_url is None or formats.append({ + 'format_id': format_id, + 'url': video_url.text + }) - return info + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats + } diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index c4863cac0..e4fe620f7 100644 --- 
a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -11,7 +11,7 @@ import re class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r"http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)" + _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)' IE_NAME = 'trollvids' def _real_extract(self, url): @@ -19,17 +19,20 @@ class TrollvidsIE(NuevoBaseIE): video_id = match.group('id') raw_video_title = match.group('title') - video_title = compat_urllib_parse_unquote(raw_video_title) - url = "http://trollvids.com/video/%s/%s" % (video_id, raw_video_title) - config_url = "http://trollvids.com/nuevo/player/config.php?v=%s" % video_id + url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title) + config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id - info = { - "title": video_title, - "webpage_url": url, - "age_limit": 18 - } + info = self._extract_nuevo(config_url, video_id) - return self._extract_nuevo(config_url, video_id, info) + info.update({ + 'webpage_url': url, + 'age_limit': 18 + }) + + if 'title' not in info: + info['title'] = compat_urllib_parse_unquote(raw_video_title) + + return info _TESTS = [ { diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index e2f5a4a2c..d7ec2ec26 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -21,7 +21,11 @@ class TruTubeIE(NuevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - config_url = "https://trutube.tv/nuevo/player/config.php?v=%s" % video_id + config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id - # filehd is always 404 - return self._extract_nuevo(config_url, video_id, ignore_hd=True) + info = self._extract_nuevo(config_url, video_id) + + # filehd always 404s + info['formats'] = info['formats'][:1] + + return info