[nuevo] Generalize nuevo extractor and add support for trollvids

Supports only the nuevo player for now (most common). Squashed changes: [trollvids] convert duration to an int; [trollvids] added a test; [trollvids] made flake8 shut up; generalized the Nuevo extractor (affects: anitube, trollvids, trutube); [nuevo] complied with the code comments.
2024-12-21 21:22:50 +08:00 · 2015-12-02 06:00:47 +02:00 · 2015-12-02 06:00:47 +02:00 · d570746e45
commit d570746e45
parent 4fcd9d147d
5 changed files with 98 additions and 46 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -726,6 +726,7 @@ from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .trollvids import TrollvidsIE
 from .trutube import TruTubeIE
 from .tube8 import Tube8IE
 from .tubitv import TubiTvIE
--- a/youtube_dl/extractor/anitube.py
+++ b/youtube_dl/extractor/anitube.py
@ -2,10 +2,10 @@ from __future__ import unicode_literals
 import re
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
-class AnitubeIE(InfoExtractor):
+class AnitubeIE(NuevoBaseIE):
    IE_NAME = 'anitube.se'
    _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
@ -29,31 +29,5 @@ class AnitubeIE(InfoExtractor):
        key = self._search_regex(
            r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
-        config_xml = self._download_xml(
+        config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key
-            'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
+        return self._extract_nuevo(config_url, video_id)
        video_title = config_xml.find('title').text
        thumbnail = config_xml.find('image').text
        duration = float(config_xml.find('duration').text)
        formats = []
        video_url = config_xml.find('file')
        if video_url is not None:
            formats.append({
                'format_id': 'sd',
                'url': video_url.text,
            })
        video_url = config_xml.find('filehd')
        if video_url is not None:
            formats.append({
                'format_id': 'hd',
                'url': video_url.text,
            })
        return {
            'id': video_id,
            'title': video_title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats
        }
--- a/youtube_dl/extractor/nuevo.py
+++ b/youtube_dl/extractor/nuevo.py
@ -0,0 +1,37 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    float_or_none,
    xpath_text
 )
 class NuevoBaseIE(InfoExtractor):
     """Base class for extractors of sites that use the Nuevo player.

     Subclasses build the URL of the player's XML config and delegate the
     parsing to `_extract_nuevo`.
     """

     def _extract_nuevo(self, config_url, video_id):
         """Download a Nuevo XML config and turn it into an info dict.

         config_url -- URL of the player's XML configuration.
         video_id   -- identifier passed to the download call and returned
                       unchanged under the 'id' key.

         Returns a dict with the keys 'id', 'title', 'thumbnail',
         'duration' and 'formats'. All five keys are always present;
         'title' is whatever `xpath_text` yields for './title' (stripped
         when truthy), so callers that need a fallback title must test the
         value, not the presence of the key.
         """
         # The source is stripped before parsing, as the original callers did.
         tree = self._download_xml(
             config_url, video_id, transform_source=lambda s: s.strip())

         title = xpath_text(tree, './title')
         if title:
             title = title.strip()
         thumbnail = xpath_text(tree, './image')
         duration = float_or_none(xpath_text(tree, './duration'))

         formats = []
         for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')):
             video_url = xpath_text(tree, './%s' % element_name)
             # Skip both a missing element and one with no text, so that no
             # format entry without a usable URL is ever emitted.
             if not video_url:
                 continue
             formats.append({
                 'format_id': format_id,
                 'url': video_url,
             })

         return {
             'id': video_id,
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
         }
--- a/youtube_dl/extractor/trollvids.py
+++ b/youtube_dl/extractor/trollvids.py
@ -0,0 +1,49 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 from .nuevo import NuevoBaseIE
 from ..compat import (
    compat_urllib_parse_unquote
 )
 import re
 class TrollvidsIE(NuevoBaseIE):
     """Extractor for trollvids.com videos served through the Nuevo player."""

     # Accept both http and https; keep '?', '&' and '#' out of the title
     # slug so query strings and fragments are not taken for the title.
     _VALID_URL = r'https?://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&#]+)'
     IE_NAME = 'trollvids'

     _TESTS = [
         {
             'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff',
             'md5': '1d53866b2c514b23ed69e4352fdc9839',
             'info_dict': {
                 'id': '2349002',
                 'ext': 'mp4',
                 'title': "【MMD R-18】ガールフレンド carry_me_off",
                 'age_limit': 18,
                 'duration': 216.78,
             },
         },
     ]

     def _real_extract(self, url):
         """Build the info dict for a trollvids video URL.

         The video id and the title slug are taken from the URL itself; the
         media data comes from the site's Nuevo player config. The returned
         dict is the one produced by `_extract_nuevo`, extended with
         'webpage_url' and 'age_limit'.
         """
         match = re.match(self._VALID_URL, url)
         video_id = match.group('id')
         raw_video_title = match.group('title')

         # Rebuild the page URL from its parts; the pattern tolerates
         # repeated slashes, this form has exactly one between segments.
         url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title)
         config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id

         info = self._extract_nuevo(config_url, video_id)
         info.update({
             'webpage_url': url,
             'age_limit': 18,
         })

         # _extract_nuevo always sets the 'title' key, possibly to None or
         # an empty string, so test the value rather than key membership;
         # otherwise this fallback could never run.
         if not info.get('title'):
             info['title'] = compat_urllib_parse_unquote(raw_video_title)

         return info
--- a/youtube_dl/extractor/trutube.py
+++ b/youtube_dl/extractor/trutube.py
@ -1,10 +1,9 @@
 from __future__ import unicode_literals
-from .common import InfoExtractor
+from .nuevo import NuevoBaseIE
 from ..utils import xpath_text
-class TruTubeIE(InfoExtractor):
+class TruTubeIE(NuevoBaseIE):
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
@ -22,19 +21,11 @@ class TruTubeIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id
-        config = self._download_xml(
+        info = self._extract_nuevo(config_url, video_id)
            'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id,
            video_id, transform_source=lambda s: s.strip())
-        # filehd is always 404
+        # filehd always 404s
-        video_url = xpath_text(config, './file', 'video URL', fatal=True)
+        info['formats'] = info['formats'][:1]
        title = xpath_text(config, './title', 'title').strip()
        thumbnail = xpath_text(config, './image', ' thumbnail')
-        return {
+        return info
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
        }