add JukeboxIE

2024-11-27 14:52:59 +08:00 · 2013-06-25 13:28:59 +02:00 · 2013-06-25 13:28:59 +02:00 · 28ef06f7c2
commit 28ef06f7c2
parent f8bf74575a
2 changed files with 61 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -22,6 +22,7 @@ from .hypem import HypemIE
 from .ina import InaIE
 from .infoq import InfoQIE
 from .justintv import JustinTVIE
 from .jukebox import JukeboxIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .metacafe import MetacafeIE
@ -125,6 +126,7 @@ def gen_extractors():
        GametrailersIE(),
        StatigramIE(),
        BreakIE(),
        JukeboxIE(),
        GenericIE()
    ]
--- a/youtube_dl/extractor/jukebox.py
+++ b/youtube_dl/extractor/jukebox.py
@ -0,0 +1,59 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    unescapeHTML,
 )
 class JukeboxIE(InfoExtractor):
    _VALID_URL = r'^http://www\.jukebox\.es\/.+[,](?P<video_id>[a-z0-9]+).html'
    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
    _NOT_AVAILABLE = r'<span>Este video no está disponible por el momento [!]</span>'
    _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
    IE_NAME = u'jukebox'
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('video_id')
        html = self._download_webpage(url, video_id)
        mobj = re.search(self._IFRAME, html)
        if mobj is None:
            raise ExtractorError(u'Cannot extract iframe url')
        iframe_url = unescapeHTML(mobj.group('iframe'))
        iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
        mobj = re.search(self._NOT_AVAILABLE, iframe_html)
        if mobj is not None:
            raise ExtractorError(u'Video is not available(in your country?)!')
        self.report_extraction(video_id)
        mobj = re.search(self._VIDEO_URL, iframe_html)
        if mobj is None:
            mobj = re.search(self._IS_YOUTUBE, iframe_html)
            if mobj is None:
                raise ExtractorError(u'Cannot extract video url')
            youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
            #TODO: call YoutubeIE !?
            raise ExtractorError(u'found YT video. Please run youtube-dl again and use the following link: '+youtube_url+' ')
        video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
        video_ext = unescapeHTML(mobj.group('video_ext'))
        mobj = re.search(self._TITLE, html)
        if mobj is None:
            raise ExtractorError(u'Cannot extract title')
        title = unescapeHTML(mobj.group('title'))
        artist = unescapeHTML(mobj.group('artist'))
        return [{'id': video_id,
                 'url': video_url,
                 'title': artist + '-' + title,
                 'ext': video_ext
                 }]