[empflix] Add new extractor

2024-11-22 06:22:51 +08:00 · 2014-05-12 04:10:29 -07:00 · 2014-05-12 04:10:29 -07:00 · 877bea9ce1
commit 877bea9ce1
parent e399853d0c
2 changed files with 47 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -72,6 +72,7 @@ from .ehow import EHowIE
 from .eighttracks import EightTracksIE
 from .eitb import EitbIE
 from .elpais import ElPaisIE
 from .empflix import EmpflixIE
 from .engadget import EngadgetIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -0,0 +1,46 @@
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
 )
 class EmpflixIE(InfoExtractor):
    """Information extractor for video pages on www.empflix.com.

    The video page embeds a ``flashvars.config`` URL pointing at an XML
    document; the media URL is read from that document's ``videoLink``
    element.
    """

    # The part of the path between ``/videos/`` and ``.html`` is used as the
    # video id (it is the slug plus a numeric suffix in the test URL below).
    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<videoid>[^\.]+)\.html'
    _TEST = {
        u'url': u'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        u'file': u'Amateur-Finger-Fuck-33051.flv',
        u'md5': u'5e5cc160f38ca9857f318eb97146e13e',
        u'info_dict': {
            u"title": u"Amateur Finger Fuck",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Extract the video information for an empflix video page.

        url -- page URL; expected to match ``_VALID_URL``.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``title``, ``ext`` and ``age_limit``.

        Raises ExtractorError if the config XML carries no usable
        ``videoLink`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        # Get webpage content
        webpage = self._download_webpage(url, video_id)
        # NOTE(review): _rta_search is inherited; presumably it derives the
        # age limit from an RTA label in the page -- confirm in the base class.
        age_limit = self._rta_search(webpage)

        # Get the video title
        video_title = self._html_search_regex(
            r'name="title" value="(?P<title>[^"]*)"',
            webpage, u'title').strip()

        # The page hands the player a config URL; the XML behind it holds
        # the actual media link.
        cfg_url = self._html_search_regex(
            r'flashvars\.config = escape\("([^"]+)"',
            webpage, u'flashvars.config').strip()
        cfg_xml = self._download_xml(
            cfg_url, video_id, note=u'Downloading metadata')

        # find() returns None when the element is absent; compare with
        # ``is None`` because an Element without children is falsy.
        video_link = cfg_xml.find('videoLink')
        if video_link is None or not video_link.text:
            raise ExtractorError(
                u'Unable to find videoLink in the config XML')
        video_url = video_link.text

        info = {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'ext': 'flv',
            'age_limit': age_limit,
        }
        return [info]