Playvid extractor

2024-12-31 02:23:06 +08:00 · 2014-03-10 14:41:19 +01:00 · 2014-03-10 14:41:19 +01:00 · 4ea3137e41
commit 4ea3137e41
parent ae7ed92057
2 changed files with 86 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -176,6 +176,7 @@ from .ooyala import OoyalaIE
 from .orf import ORFIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
 from .playvid import PlayvidIE
 from .podomatic import PodomaticIE
 from .pornhd import PornHdIE
 from .pornhub import PornHubIE
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dl/extractor/playvid.py
@ -0,0 +1,85 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    compat_urllib_parse,
    determine_ext,
 )
 class PlayvidIE(InfoExtractor):
    _VALID_URL = r'^(?:https?://)?www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(#|$)'
    _TEST = {
        'url': 'http://www.playvid.com/watch/agbDDi7WZTV',
        'file': 'agbDDi7WZTV.mp4',
        'md5': '44930f8afa616efdf9482daf4fe53e1e',
        'info_dict': {
            'title': 'Michelle Lewin in Miami Beach',
            'duration': 240,
            'age_limit': 18,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)
        video_title = None
        duration = None
        video_thumbnail = None
        formats = []
        # most of the information is stored in the flashvars
        flashvars_match = re.search(r'flashvars="(.+?)"',webpage)
        if flashvars_match:
            infos = compat_urllib_parse.unquote(flashvars_match.group(1)).split(r'&amp;')
            for info in infos:
                videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$',info)
                if videovars_match:
                    key = videovars_match.group(1)
                    val = videovars_match.group(2)
                    if key == 'title':
                        video_title = val.replace('+',' ')
                    if key == 'duration':
                        try:
                            duration = val
                        except ValueError:
                            duration = None
                    if key == 'big_thumb':
                        video_thumbnail = val
                    videourl_match = re.match(r'^video_urls\]\[(?P<resolution>\d+)p',key)
                    if videourl_match:
                        resolution = int(videourl_match.group('resolution'))
                        formats.append({
                            'resolution': resolution,            # 360, 480, ...
                            'ext': determine_ext(val),
                            'url': val
                        })
        # fatal error, if no download url is found
        if len(formats) == 0:
            raise ExtractorError,'no video url found'
        # Extract title - should be in the flashvars; if not, look elsewhere
        if video_title is None:
            video_title = self._html_search_regex(
                r'<title>(.*?)</title', webpage, 'title')
        return {
            'id': video_id,
            'formats': formats,
            'title': video_title,
            'thumbnail': video_thumbnail,
            'duration': duration,
            'description': None,
            'age_limit': 18
        }