youtube-dl/youtube_dl/extractor/wimp.py

import re
import base64

from .common import InfoExtractor


class WimpIE(InfoExtractor):
    """Information extractor for video pages on wimp.com."""

    # Group 1 captures the path segment that follows the host name; it is
    # used as the video id. The scheme is optional and may be http or https.
    _VALID_URL = r'(?:https?://)?(?:www\.)?wimp\.com/([^/]+)/'
    _TEST = {
        u'url': u'http://www.wimp.com/deerfence/',
        u'file': u'deerfence.flv',
        u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',
        u'info_dict': {
            u"title": u"Watch Till End: Herd of deer jump over a fence."
        }
    }

    @staticmethod
    def _guess_extension(video_url):
        """Return the file extension guessed from ``video_url``.

        The extension is taken from the part of the URL that precedes any
        query string or fragment, so dots inside ``?...`` or ``#...`` are not
        mistaken for the extension separator. If that guess is not purely
        alphanumeric, the text after the last dot of the whole URL is
        returned instead (the behaviour this extractor had before).
        """
        url_path = video_url.partition(u'?')[0].partition(u'#')[0]
        guess = url_path.rpartition(u'.')[2]
        if re.match(r'^[A-Za-z0-9]+$', guess):
            return guess
        return video_url.rpartition(u'.')[2]

    def _real_extract(self, url):
        """Extract the media URL and metadata for the page at ``url``.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title`` and ``thumbnail``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)

        # Title and thumbnail are read from <meta> tags of the page.
        title = self._search_regex(
            r'<meta name="description" content="(.+?)" />',
            webpage, 'video title')
        thumbnail_url = self._search_regex(
            r'<meta property="og\:image" content="(.+?)" />',
            webpage, 'video thumbnail')

        # The page carries a base64-encoded string in a
        # "googleCode = '...'" assignment; the media URL is inside it.
        encoded_config = self._search_regex(
            "googleCode = '(.*?)'", webpage, 'file url')
        decoded_config = base64.b64decode(encoded_config).decode('ascii')

        # Within the decoded text, the media URL is the first double-quoted
        # value that follows a '","' separator.
        final_url = self._search_regex(
            '","(.*?)"', decoded_config, 'final video url')
        ext = self._guess_extension(final_url)

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
        }]
added an IE for wimp.com 2013-06-26 18:25:53 +08:00			`import re`
			`import base64`
[wimp] minor readability improvements (#940) 2013-06-27 00:22:26 +08:00
added an IE for wimp.com 2013-06-26 18:25:53 +08:00			`from .common import InfoExtractor`


			`class WimpIE(InfoExtractor):`
			`_VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/'`
Move tests to the IE definitions 2013-06-28 02:46:46 +08:00			`_TEST = {`
			`u'url': u'http://www.wimp.com/deerfence/',`
			`u'file': u'deerfence.flv',`
			`u'md5': u'8b215e2e0168c6081a1cf84b2846a2b5',`
			`u'info_dict': {`
			`u"title": u"Watch Till End: Herd of deer jump over a fence."`
			`}`
			`}`
added an IE for wimp.com 2013-06-26 18:25:53 +08:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group(1)`
			`webpage = self._download_webpage(url, video_id)`
[wimp] minor readability improvements (#940) 2013-06-27 00:22:26 +08:00			`title = self._search_regex(r'<meta name="description" content="(.+?)" />',webpage, 'video title')`
			`thumbnail_url = self._search_regex(r'<meta property="og\:image" content="(.+?)" />', webpage,'video thumbnail')`
			`googleString = self._search_regex("googleCode = '(.*?)'", webpage, 'file url')`
added .decode('ascii') 2013-06-26 22:41:55 +08:00			`googleString = base64.b64decode(googleString).decode('ascii')`
			`final_url = self._search_regex('","(.*?)"', googleString,'final video url')`
[wimp] minor readability improvements (#940) 2013-06-27 00:22:26 +08:00			`ext = final_url.rpartition(u'.')[2]`

added an IE for wimp.com 2013-06-26 18:25:53 +08:00			`return [{`
			`'id': video_id,`
			`'url': final_url,`
			`'ext': ext,`
			`'title': title,`
			`'thumbnail': thumbnail_url,`
			`}]`
changed wimp.py according to the changes suggested by jaime 2013-06-26 20:26:59 +08:00