youtube-dl/youtube_dl/extractor/cloudy.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_urllib_parse,
    compat_HTTPError,
)
from ..utils import (
    ExtractorError,
    HEADRequest,
    remove_end,
)


class CloudyIE(InfoExtractor):
    """Extractor for videos hosted on cloudy.ec and videoraj.ch.

    The embed page of a video exposes a ``filekey`` value; that key and the
    video id are sent to the host's player API, which answers with a
    query-string encoded body holding the media ``url`` and ``title``.
    """

    # NOTE(review): the base class may expect this attribute to be spelled
    # ``IE_DESC`` (no leading underscore) -- confirm against InfoExtractor.
    _IE_DESC = 'cloudy.ec and videoraj.ch'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
        (?:v/|embed\.php\?id=)
        (?P<id>[A-Za-z0-9]+)
        '''
    # Templates filled with (host, video id) and (host, urlencoded form).
    _EMBED_URL = 'http://www.%s/embed.php?id=%s'
    _API_URL = 'http://www.%s/api/player.api.php?%s'
    # Upper bound on the number of player API requests per extraction.
    _MAX_TRIES = 2
    _TESTS = [
        {
            'url': 'https://www.cloudy.ec/v/af511e2527aac',
            'md5': '5cb253ace826a42f35b4740539bedf07',
            'info_dict': {
                'id': 'af511e2527aac',
                'ext': 'flv',
                'title': 'Funny Cats and Animals Compilation june 2013',
            }
        },
        {
            'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
            'md5': '7d0f8799d91efd4eda26587421c3c3b0',
            'info_dict': {
                'id': '47f399fd8bb60',
                'ext': 'flv',
                'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
            }
        }
    ]

    def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
        """Query the player API and return the info dict for a video.

        The API is asked for a media URL, which is then probed with a HEAD
        request.  When the probe answers 404 or 410 the dead URL is reported
        back to the API and a fresh one is requested, for at most
        ``_MAX_TRIES`` API requests in total.

        Args:
            video_host: host name captured from the page URL.
            video_id: video identifier captured from the page URL.
            file_key: ``filekey`` value scraped from the embed page.
            error_url: media URL that previously failed, if any; it is
                reported to the API together with the error count.
            try_num: number of API requests already made.

        Returns:
            A dict with the keys ``id``, ``url`` and ``title``.

        Raises:
            ExtractorError: if the API reports an error, returns no media
                URL, or the request budget is exhausted.
        """
        while try_num < self._MAX_TRIES:
            form = {
                'file': video_id,
                'key': file_key,
            }
            if error_url:
                # Tell the API which URL was dead so it can hand out another.
                form.update({
                    'numOfErrors': try_num,
                    'errorCode': '404',
                    'errorUrl': error_url,
                })

            data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
            player_data = self._download_webpage(
                data_url, video_id, 'Downloading player data')
            # The response body is query-string encoded; every value is a list.
            data = compat_parse_qs(player_data)

            try_num += 1

            if 'error' in data:
                # Fall back to the raw ``error`` value when the API gives no
                # message, instead of failing with a bare KeyError.
                error_msg = data.get('error_msg') or data['error']
                raise ExtractorError(
                    '%s error: %s' % (self.IE_NAME, ' '.join(error_msg)),
                    expected=True)

            title = data.get('title', [None])[0]
            if title:
                title = remove_end(title, '&asdasdas').strip()

            video_url = data.get('url', [None])[0]
            if not video_url:
                # An info dict without a URL is useless to the caller; fail
                # here with a clear message rather than returning None.
                raise ExtractorError('Unable to extract video URL')

            try:
                self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
                    self.report_warning('Invalid video URL, requesting another', video_id)
                    error_url = video_url
                    continue
                # Any other probe failure is ignored on purpose: the check is
                # best-effort and the URL is returned as-is.

            return {
                'id': video_id,
                'url': video_url,
                'title': title,
            }

        raise ExtractorError('Unable to extract video URL', expected=True)

    def _real_extract(self, url):
        """Resolve a page URL to an info dict via the embed page and player API."""
        mobj = re.match(self._VALID_URL, url)
        video_host = mobj.group('host')
        video_id = mobj.group('id')

        # Always go through the embed page, whichever URL form was given,
        # because that is where the file key is scraped from.
        url = self._EMBED_URL % (video_host, video_id)
        webpage = self._download_webpage(url, video_id)

        file_key = self._search_regex(
            r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')

        return self._extract_video(video_host, video_id, file_key)
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 18:23:40 +08:00			`from ..compat import (`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`compat_parse_qs,`
			`compat_urllib_parse,`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`compat_HTTPError,`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`)`
[util] Move compatibility functions out of util utils is large enough without these compatibility functions. Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py . Everything else (i.e. youtube-dl-specific helpers) goes into utils.py . 2014-11-02 18:23:40 +08:00			`from ..utils import (`
			`ExtractorError,`
			`HEADRequest,`
			`remove_end,`
			`)`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00

			`class CloudyIE(InfoExtractor):`
[cloudy] Add support for videoraj.ch 2014-09-14 06:17:21 +08:00			`_IE_DESC = 'cloudy.ec and videoraj.ch'`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`_VALID_URL = r'''(?x)`
[cloudy] Add support for videoraj.ch 2014-09-14 06:17:21 +08:00			`https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`(?:v/|embed\.php\?id=)`
			`(?P<id>[A-Za-z0-9]+)`
			`'''`
[cloudy] Add support for videoraj.ch 2014-09-14 06:17:21 +08:00			`_EMBED_URL = 'http://www.%s/embed.php?id=%s'`
			`_API_URL = 'http://www.%s/api/player.api.php?%s'`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`_MAX_TRIES = 2`
[cloudy] Add support for videoraj.ch 2014-09-14 06:17:21 +08:00			`_TESTS = [`
			`{`
			`'url': 'https://www.cloudy.ec/v/af511e2527aac',`
			`'md5': '5cb253ace826a42f35b4740539bedf07',`
			`'info_dict': {`
			`'id': 'af511e2527aac',`
			`'ext': 'flv',`
			`'title': 'Funny Cats and Animals Compilation june 2013',`
			`}`
			`},`
			`{`
			`'url': 'http://www.videoraj.ch/v/47f399fd8bb60',`
			`'md5': '7d0f8799d91efd4eda26587421c3c3b0',`
			`'info_dict': {`
			`'id': '47f399fd8bb60',`
			`'ext': 'flv',`
			`'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',`
			`}`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`}`
[cloudy] Add support for videoraj.ch 2014-09-14 06:17:21 +08:00			`]`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`if try_num > self._MAX_TRIES - 1:`
			`raise ExtractorError('Unable to extract video URL', expected=True)`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`form = {`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`'file': video_id,`
			`'key': file_key,`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`}`

			`if error_url:`
			`form.update({`
			`'numOfErrors': try_num,`
			`'errorCode': '404',`
			`'errorUrl': error_url,`
			`})`

			`data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`player_data = self._download_webpage(`
			`data_url, video_id, 'Downloading player data')`
			`data = compat_parse_qs(player_data)`

[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`try_num += 1`

[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`if 'error' in data:`
			`raise ExtractorError(`
			`'%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),`
			`expected=True)`

			`title = data.get('title', [None])[0]`
			`if title:`
[cloudy] Minor changes 2014-09-14 06:01:25 +08:00			`title = remove_end(title, '&asdasdas').strip()`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00
[cloudy] Minor changes 2014-09-14 06:01:25 +08:00			`video_url = data.get('url', [None])[0]`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00
[cloudy] Minor changes 2014-09-14 06:01:25 +08:00			`if video_url:`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`try:`
			`self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')`
			`except ExtractorError as e:`
			`if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:`
			`self.report_warning('Invalid video URL, requesting another', video_id)`
			`return self._extract_video(video_host, video_id, file_key, video_url, try_num)`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00
			`return {`
			`'id': video_id,`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00			`'url': video_url,`
[cloudy] Add new extractor. Closes #3743 2014-09-14 05:12:36 +08:00			`'title': title,`
			`}`
[cloudy] Retry extraction on 410 status code (#3743 #3744) 2014-09-14 20:04:16 +08:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_host = mobj.group('host')`
			`video_id = mobj.group('id')`

			`url = self._EMBED_URL % (video_host, video_id)`
			`webpage = self._download_webpage(url, video_id)`

			`file_key = self._search_regex(`
			`r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')`

			`return self._extract_video(video_host, video_id, file_key)`