youtube-dl/youtube_dl/extractor/beeg.py

from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import (
    compat_chr,
    compat_ord,
    compat_urllib_parse_unquote,
)
from ..utils import (
    int_or_none,
    parse_iso8601,
)


class BeegIE(InfoExtractor):
    # Accepts http(s)://beeg.com/<digits>, with an optional "www." prefix;
    # the digits are captured as the "id" group.
    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
    _TEST = {
        'url': 'http://beeg.com/5416503',
        'md5': '46c384def73b33dbc581262e5ee67cef',
        'info_dict': {
            'id': '5416503',
            'ext': 'mp4',
            'title': 'Sultry Striptease',
            'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
            'timestamp': 1391813355,
            'upload_date': '20140207',
            'duration': 383,
            'tags': list,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        # Fetch the video's JSON record from the v5 API, decrypt the media
        # URLs it carries and return the resulting info dict.
        video_id = self._match_id(url)

        def chunk_from_right(text, size):
            # Cut `text` into pieces of `size` characters aligned to the
            # right end: any leftover (len(text) % size) characters form a
            # shorter first piece.  The trailing remainder is always
            # appended, so it is an empty string when nothing is left.
            lead = len(text) % size
            pieces = []
            if lead > 0:
                pieces.append(text[:lead])
                text = text[lead:]
            while len(text) > size:
                pieces.append(text[:size])
                text = text[size:]
            pieces.append(text)
            return pieces

        def decrypt_key(key):
            # Reverse engineered from http://static.beeg.com/cpl/1105.js
            salt = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'
            unquoted = compat_urllib_parse_unquote(key)
            shifted = []
            for index, char in enumerate(unquoted):
                # Each character's code point is lowered by the matching
                # (cyclically repeated) salt character's code point mod 21.
                offset = compat_ord(salt[index % len(salt)]) % 21
                shifted.append(compat_chr(compat_ord(char) - offset))
            # The shifted text is cut into 3-character groups whose order
            # is then reversed.
            groups = chunk_from_right(''.join(shifted), 3)
            return ''.join(reversed(groups))

        def decrypt_url(encrypted_url):
            # Drop the literal '{DATA_MARKERS}' placeholder and hand the
            # result to _proto_relative_url together with 'https:'.
            cleaned = encrypted_url.replace('{DATA_MARKERS}', '')
            absolute_url = self._proto_relative_url(cleaned, 'https:')
            key = self._search_regex(
                r'/key=(.*?)%2Cend=', absolute_url, 'key', default=None)
            if key:
                # Swap the encrypted key for its decrypted counterpart.
                return absolute_url.replace(key, decrypt_key(key))
            # No key found: the URL is returned without decryption.
            return absolute_url

        video = self._download_json(
            'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)

        formats = []
        for format_id, video_url in video.items():
            # Only non-empty entries whose key looks like "<digits>p" (or
            # "<digits>P") are treated as formats.
            if video_url:
                height = self._search_regex(
                    r'^(\d+)[pP]$', format_id, 'height', default=None)
                if height:
                    formats.append({
                        'url': decrypt_url(video_url),
                        'format_id': format_id,
                        'height': int(height),
                    })
        self._sort_formats(formats)

        # 'title' is the only field accessed without a fallback.
        title = video['title']
        # Prefer the id from the API record over the one from the URL.
        video_id = video.get('id') or video_id

        timestamp = parse_iso8601(video.get('date'), ' ')
        duration = int_or_none(video.get('duration'))

        # Tags come as a single comma-separated string.
        raw_tags = video.get('tags')
        if raw_tags:
            tags = [tag.strip() for tag in raw_tags.split(',')]
        else:
            tags = None

        return {
            'id': video_id,
            'display_id': video.get('code'),
            'title': title,
            'description': video.get('desc'),
            'timestamp': timestamp,
            'duration': duration,
            'tags': tags,
            'formats': formats,
            'age_limit': 18,
        }
Add support for beeg.com 2014-08-31 17:57:10 +08:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[beeg] Decrypt URL (Closes #7736) 2015-12-04 02:59:32 +08:00			`from ..compat import (`
			`compat_chr,`
			`compat_ord,`
			`compat_urllib_parse_unquote,`
			`)`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`from ..utils import (`
			`int_or_none,`
			`parse_iso8601,`
			`)`
Add support for beeg.com 2014-08-31 17:57:10 +08:00

			`class BeegIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'`
			`_TEST = {`
			`'url': 'http://beeg.com/5416503',`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`'md5': '46c384def73b33dbc581262e5ee67cef',`
Add support for beeg.com 2014-08-31 17:57:10 +08:00			`'info_dict': {`
			`'id': '5416503',`
			`'ext': 'mp4',`
			`'title': 'Sultry Striptease',`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',`
			`'timestamp': 1391813355,`
			`'upload_date': '20140207',`
			`'duration': 383,`
			`'tags': list,`
[beeg] Add age_limit 2014-09-02 05:13:04 +08:00			`'age_limit': 18,`
Add support for beeg.com 2014-08-31 17:57:10 +08:00			`}`
			`}`

			`def _real_extract(self, url):`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`video_id = self._match_id(url)`
[beeg] Extract all formats 2014-09-02 21:54:00 +08:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`video = self._download_json(`
[beeg] Update API URL 2016-01-14 21:57:56 +08:00			`'https://api.beeg.com/api/v5/video/%s' % video_id, video_id)`
[beeg] API v5 (Closes #7846) 2015-12-12 04:52:20 +08:00
			`def split(o, e):`
			`def cut(s, x):`
			`n.append(s[:x])`
			`return s[x:]`
			`n = []`
			`r = len(o) % e`
			`if r > 0:`
			`o = cut(o, r)`
			`while len(o) > e:`
			`o = cut(o, e)`
			`n.append(o)`
			`return n`
[beeg] Extract all formats 2014-09-02 21:54:00 +08:00
[beeg] Decrypt URL (Closes #7736) 2015-12-04 02:59:32 +08:00			`def decrypt_key(key):`
[beeg] API v5 (Closes #7846) 2015-12-12 04:52:20 +08:00			`# Reverse engineered from http://static.beeg.com/cpl/1105.js`
			`a = '5ShMcIQlssOd7zChAIOlmeTZDaUxULbJRnywYaiB'`
[beeg] Decrypt URL (Closes #7736) 2015-12-04 02:59:32 +08:00			`e = compat_urllib_parse_unquote(key)`
[beeg] API v5 (Closes #7846) 2015-12-12 04:52:20 +08:00			`o = ''.join([`
			`compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)`
[beeg] Decrypt URL (Closes #7736) 2015-12-04 02:59:32 +08:00			`for n in range(len(e))])`
[beeg] API v5 (Closes #7846) 2015-12-12 04:52:20 +08:00			`return ''.join(split(o, 3)[::-1])`
[beeg] Decrypt URL (Closes #7736) 2015-12-04 02:59:32 +08:00
			`def decrypt_url(encrypted_url):`
			`encrypted_url = self._proto_relative_url(`
[beeg] Fix extraction (Closes #8225) 2016-01-14 21:57:20 +08:00			`encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')`
[beeg] Decrypt URL (Closes #7736) 2015-12-04 02:59:32 +08:00			`key = self._search_regex(`
			`r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)`
			`if not key:`
			`return encrypted_url`
			`return encrypted_url.replace(key, decrypt_key(key))`

[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`formats = []`
			`for format_id, video_url in video.items():`
[beeg] Skip empty URLs (Closes #7392) 2015-11-07 08:23:00 +08:00			`if not video_url:`
			`continue`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`height = self._search_regex(`
			`r'^(\d+)[pP]$', format_id, 'height', default=None)`
			`if not height:`
			`continue`
			`formats.append({`
[beeg] Decrypt URL (Closes #7736) 2015-12-04 02:59:32 +08:00			`'url': decrypt_url(video_url),`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`'format_id': format_id,`
			`'height': int(height),`
			`})`
[beeg] Extract all formats 2014-09-02 21:54:00 +08:00			`self._sort_formats(formats)`
Add support for beeg.com 2014-08-31 17:57:10 +08:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`title = video['title']`
			`video_id = video.get('id') or video_id`
			`display_id = video.get('code')`
			`description = video.get('desc')`
PEP8 applied 2014-11-24 03:41:03 +08:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`timestamp = parse_iso8601(video.get('date'), ' ')`
			`duration = int_or_none(video.get('duration'))`
Add support for beeg.com 2014-08-31 17:57:10 +08:00
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None`
Add support for beeg.com 2014-08-31 17:57:10 +08:00
			`return {`
			`'id': video_id,`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`'display_id': display_id,`
Add support for beeg.com 2014-08-31 17:57:10 +08:00			`'title': title,`
			`'description': description,`
[beeg] Fix extraction (Closes #7155) 2015-10-13 23:04:39 +08:00			`'timestamp': timestamp,`
			`'duration': duration,`
			`'tags': tags,`
[beeg] Extract all formats 2014-09-02 21:54:00 +08:00			`'formats': formats,`
[beeg] Add age_limit 2014-09-02 05:13:04 +08:00			`'age_limit': 18,`
Add support for beeg.com 2014-08-31 17:57:10 +08:00			`}`