youtube-dl/youtube_dl/extractor/hotnewhiphop.py

from __future__ import unicode_literals

import re
import base64

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
    HEADRequest,
)


class HotNewHipHopIE(InfoExtractor):
    """Information extractor for song pages on www.hotnewhiphop.com.

    A page either carries a base64-encoded media URL in a ``data-path``
    attribute, or it advertises a ``contentUrl`` which is handed over to the
    Youtube extractor.
    """
    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
    _TEST = {
        'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
        'file': '1435540.mp3',
        'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
        'info_dict': {
            'title': 'Freddie Gibbs - Lay It Down'
        }
    }

    def _real_extract(self, url):
        """Extract the media information for the page at ``url``.

        Returns either a ``url_result`` delegating to the Youtube extractor
        (when the page has no ``data-path`` attribute) or an info dict with
        the keys ``id``, ``url``, ``title`` and ``thumbnail``.

        Raises ExtractorError when the media-key response lacks ``mediaKey``
        or when the resolved URL still ends in ``.html``.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')

        webpage_src = self._download_webpage(url, video_id)

        # Non-fatal lookup: a missing data-path attribute selects the
        # contentUrl fallback below instead of aborting the extraction.
        video_url_base64 = self._search_regex(
            r'data-path="(.*?)"', webpage_src, 'video URL', fatal=False)

        if video_url_base64 is None:
            video_url = self._search_regex(
                r'"contentUrl" content="(.*?)"', webpage_src, 'video URL')
            return self.url_result(video_url, ie='Youtube')

        # urlencode() returns text, but on Python 3 a POST body must be
        # bytes (urllib raises TypeError for str). The urlencoded form is
        # pure ASCII, so encoding it is safe on Python 2 as well.
        reqdata = compat_urllib_parse.urlencode([
            ('mediaType', 's'),
            ('mediaId', video_id),
        ]).encode('ascii')
        r = compat_urllib_request.Request(
            'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
        r.add_header('Content-Type', 'application/x-www-form-urlencoded')
        mkd = self._download_json(
            r, video_id, note='Requesting media key',
            errnote='Could not download media key')
        # Only the presence of the key is checked; its value is not used
        # below. NOTE(review): presumably the request itself is what the
        # site needs before the redirect resolves -- confirm.
        if 'mediaKey' not in mkd:
            raise ExtractorError('Did not get a media key')

        # The data-path value is a base64-encoded URL; a HEAD request is
        # enough to follow it and learn the final media location.
        redirect_url = base64.b64decode(video_url_base64).decode('utf-8')
        redirect_req = HEADRequest(redirect_url)
        req = self._request_webpage(
            redirect_req, video_id,
            note='Resolving final URL', errnote='Could not resolve final URL')
        video_url = req.geturl()
        # Ending up on an .html URL is treated as a failed redirect.
        if video_url.endswith('.html'):
            raise ExtractorError('Redirect failed')

        video_title = self._og_search_title(webpage_src).strip()

        return {
            'id': video_id,
            'url': video_url,
            'title': video_title,
            'thumbnail': self._og_search_thumbnail(webpage_src),
        }
[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`from __future__ import unicode_literals`

Added HotNewHipHop IE 2013-06-27 09:38:48 +08:00			`import re`
			`import base64`

			`from .common import InfoExtractor`
[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`from ..utils import (`
			`compat_urllib_parse,`
			`compat_urllib_request,`
			`ExtractorError,`
			`HEADRequest,`
			`)`
Added HotNewHipHop IE 2013-06-27 09:38:48 +08:00

			`class HotNewHipHopIE(InfoExtractor):`
Properly escape ‘.’ in some _VALID_URL properties 2014-01-25 18:48:08 +08:00			`_VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'`
Move tests to the IE definitions 2013-06-28 02:46:46 +08:00			`_TEST = {`
[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',`
			`'file': '1435540.mp3',`
			`'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',`
			`'info_dict': {`
			`'title': 'Freddie Gibbs - Lay It Down'`
Move tests to the IE definitions 2013-06-28 02:46:46 +08:00			`}`
			`}`
Added HotNewHipHop IE 2013-06-27 09:38:48 +08:00
			`def _real_extract(self, url):`
			`m = re.match(self._VALID_URL, url)`
			`video_id = m.group('id')`

			`webpage_src = self._download_webpage(url, video_id)`

[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`video_url_base64 = self._search_regex(`
			`r'data-path="(.*?)"', webpage_src, u'video URL', fatal=False)`
fix for detecting youtube embedded videos. 2013-06-27 23:39:32 +08:00
[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`if video_url_base64 is None:`
			`video_url = self._search_regex(`
			`r'"contentUrl" content="(.*?)"', webpage_src, u'video URL')`
fix for detecting youtube embedded videos. 2013-06-27 23:39:32 +08:00			`return self.url_result(video_url, ie='Youtube')`
Added HotNewHipHop IE 2013-06-27 09:38:48 +08:00
[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`reqdata = compat_urllib_parse.urlencode([`
			`('mediaType', 's'),`
			`('mediaId', video_id),`
			`])`
			`r = compat_urllib_request.Request(`
			`'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)`
			`r.add_header('Content-Type', 'application/x-www-form-urlencoded')`
			`mkd = self._download_json(`
			`r, video_id, note='Requesting media key',`
			`errnote='Could not download media key')`
			`if 'mediaKey' not in mkd:`
			`raise ExtractorError('Did not get a media key')`

			`redirect_url = base64.b64decode(video_url_base64).decode('utf-8')`
			`redirect_req = HEADRequest(redirect_url)`
			`req = self._request_webpage(`
			`redirect_req, video_id,`
			`note='Resolving final URL', errnote='Could not resolve final URL')`
			`video_url = req.geturl()`
			`if video_url.endswith('.html'):`
			`raise ExtractorError('Redirect failed')`
Added HotNewHipHop IE 2013-06-27 09:38:48 +08:00
[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`video_title = self._og_search_title(webpage_src).strip()`
Added HotNewHipHop IE 2013-06-27 09:38:48 +08:00
[hotnewhiphop] Retrieve media key 2014-01-22 08:55:50 +08:00			`return {`
			`'id': video_id,`
			`'url': video_url,`
			`'title': video_title,`
			`'thumbnail': self._og_search_thumbnail(webpage_src),`
			`}`