youtube-dl/youtube_dl/extractor/pyvideo.py

from __future__ import unicode_literals

import re
import os

from .common import InfoExtractor


class PyvideoIE(InfoExtractor):
    """Extractor for pyvideo.org video pages.

    A page either references a YouTube watch URL, in which case the work is
    handed over to the ``Youtube`` extractor, or it exposes a direct media
    link, which is returned together with the title scraped from the page.
    """

    # The numeric path component is used as the video id; a slug must follow.
    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'

    _TESTS = [
        # Page that links to YouTube: the result comes from the Youtube
        # extractor, hence the YouTube id and the 'add_ie' entry.
        {
            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
            'md5': '520915673e53a5c5d487c36e0c4d85b5',
            'info_dict': {
                'id': '24_4WWkSmNo',
                'ext': 'webm',
                'title': 'Become a logging expert in 30 minutes',
                'description': 'md5:9665350d466c67fb5b1598de379021f7',
                'upload_date': '20130320',
                'uploader': 'Next Day Video',
                'uploader_id': 'NextDayVideo',
            },
            'add_ie': ['Youtube'],
        },
        # Page with a direct media link: id is the numeric id from the URL.
        {
            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
            'info_dict': {
                'id': '2542',
                'ext': 'm4v',
                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
            },
        },
    ]

    def _real_extract(self, url):
        """Extract video information for a pyvideo.org page.

        :param url: page URL; expected to match ``_VALID_URL``.
        :return: the value of ``self.url_result(...)`` when the page contains
            a YouTube watch URL, otherwise a dict with the keys ``id``,
            ``title`` and ``url``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # Match only characters that can belong to the URL itself.  A greedy
        # '.*' would run to the end of the line and drag the closing quote
        # and any following markup into the URL handed to the Youtube
        # extractor; '+' also refuses a watch URL with an empty video id.
        m_youtube = re.search(
            r'(https?://www\.youtube\.com/watch\?v=[^\s"\'<>]+)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')

        # No YouTube link: fall back to scraping the page directly.
        title = self._html_search_regex(
            r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
            webpage, 'title', flags=re.DOTALL)
        # Prefer the <source> tag, then the link in the "Download" entry.
        video_url = self._search_regex(
            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
            webpage, 'video url', flags=re.DOTALL)

        return {
            'id': video_id,
            # Drop a trailing file extension from the title (e.g. '.m4v').
            # NOTE(review): splitext cuts at the last dot, so a title whose
            # last dot is not an extension separator would be truncated --
            # confirm whether such titles occur on non-YouTube pages.
            'title': os.path.splitext(title)[0],
            'url': video_url,
        }
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`from __future__ import unicode_literals`

Adding pyvideo support 2013-12-07 13:11:01 +08:00			`import re`
[pyvideo] add support for videos that don't come from Youtube 2013-12-07 18:19:59 +08:00			`import os`
Adding pyvideo support 2013-12-07 13:11:01 +08:00
			`from .common import InfoExtractor`


			`class PyvideoIE(InfoExtractor):`
Add support for https for all extractors as preventive and future-proof measure 2016-03-21 23:36:32 +08:00			`_VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'`
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00
			`_TESTS = [`
			`{`
			`'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',`
update tests related to the change in youtube http format sorting the change was done in 82156fdbf0913c75181484dcc813565713bf78e9 2016-03-06 04:52:24 +08:00			`'md5': '520915673e53a5c5d487c36e0c4d85b5',`
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`'info_dict': {`
			`'id': '24_4WWkSmNo',`
update tests related to the change in youtube http format sorting the change was done in 82156fdbf0913c75181484dcc813565713bf78e9 2016-03-06 04:52:24 +08:00			`'ext': 'webm',`
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`'title': 'Become a logging expert in 30 minutes',`
			`'description': 'md5:9665350d466c67fb5b1598de379021f7',`
			`'upload_date': '20130320',`
update tests related to the change in youtube http format sorting the change was done in 82156fdbf0913c75181484dcc813565713bf78e9 2016-03-06 04:52:24 +08:00			`'uploader': 'Next Day Video',`
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`'uploader_id': 'NextDayVideo',`
			`},`
			`'add_ie': ['Youtube'],`
[pyvideo] Cleanup and fix test 2013-12-07 17:59:18 +08:00			`},`
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`{`
			`'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',`
			`'md5': '5fe1c7e0a8aa5570330784c847ff6d12',`
			`'info_dict': {`
			`'id': '2542',`
			`'ext': 'm4v',`
			`'title': 'Gloriajw-SpotifyWithErikBernhardsson182',`
			`},`
[pyvideo] add support for videos that don't come from Youtube 2013-12-07 18:19:59 +08:00			`},`
			`]`
Adding pyvideo support 2013-12-07 13:11:01 +08:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
[pyvideo] add support for videos that don't come from Youtube 2013-12-07 18:19:59 +08:00			`video_id = mobj.group('id')`
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00
Adding pyvideo support 2013-12-07 13:11:01 +08:00			`webpage = self._download_webpage(url, video_id)`

[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage)`
Adding pyvideo support 2013-12-07 13:11:01 +08:00			`if m_youtube is not None:`
			`return self.url_result(m_youtube.group(1), 'Youtube')`
[pyvideo] add support for videos that don't come from Youtube 2013-12-07 18:19:59 +08:00
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`title = self._html_search_regex(`
[pyvideo] Fix title extraction 2014-07-14 01:38:10 +08:00			`r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',`
[pyvideo] Fix title 2014-04-11 08:20:50 +08:00			`webpage, 'title', flags=re.DOTALL)`
[pyvideo] Modernize 2014-03-31 20:31:48 +08:00			`video_url = self._search_regex(`
			`[r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],`
			`webpage, 'video url', flags=re.DOTALL)`

[pyvideo] add support for videos that don't come from Youtube 2013-12-07 18:19:59 +08:00			`return {`
			`'id': video_id,`
			`'title': os.path.splitext(title)[0],`
			`'url': video_url,`
			`}`