youtube-dl/youtube_dl/extractor/myspass.py

from __future__ import unicode_literals
import os.path

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse_urlparse,
)
from ..utils import (
    ExtractorError,
)


class MySpassIE(InfoExtractor):
    """Information extractor for video pages on www.myspass.de.

    The video id is read from the last non-empty path element of the page
    URL and used to fetch a metadata XML document, from which the media
    URL, title and optional fields are taken.
    """

    _VALID_URL = r'http://www\.myspass\.de/.*'
    _TEST = {
        'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
        'md5': '0b49f4844a068f8b33f4b7c88405862b',
        'info_dict': {
            'id': '11741',
            'ext': 'mp4',
            "description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",
            "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",
        },
    }

    @staticmethod
    def _child_text(parent, tag):
        """Return the text of the child element ``tag`` of ``parent``.

        Returns None when the element is absent and also when it is present
        but carries no text, so callers only have one "no value" case to
        handle.
        """
        element = parent.find(tag)
        if element is None:
            return None
        return element.text or None

    def _real_extract(self, url):
        """Extract the video information for the myspass page at ``url``.

        Returns a dict with the keys 'id', 'url', 'title', 'format',
        'thumbnail' and 'description'; 'thumbnail' and 'description' are
        None when the metadata does not provide them, and 'format' falls
        back to 'mp4'.

        Raises ExtractorError when no video id can be derived from the URL
        or when the metadata has no usable download URL or title.
        """
        META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'

        # The video id is the last non-empty path element of the URL. Pages
        # usually end with a trailing slash, so strip those before splitting.
        # Plain string handling is used because URL paths are always
        # '/'-separated, whereas os.path follows host-platform rules.
        url_path = compat_urllib_parse_urlparse(url).path
        video_id = url_path.rstrip('/').rpartition('/')[2]
        if not video_id:
            # Without this guard the metadata endpoint would be queried
            # with an empty id.
            raise ExtractorError('Unable to extract video id')

        # get metadata
        metadata = self._download_xml(
            META_DATA_URL_TEMPLATE % video_id, video_id,
            transform_source=lambda s: s.strip())

        # Mandatory fields: an empty element is as unusable as a missing one.
        video_url = self._child_text(metadata, 'url_flv')
        if video_url is None:
            raise ExtractorError('Unable to extract download url')
        title = self._child_text(metadata, 'title')
        if title is None:
            raise ExtractorError('Unable to extract title')

        # Optional fields; the format defaults to mp4 when not given.
        video_format = self._child_text(metadata, 'format_id') or 'mp4'

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'format': video_format,
            'thumbnail': self._child_text(metadata, 'imagePreview'),
            'description': self._child_text(metadata, 'description'),
        }
[myspass] Simplify and use unicode_literals 2014-01-29 23:55:23 +08:00			`from __future__ import unicode_literals`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00			`import os.path`

			`from .common import InfoExtractor`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 19:24:42 +08:00			`from ..compat import (`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00			`compat_urllib_parse_urlparse,`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 19:24:42 +08:00			`)`
			`from ..utils import (`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00			`ExtractorError,`
			`)`


			`class MySpassIE(InfoExtractor):`
Correct some extractor _VALID_URL regexes 2013-12-04 21:34:47 +08:00			`_VALID_URL = r'http://www\.myspass\.de/.*'`
Move tests to the IE definitions 2013-06-28 02:46:46 +08:00			`_TEST = {`
[myspass] Simplify and use unicode_literals 2014-01-29 23:55:23 +08:00			`'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',`
			`'md5': '0b49f4844a068f8b33f4b7c88405862b',`
			`'info_dict': {`
[myspass] Modernize test case 2014-11-12 22:01:48 +08:00			`'id': '11741',`
			`'ext': 'mp4',`
[myspass] Simplify and use unicode_literals 2014-01-29 23:55:23 +08:00			`"description": "Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?",`
			`"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2",`
			`},`
Move tests to the IE definitions 2013-06-28 02:46:46 +08:00			`}`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00
			`def _real_extract(self, url):`
			`META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'`

			`# video id is the last path element of the URL`
			`# usually there is a trailing slash, so also try the second but last`
			`url_path = compat_urllib_parse_urlparse(url).path`
			`url_parent_path, video_id = os.path.split(url_path)`
			`if not video_id:`
			`_, video_id = os.path.split(url_parent_path)`

			`# get metadata`
			`metadata_url = META_DATA_URL_TEMPLATE % video_id`
[myspass] Fix extraction (closes #6206) 2015-07-14 00:21:11 +08:00			`metadata = self._download_xml(`
			`metadata_url, video_id, transform_source=lambda s: s.strip())`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00
			`# extract values from metadata`
			`url_flv_el = metadata.find('url_flv')`
			`if url_flv_el is None:`
[myspass] Simplify and use unicode_literals 2014-01-29 23:55:23 +08:00			`raise ExtractorError('Unable to extract download url')`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00			`video_url = url_flv_el.text`
			`title_el = metadata.find('title')`
			`if title_el is None:`
[myspass] Simplify and use unicode_literals 2014-01-29 23:55:23 +08:00			`raise ExtractorError('Unable to extract title')`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00			`title = title_el.text`
			`format_id_el = metadata.find('format_id')`
			`if format_id_el is None:`
			`format = 'mp4'`
			`else:`
			`format = format_id_el.text`
			`description_el = metadata.find('description')`
			`if description_el is not None:`
			`description = description_el.text`
			`else:`
			`description = None`
			`imagePreview_el = metadata.find('imagePreview')`
			`if imagePreview_el is not None:`
			`thumbnail = imagePreview_el.text`
			`else:`
			`thumbnail = None`
[myspass] Simplify and use unicode_literals 2014-01-29 23:55:23 +08:00
			`return {`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00			`'id': video_id,`
			`'url': video_url,`
			`'title': title,`
			`'format': format,`
			`'thumbnail': thumbnail,`
[myspass] Simplify and use unicode_literals 2014-01-29 23:55:23 +08:00			`'description': description,`
[myspass] Move into own file and default to mp4 ext 2013-06-24 04:20:45 +08:00			`}`