youtube-dl/youtube_dl/extractor/mlb.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    parse_iso8601,
    find_xpath_attr,
)


class MLBIE(InfoExtractor):
    """Extractor for video pages hosted on m.mlb.com.

    The numeric video id is taken from the page URL and used to fetch a
    per-video "media detail" XML document, from which the metadata and
    every advertised media URL are read.
    """

    _VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
    _TESTS = [
        {
            'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
            'md5': 'ff56a598c2cf411a9a38a69709e97079',
            'info_dict': {
                'id': '34698933',
                'ext': 'mp4',
                'title': "Ackley's spectacular catch",
                'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
                'duration': 66,
                'timestamp': 1405980600,
                'upload_date': '20140721',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
            'info_dict': {
                'id': '34496663',
                'ext': 'mp4',
                'title': 'Stanton prepares for Derby',
                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
                'duration': 46,
                'timestamp': 1405105800,
                'upload_date': '20140711',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
            'md5': '0e6e73d509321e142409b695eadd541f',
            'info_dict': {
                'id': '34578115',
                'ext': 'mp4',
                'title': 'Cespedes repeats as Derby champ',
                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
                'duration': 488,
                'timestamp': 1405399936,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
            'md5': 'b8fd237347b844365d74ea61d4245967',
            'info_dict': {
                'id': '34577915',
                'ext': 'mp4',
                'title': 'Bautista on Home Run Derby',
                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
                'duration': 52,
                'timestamp': 1405390722,
                'upload_date': '20140715',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
    ]

    @staticmethod
    def _child_text(parent, path):
        """Return the text of the first element matching *path*, or None.

        None is returned both when no element matches and when the
        matching element has no text.
        """
        element = parent.find(path)
        if element is None:
            return None
        return element.text

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The headline is treated as mandatory.  Description, duration,
        timestamp and thumbnail are optional: when the detail XML lacks
        one of them the corresponding value is None instead of the whole
        extraction failing.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # The detail XML path is built from the last three characters of
        # the id, one directory level per character.
        detail = self._download_xml(
            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
            % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)

        title = detail.find('./headline').text
        description = self._child_text(detail, './big-blurb')

        duration_text = self._child_text(detail, './duration')
        duration = parse_duration(duration_text) if duration_text else None

        # The trailing five characters of the date attribute are dropped
        # before parsing (presumably a UTC offset such as "-0400" --
        # TODO confirm against a live response).
        date_attr = detail.attrib.get('date')
        timestamp = parse_iso8601(date_attr[:-5]) if date_attr else None

        # Guard against the type-45 thumbnail scenario being absent.
        thumbnail_el = find_xpath_attr(
            detail, './thumbnailScenarios/thumbnailScenario', 'type', '45')
        thumbnail = thumbnail_el.text if thumbnail_el is not None else None

        formats = []
        for media_url in detail.findall('./url'):
            if not media_url.text:
                # An entry without a URL cannot be downloaded.
                continue
            playback_scenario = media_url.attrib.get('playback_scenario')
            fmt = {
                'url': media_url.text,
                'format_id': playback_scenario,
            }
            m = None
            if playback_scenario:
                m = re.search(
                    r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)',
                    playback_scenario)
            if m:
                fmt.update({
                    # The number before "K" is already in kilobits, which
                    # is the unit youtube-dl documents for 'vbr'; it must
                    # not be scaled up.
                    'vbr': int(m.group('vbr')),
                    'width': int(m.group('width')),
                    'height': int(m.group('height')),
                })
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'thumbnail': thumbnail,
        }
[MLB] Add new extractor 2014-07-15 02:00:55 +08:00			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`from ..utils import (`
			`parse_duration,`
			`parse_iso8601,`
			`find_xpath_attr,`
			`)`
[MLB] Add new extractor 2014-07-15 02:00:55 +08:00

[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`class MLBIE(InfoExtractor):`
[mlb] Fix regex 2014-07-23 05:54:05 +08:00			`_VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'`
[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`_TESTS = [`
[MLB] Enhanced _VALID_URL to cover more MLB videos 2014-07-23 05:10:27 +08:00			`{`
			`'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',`
			`'md5': 'ff56a598c2cf411a9a38a69709e97079',`
			`'info_dict': {`
			`'id': '34698933',`
			`'ext': 'mp4',`
			`'title': "Ackley's spectacular catch",`
			`'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',`
			`'duration': 66,`
			`'timestamp': 1405980600,`
			`'upload_date': '20140721',`
			`'thumbnail': 're:^https?://.*\.jpg$',`
			`},`
			`},`
[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`{`
			`'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',`
			`'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',`
			`'info_dict': {`
			`'id': '34496663',`
			`'ext': 'mp4',`
			`'title': 'Stanton prepares for Derby',`
			`'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',`
			`'duration': 46,`
			`'timestamp': 1405105800,`
			`'upload_date': '20140711',`
			`'thumbnail': 're:^https?://.*\.jpg$',`
			`},`
[MLB] Add new extractor 2014-07-15 02:00:55 +08:00			`},`
[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`{`
			`'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',`
			`'md5': '0e6e73d509321e142409b695eadd541f',`
			`'info_dict': {`
			`'id': '34578115',`
			`'ext': 'mp4',`
			`'title': 'Cespedes repeats as Derby champ',`
			`'description': 'md5:08df253ce265d4cf6fb09f581fafad07',`
			`'duration': 488,`
			`'timestamp': 1405399936,`
			`'upload_date': '20140715',`
			`'thumbnail': 're:^https?://.*\.jpg$',`
			`},`
			`},`
			`{`
			`'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',`
			`'md5': 'b8fd237347b844365d74ea61d4245967',`
			`'info_dict': {`
			`'id': '34577915',`
			`'ext': 'mp4',`
			`'title': 'Bautista on Home Run Derby',`
			`'description': 'md5:b80b34031143d0986dddc64a8839f0fb',`
			`'duration': 52,`
			`'timestamp': 1405390722,`
			`'upload_date': '20140715',`
			`'thumbnail': 're:^https?://.*\.jpg$',`
			`},`
			`},`
			`]`
[MLB] Add new extractor 2014-07-15 02:00:55 +08:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`video_id = mobj.group('id')`

[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`detail = self._download_xml(`
			`'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'`
			`% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)`

			`title = detail.find('./headline').text`
			`description = detail.find('./big-blurb').text`
			`duration = parse_duration(detail.find('./duration').text)`
			`timestamp = parse_iso8601(detail.attrib['date'][:-5])`

			`thumbnail = find_xpath_attr(`
			`detail, './thumbnailScenarios/thumbnailScenario', 'type', '45').text`
[MLB] Add new extractor 2014-07-15 02:00:55 +08:00
[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`formats = []`
			`for media_url in detail.findall('./url'):`
			`playback_scenario = media_url.attrib['playback_scenario']`
			`fmt = {`
			`'url': media_url.text,`
			`'format_id': playback_scenario,`
			`}`
			`m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)`
			`if m:`
			`fmt.update({`
			`'vbr': int(m.group('vbr')) * 1000,`
			`'width': int(m.group('width')),`
			`'height': int(m.group('height')),`
			`})`
			`formats.append(fmt)`
Switched to use media detail XML to extract video URL 2014-07-16 04:55:23 +08:00
[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`self._sort_formats(formats)`
Switched to use media detail XML to extract video URL 2014-07-16 04:55:23 +08:00
[MLB] Add new extractor 2014-07-15 02:00:55 +08:00			`return {`
			`'id': video_id,`
			`'title': title,`
			`'description': description,`
[mlb] Extract more metadata and all formats, provide more tests 2014-07-16 21:40:28 +08:00			`'duration': duration,`
			`'timestamp': timestamp,`
			`'formats': formats,`
[MLB] Add new extractor 2014-07-15 02:00:55 +08:00			`'thumbnail': thumbnail,`
			`}`