youtube-dl/youtube_dl/extractor/bild.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    unescapeHTML,
)


class BildIE(InfoExtractor):
    """Extractor for video pages on bild.de.

    The page URL is turned into its JSON variant (``,view=json.bild.html``)
    and the video metadata is read from that JSON document.
    """

    # Captures the slug as ``display_id`` and the trailing numeric part as
    # ``id``; an optional ``,auto=true`` flag before the suffix is tolerated.
    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
    IE_DESC = 'Bild.de'
    _TEST = {
        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
        'md5': 'dd495cbd99f2413502a1713a1156ac8a',
        'info_dict': {
            'id': '38184146',
            'ext': 'mp4',
            'title': 'Das können die  neuen iPads',
            'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 196,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the bild.de video page at ``url``.

        ``title`` and the first clip source are read with plain indexing, so
        a JSON document lacking them raises ``KeyError``/``IndexError``;
        ``description``, ``poster`` and ``durationSec`` are optional.
        """
        clip_id = self._match_id(url)

        # Everything before the first '.bild.html' is kept and the JSON view
        # suffix is appended in its place.
        page_stem = url.split('.bild.html')[0]
        json_url = page_stem + ',view=json.bild.html'
        metadata = self._download_json(json_url, clip_id)

        # NOTE(review): unescapeHTML presumably decodes HTML entities and
        # passes None through -- confirm against ..utils.
        title = unescapeHTML(metadata['title']).strip()
        description = unescapeHTML(metadata.get('description'))

        # Only the first source of the first clip is used.
        first_clip = metadata['clipList'][0]
        media_url = first_clip['srces'][0]['src']

        poster = metadata.get('poster')
        duration = int_or_none(metadata.get('durationSec'))

        info = {}
        info['id'] = clip_id
        info['title'] = title
        info['description'] = description
        info['url'] = media_url
        info['thumbnail'] = poster
        info['duration'] = duration
        return info
PEP8 applied 2014-11-24 03:41:03 +08:00			`# coding: utf-8`
[bild] Simplify (#3983) 2014-10-24 21:09:43 +08:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[BildIE] Escape ampersands in xml and update test thumbnail 2015-05-07 22:07:11 +08:00			`from ..utils import (`
Fix indents 2015-05-07 22:09:27 +08:00			`int_or_none,`
[bild] extract info from json request 2015-10-10 07:45:23 +08:00			`unescapeHTML,`
[BildIE] Escape ampersands in xml and update test thumbnail 2015-05-07 22:07:11 +08:00			`)`
[bild] Simplify (#3983) 2014-10-24 21:09:43 +08:00

			`class BildIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'`
			`IE_DESC = 'Bild.de'`
			`_TEST = {`
			`'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',`
			`'md5': 'dd495cbd99f2413502a1713a1156ac8a',`
			`'info_dict': {`
			`'id': '38184146',`
			`'ext': 'mp4',`
[bild] Strip title 2015-10-11 21:16:51 +08:00			`'title': 'Das können die neuen iPads',`
			`'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 20:08:07 +08:00			`'thumbnail': r're:^https?://.*\.jpg$',`
[bild] Simplify (#3983) 2014-10-24 21:09:43 +08:00			`'duration': 196,`
			`}`
			`}`

			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`

[bild] Strip title 2015-10-11 21:16:51 +08:00			`video_data = self._download_json(`
			`url.split('.bild.html')[0] + ',view=json.bild.html', video_id)`
[bild] Simplify (#3983) 2014-10-24 21:09:43 +08:00
			`return {`
			`'id': video_id,`
[bild] Strip title 2015-10-11 21:16:51 +08:00			`'title': unescapeHTML(video_data['title']).strip(),`
[bild] extract info from json request 2015-10-10 07:45:23 +08:00			`'description': unescapeHTML(video_data.get('description')),`
			`'url': video_data['clipList'][0]['srces'][0]['src'],`
			`'thumbnail': video_data.get('poster'),`
			`'duration': int_or_none(video_data.get('durationSec')),`
[bild] Simplify (#3983) 2014-10-24 21:09:43 +08:00			`}`