Merge branch 'master' of github.com:rg3/youtube-dl

2024-12-31 01:02:53 +08:00 · 2015-02-20 23:20:14 +01:00 · 2015-02-20 23:20:14 +01:00 · e14ced7918
commit e14ced7918
parent 5e9a033e6e ab9d02f53b
4 changed files with 148 additions and 29 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -490,6 +490,7 @@ from .tumblr import TumblrIE
 from .tunein import TuneInIE
 from .turbo import TurboIE
 from .tutv import TutvIE
+from .tv4 import TV4IE
 from .tvigle import TvigleIE
 from .tvp import TvpIE, TvpSeriesIE
 from .tvplay import TVPlayIE
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -1,40 +1,35 @@
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
-from ..utils import remove_start
+from ..utils import (
+    remove_start,
+    int_or_none,
+)
 class BlinkxIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
+    _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
    IE_NAME = 'blinkx'
    _TEST = {
-        'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB',
+        'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
-        'md5': '2e9a07364af40163a908edbf10bb2492',
+        'md5': '337cf7a344663ec79bf93a526a2e06c7',
        'info_dict': {
-            'id': '8aQUy7GV',
+            'id': 'Da0Gw3xc',
            'ext': 'mp4',
-            'title': 'Police Car Rolls Away',
+            'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
-            'uploader': 'stupidvideos.com',
+            'uploader': 'IGN News',
-            'upload_date': '20131215',
+            'upload_date': '20150217',
-            'timestamp': 1387068000,
+            'timestamp': 1424215740,
-            'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!',
+            'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
-            'duration': 14.886,
+            'duration': 47.743333,
            'thumbnails': [{
                'width': 100,
                'height': 76,
                'resolution': '100x76',
                'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg',
            }],
        },
    }
-    def _real_extract(self, rl):
+    def _real_extract(self, url):
-        m = re.match(self._VALID_URL, rl)
+        video_id = self._match_id(url)
-        video_id = m.group('id')
        display_id = video_id[:8]
        api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
@@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor):
            elif m['type'] in ('flv', 'mp4'):
                vcodec = remove_start(m['vcodec'], 'ff')
                acodec = remove_start(m['acodec'], 'ff')
-                tbr = (int(m['vbr']) + int(m['abr'])) // 1000
+                vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
+                abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
+                tbr = vbr + abr if vbr and abr else None
                format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
                formats.append({
                    'format_id': format_id,
                    'url': m['link'],
                    'vcodec': vcodec,
                    'acodec': acodec,
-                    'abr': int(m['abr']) // 1000,
+                    'abr': abr,
-                    'vbr': int(m['vbr']) // 1000,
+                    'vbr': vbr,
                    'tbr': tbr,
-                    'width': int(m['w']),
+                    'width': int_or_none(m.get('w')),
-                    'height': int(m['h']),
+                    'height': int_or_none(m.get('h')),
                })
        self._sort_formats(formats)
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor):
        'params': {
            'skip_download': True,
        },
+    }, {
+        # YouTube video
+        'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': 'aFBIPO-P7LM',
+            'ext': 'mp4',
+            'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville',
+            'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1',
+            'uploader': 'TEDx Talks',
+            'uploader_id': 'TEDxTalks',
+            'upload_date': '20111216',
+        },
+        'params': {
+            'skip_download': True,
+        },
    }]
    _NATIVE_FORMATS = {
@@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor):
        talk_info = self._extract_info(webpage)['talks'][0]
-        if talk_info.get('external') is not None:
+        external = talk_info.get('external')
-            self.to_screen('Found video from %s' % talk_info['external']['service'])
+        if external:
+            service = external['service']
+            self.to_screen('Found video from %s' % service)
+            ext_url = None
+            if service.lower() == 'youtube':
+                ext_url = external.get('code')
            return {
                '_type': 'url',
-                'url': talk_info['external']['uri'],
+                'url': ext_url or external['uri'],
            }
        formats = [{
--- a/youtube_dl/extractor/tv4.py
+++ b/youtube_dl/extractor/tv4.py
@@ -0,0 +1,100 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    parse_iso8601,
 )
 class TV4IE(InfoExtractor):
    IE_DESC = 'tv4.se and tv4play.se'
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
                (?:program|barn)/(?:[^\?]+)\?video_id=|
                iframe/video/|
                film/|
                sport/|
            )
        )(?P<id>[0-9]+)'''
    _TESTS = [
        {
            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
            'md5': '909d6454b87b10a25aa04c4bdd416a9b',
            'info_dict': {
                'id': '2491650',
                'ext': 'mp4',
                'title': 'Kalla Fakta 5 (english subtitles)',
                'thumbnail': 're:^https?://.*\.jpg$',
                'timestamp': int,
                'upload_date': '20131125',
            },
        },
        {
            'url': 'http://www.tv4play.se/iframe/video/3054113',
            'md5': '77f851c55139ffe0ebd41b6a5552489b',
            'info_dict': {
                'id': '3054113',
                'ext': 'mp4',
                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
                'thumbnail': 're:^https?://.*\.jpg$',
                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
                'timestamp': int,
                'upload_date': '20150130',
            },
        },
        {
            'url': 'http://www.tv4play.se/sport/3060959',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/film/2378136',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
            'only_matching': True,
        },
    ]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        info = self._download_json(
            'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON')
        # If is_geo_restricted is true, it doesn't neceserally mean we can't download it
        if info['is_geo_restricted']:
            self.report_warning('This content might not be available in your country due to licensing restrictions.')
        if info['requires_subscription']:
            raise ExtractorError('This content requires subscription.', expected=True)
        sources_data = self._download_json(
            'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON')
        sources = sources_data['playback']
        formats = []
        for item in sources.get('items', {}).get('item', []):
            ext, bitrate = item['mediaFormat'], item['bitrate']
            formats.append({
                'format_id': '%s_%s' % (ext, bitrate),
                'tbr': bitrate,
                'ext': ext,
                'url': item['url'],
            })
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': info['title'],
            'formats': formats,
            'description': info.get('description'),
            'timestamp': parse_iso8601(info.get('broadcast_date_time')),
            'duration': info.get('duration'),
            'thumbnail': info.get('image'),
            'is_live': sources.get('live'),
        }