[cda] Fix extraction

2025-03-15 02:47:34 +08:00 · 2016-11-04 20:43:28 +01:00 · 2016-11-04 20:43:28 +01:00 · 89c406d052
commit 89c406d052
parent b30e4c2754
1 changed file with 9 additions and 20 deletions
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
@@ -3,9 +3,9 @@ from __future__ import unicode_literals
 import re
 from json import loads
 from .common import InfoExtractor
 from ..utils import (
    decode_packed_codes,
    ExtractorError,
    parse_duration
 )
@@ -40,12 +40,10 @@ class CDAIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
            raise ExtractorError('This video is only available for premium users.', expected=True)
        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
        formats = []
        info_dict = {
@@ -56,28 +54,19 @@ class CDAIE(InfoExtractor):
        }
        def extract_format(page, version):
-            unpacked = decode_packed_codes(page)
+            video_info = loads(self._search_regex(
-            format_url = self._search_regex(
+                r"player_data='(?P<video_info>.*?)'", page,
-                r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked,
+                '%s url' % version, fatal=False, group='video_info'))
-                '%s url' % version, fatal=False, group='url')
+            if not video_info:
            if not format_url:
                return
            f = {
-                'url': format_url,
+                'url': video_info['video']['file'],
                'height': video_info['video']['height'],
                'width': video_info['video']['width'],
            }
            m = re.search(
                r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
                page)
            if m:
                f.update({
                    'format_id': m.group('format_id'),
                    'height': int(m.group('height')),
                })
            info_dict['formats'].append(f)
            if not info_dict['duration']:
-                info_dict['duration'] = parse_duration(self._search_regex(
+                info_dict['duration'] = parse_duration(video_info['video']['duration'])
                    r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
                    unpacked, 'duration', fatal=False, group='duration'))
        extract_format(webpage, 'default')