1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-15 02:47:34 +08:00

[cda] Fix extraction

This commit is contained in:
Michał Rokita 2016-11-04 20:43:28 +01:00
parent b30e4c2754
commit 89c406d052

View File

@ -3,9 +3,9 @@ from __future__ import unicode_literals
import re import re
from json import loads
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
decode_packed_codes,
ExtractorError, ExtractorError,
parse_duration parse_duration
) )
@ -40,12 +40,10 @@ class CDAIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage: if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True) raise ExtractorError('This video is only available for premium users.', expected=True)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
formats = [] formats = []
info_dict = { info_dict = {
@ -56,28 +54,19 @@ class CDAIE(InfoExtractor):
} }
def extract_format(page, version): def extract_format(page, version):
unpacked = decode_packed_codes(page) video_info = loads(self._search_regex(
format_url = self._search_regex( r"player_data='(?P<video_info>.*?)'", page,
r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked, '%s url' % version, fatal=False, group='video_info'))
'%s url' % version, fatal=False, group='url') if not video_info:
if not format_url:
return return
f = { f = {
'url': format_url, 'url': video_info['video']['file'],
'height': video_info['video']['height'],
'width': video_info['video']['width'],
} }
m = re.search(
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
page)
if m:
f.update({
'format_id': m.group('format_id'),
'height': int(m.group('height')),
})
info_dict['formats'].append(f) info_dict['formats'].append(f)
if not info_dict['duration']: if not info_dict['duration']:
info_dict['duration'] = parse_duration(self._search_regex( info_dict['duration'] = parse_duration(video_info['video']['duration'])
r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1",
unpacked, 'duration', fatal=False, group='duration'))
extract_format(webpage, 'default') extract_format(webpage, 'default')