From 5867abd84067e5f338c448e30e30b88509f62b91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sun, 2 Oct 2016 00:00:03 +0200 Subject: [PATCH] [kontrtube] Improve metadata extraction --- youtube_dl/extractor/kontrtube.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py index 704bd7b34..5ccb2b597 100644 --- a/youtube_dl/extractor/kontrtube.py +++ b/youtube_dl/extractor/kontrtube.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,12 +6,12 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, + js_to_json, parse_duration, ) class KontrTubeIE(InfoExtractor): - IE_NAME = 'kontrtube' IE_DESC = 'KontrTube.ru - Труба зовёт' _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/' @@ -31,16 +31,25 @@ class KontrTubeIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') + video_id, display_id = mobj.groups() - webpage = self._download_webpage( - url, display_id, 'Downloading page') + webpage = self._download_webpage(url, display_id) + + video_data = self._parse_json( + self._search_regex( + r'var\s+flashvars\s*=\s*({[^}]+});', webpage, 'video data', fatal=False), + video_id, transform_source=js_to_json) video_url = self._search_regex( - r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL') + r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL', fatal=False) thumbnail = self._search_regex( r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False) + + if not video_url: + video_url = video_data['video_url'] + if not thumbnail: + thumbnail = video_data.get('preview_url') + title = self._html_search_regex( r'(?s)

(.+?)

', webpage, 'title') description = self._html_search_meta(