From d4e5dc44e48583596ed79cc28468920408f0afa6 Mon Sep 17 00:00:00 2001 From: Dmitry Grigoryev Date: Mon, 10 Jun 2019 21:06:46 +0300 Subject: [PATCH 1/2] [tvrain] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvrain.py | 51 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/tvrain.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index eb5efd1e8..e0d4046c0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1234,6 +1234,7 @@ from .tvplay import ( TVPlayHomeIE, ) from .tvplayer import TVPlayerIE +from .tvrain import TVRainIE from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE diff --git a/youtube_dl/extractor/tvrain.py b/youtube_dl/extractor/tvrain.py new file mode 100644 index 000000000..5d9d74d66 --- /dev/null +++ b/youtube_dl/extractor/tvrain.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class TVRainIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvrain\.ru.*/(?P<id>[a-z_]+-\d+)/?' + _TESTS = [{ + 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', + 'info_dict': { + 'id': '582306', + 'ext': 'mp4', + 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', + 'duration': 3382, + }, + }, { + 'url': 'https://tvrain.ru/teleshow/ted_dod/mozhete_li_vy_reshit_golovolomku_so_shkafchikami-432600/', + 'info_dict': { + 'id': '738482', + 'ext': 'mp4', + 'title': ' Можете ли вы решить головоломку со шкафчиками? 
', + 'duration': 237, + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + meta = self._search_regex( + r'(?s)window\.TVRAIN\.app\s*=\s*({.+?})[\s\/\*\]>]+<\/script>', + webpage, 'meta', default=None) + + if meta: + article = json.loads(meta)['article'] + eagle_id = str(article['eagle_id']) + return { + '_type': 'url', + 'id': eagle_id, + 'ie_key': 'EaglePlatform', + 'url': smuggle_url( + 'eagleplatform:tvrainru.media.eagleplatform.com:%s' % eagle_id, + {'referrer': url}), + } + + return self.url_result(url, ie='Generic') From b9c1328481978ee83d0dcbf7707a3fbd24df95dd Mon Sep 17 00:00:00 2001 From: Dmitry Grigoryev Date: Mon, 10 Jun 2019 21:26:29 +0300 Subject: [PATCH 2/2] [generic] Remove tvrain tests --- youtube_dl/extractor/generic.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index eeb0d25f6..2a57b03c3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1327,21 +1327,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # referrer protected EaglePlatform embed - { - 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/', - 'info_dict': { - 'id': '582306', - 'ext': 'mp4', - 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 3382, - 'view_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, # ClipYou (EaglePlatform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', @@ -1905,16 +1890,6 @@ class GenericIE(InfoExtractor): 'playlist_mincount': 5, 'add_ie': [JojIE.ie_key()], }, - { - # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video) - 'url': 'https://tvrain.ru/amp/418921/', - 'md5': 'cc00413936695987e8de148b67d14f1d', - 'info_dict': { - 'id': '418921', - 'ext': 'mp4', - 'title': 'Стас Намин: «Мы нарушили 
девственность Кремля»', - }, - }, { # vzaar embed 'url': 'http://help.vzaar.com/article/165-embedding-video',