From 2b17340fb95ed413f21f3b866fcbfc10088b1a46 Mon Sep 17 00:00:00 2001
From: Robert Trebula
Date: Thu, 4 Jan 2018 22:05:54 +0100
Subject: [PATCH 1/2] audio extracts using json parsing

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/rtvs.py       | 35 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 youtube_dl/extractor/rtvs.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e64defe62..b56c45215 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -898,6 +898,7 @@ from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
 from .rtvnh import RTVNHIE
+from .rtvs import RtvsExtractorIE
 from .rudo import RudoIE
 from .ruhd import RUHDIE
 from .ruleporn import RulePornIE
diff --git a/youtube_dl/extractor/rtvs.py b/youtube_dl/extractor/rtvs.py
new file mode 100644
index 000000000..f6ada92e8
--- /dev/null
+++ b/youtube_dl/extractor/rtvs.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+class RtvsExtractorIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/.*/archiv/[0-9]*/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
+        'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
+        'info_dict': {
+            'id': '414872',
+            'ext': 'mp3',
+            'title': u'Ostrov pokladov 1 časť',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        playlist_url = self._search_regex(r'"playlist": "(https?:.*)&', webpage, 'playlist_url')
+        self.to_screen("Playlist URL: " + playlist_url)
+
+        playlist = self._download_json(playlist_url, video_id, "Downloading playlist")
+        playlist_item = playlist[0]
+        url = playlist_item["sources"][0]["file"]
+        full_title = playlist_item.get("title")
+        (title, ext) = full_title.split(".", 2)
+
+        return {
+            'id': video_id,
+            'url': url,
+            'title': title,
+            'ext': ext
+        }

From 0b0d2bb8e83edf4a1fb42061c690ab422e4a2d34 Mon Sep 17 00:00:00 2001
From: Robert Trebula
Date: Thu, 4 Jan 2018 22:29:27 +0100
Subject: [PATCH 2/2] video archives + reuse jwplayer parsing of yt-dl

---
 youtube_dl/extractor/rtvs.py | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/extractor/rtvs.py b/youtube_dl/extractor/rtvs.py
index f6ada92e8..9041356bf 100644
--- a/youtube_dl/extractor/rtvs.py
+++ b/youtube_dl/extractor/rtvs.py
@@ -5,31 +5,34 @@ from .common import InfoExtractor
 
 class RtvsExtractorIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/.*/archiv/[0-9]*/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
+        # radio archive
         'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
         'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
         'info_dict': {
             'id': '414872',
             'ext': 'mp3',
-            'title': u'Ostrov pokladov 1 časť',
+            'title': u'Ostrov pokladov 1 časť.mp3',
         }
-    }
+    }, {
+        # tv archive
+        'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
+        'md5': '85e2c55cf988403b70cac24f5c086dc6',
+        'info_dict': {
+            'id': '63118',
+            'ext': 'mp4',
+            'title': u'Amaro Džives - Náš deň',
+            'description': u'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
+
         playlist_url = self._search_regex(r'"playlist": "(https?:.*)&', webpage, 'playlist_url')
-        self.to_screen("Playlist URL: " + playlist_url)
-
+        self.to_screen("%s: Playlist URL: %s" % (video_id, playlist_url))
         playlist = self._download_json(playlist_url, video_id, "Downloading playlist")
-        playlist_item = playlist[0]
-        url = playlist_item["sources"][0]["file"]
-        full_title = playlist_item.get("title")
-        (title, ext) = full_title.split(".", 2)
+        jwplayer_data = playlist[0]
+        return self._parse_jwplayer_data(jwplayer_data, video_id=video_id)
 
-        return {
-            'id': video_id,
-            'url': url,
-            'title': title,
-            'ext': ext
-        }