diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 530474f3f..9302dcb6a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -979,7 +979,10 @@ from .rtp import RTPIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE -from .rtvs import RTVSIE +from .rtvs import ( + RTVSTVIE, + RTVSRADIOIE, +) from .rudo import RudoIE from .ruhd import RUHDIE from .rutube import ( diff --git a/youtube_dl/extractor/rtvs.py b/youtube_dl/extractor/rtvs.py index 9cd15036f..46733ec0d 100644 --- a/youtube_dl/extractor/rtvs.py +++ b/youtube_dl/extractor/rtvs.py @@ -2,25 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import determine_ext -class RTVSIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)' - _TESTS = [{ - # radio archive - 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872', - 'md5': '134d5d6debdeddf8a5d761cbc9edacb8', - 'info_dict': { - 'id': '135320', - 'ext': 'mp3', - 'title': 'Ostrov pokladov 1 časť.mp3' - }, - 'params': { - 'skip_download': True, - } - }, { - # tv archive +class RTVSTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:televizia)/archiv/\d+/(?P<id>\d+)' + _TEST = { 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118', 'md5': '85e2c55cf988403b70cac24f5c086dc6', 'info_dict': { @@ -32,7 +18,7 @@ class RTVSIE(InfoExtractor): 'params': { 'skip_download': True, } - }] + } def _real_extract(self, url): video_id = self._match_id(url) @@ -49,30 +35,69 @@ class RTVSIE(InfoExtractor): data = self._download_json( playlist_url, video_id, 'Downloading playlist') - try: - data_media = data['clip'] - except KeyError: - data_media = data['playlist'][0] + data_media = data['clip'] media_id = data_media['mediaid'] title = data_media['title'] description = data_media.get('description') thumbnail = 
data_media.get('image') + urldl = data_media['sources'][0]['src'] - info = { + formats = self._extract_m3u8_formats( + urldl, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + + return { 'id': media_id, 'title': title, 'description': description, 'thumbnail': thumbnail, + 'formats': formats } - url = data_media['sources'][0]['src'] - if determine_ext(url) == 'm3u8': - info['formats'] = self._extract_m3u8_formats( - url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls') - else: - info['url'] = url +class RTVSRADIOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio)/archiv/\d+/(?P<id>\d+)' + _TEST = { + 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872', + 'md5': '134d5d6debdeddf8a5d761cbc9edacb8', + 'info_dict': { + 'id': '135320', + 'ext': 'mp3', + 'title': 'Ostrov pokladov 1 časť.mp3' + }, + 'params': { + 'skip_download': True, + } + } - return info + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + playlist_url = self._search_regex( + r'url = (["\'])(?:https?:)?(?://)(?P<url>(?:(?!\1).)+)\1', webpage, + 'playlist url', group='url') + + if not playlist_url.startswith("http"): + playlist_url = "http://" + playlist_url + + data = self._download_json( + playlist_url, video_id, 'Downloading playlist') + + data_media = data['playlist'][0] + + media_id = data_media['mediaid'] + title = data_media['title'] + description = data_media.get('description') + thumbnail = data_media.get('image') + urldl = data_media['sources'][0]['src'] + + return { + 'id': media_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'url': urldl + }