From 0e87eec65f8edbb9d64cd3f6f21e6626a781185a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Mon, 5 Sep 2016 20:45:35 +0300 Subject: [PATCH] [lrt] Make it behave better with audio-only links Some lrt.lt links contain only audio (mp3) files so make the extractor better at behaving in these situations. In such cases only grab the link to the MP3 file. --- youtube_dl/extractor/lrt.py | 64 ++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 1072405b3..ac920f935 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -12,7 +12,7 @@ from ..utils import ( class LRTIE(InfoExtractor): IE_NAME = 'lrt.lt' _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.lrt.lt/mediateka/irasas/54391/', 'info_dict': { 'id': '54391', @@ -22,23 +22,27 @@ class LRTIE(InfoExtractor): 'duration': 1783, 'view_count': int, 'like_count': int, - }, + }, 'params': { 'skip_download': True, # m3u8 download + }, }, - } + { + 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524', + 'info_dict': { + 'id': '1013074524', + 'ext': 'mp3', + 'title': 'Kita tema 2016-09-05 15:05', + 'duration': 3008, + 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = remove_end(self._og_search_title(webpage), ' - LRT') - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', - webpage, 'm3u8 url', group='url') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') - self._sort_formats(formats) - thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage) duration = parse_duration(self._search_regex( @@ -52,13 +56,35 @@ class LRTIE(InfoExtractor): r']+id=(["\'])flikesCount.*?\1>(?P\d+)<', webpage, 'like count', fatal=False, group='count')) - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': thumbnail, - 'description': description, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - } + + m3u8_url = self._search_regex( + r'\s+[^//]file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', + webpage, 'm3u8 url', group='url', default=None) + if m3u8_url: + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'like_count': like_count + } + else: + mp3_url = self._search_regex( + r'\s+[^//]file\s*:\s*(["\'])(?P.+?)\1', + webpage, 'mp3 url', group='url') + return { + 'id': video_id, + 'ext': 'mp3', + 'url': mp3_url, + 'title': title, + 'duration': duration, + 'description': description, + 'view_count': view_count, + 'like_count': like_count + }