1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-25 09:03:00 +08:00

[lrt] Make it behave better with audio-only links

Some lrt.lt links contain only audio (MP3) files, so make the extractor
handle them gracefully. When no m3u8 URL is found on the page, grab only
the link to the MP3 file instead.
This commit is contained in:
Giedrius Statkevičius 2016-09-05 20:45:35 +03:00
parent f3eeaacb4e
commit 0e87eec65f

View File

@ -12,7 +12,7 @@ from ..utils import (
class LRTIE(InfoExtractor): class LRTIE(InfoExtractor):
IE_NAME = 'lrt.lt' IE_NAME = 'lrt.lt'
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
_TEST = { _TESTS = [{
'url': 'http://www.lrt.lt/mediateka/irasas/54391/', 'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
'info_dict': { 'info_dict': {
'id': '54391', 'id': '54391',
@ -22,23 +22,27 @@ class LRTIE(InfoExtractor):
'duration': 1783, 'duration': 1783,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
}, },
'params': { 'params': {
'skip_download': True, # m3u8 download 'skip_download': True, # m3u8 download
},
}, },
} {
'url': 'http://www.lrt.lt/mediateka/irasas/1013074524',
'info_dict': {
'id': '1013074524',
'ext': 'mp3',
'title': 'Kita tema 2016-09-05 15:05',
'duration': 3008,
'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' - LRT') title = remove_end(self._og_search_title(webpage), ' - LRT')
m3u8_url = self._search_regex(
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
webpage, 'm3u8 url', group='url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
duration = parse_duration(self._search_regex( duration = parse_duration(self._search_regex(
@ -52,13 +56,35 @@ class LRTIE(InfoExtractor):
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<', r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
webpage, 'like count', fatal=False, group='count')) webpage, 'like count', fatal=False, group='count'))
return {
'id': video_id, m3u8_url = self._search_regex(
'title': title, r'\s+[^//]file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
'formats': formats, webpage, 'm3u8 url', group='url', default=None)
'thumbnail': thumbnail, if m3u8_url:
'description': description, formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
'duration': duration, self._sort_formats(formats)
'view_count': view_count,
'like_count': like_count, return {
} 'id': video_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
'view_count': view_count,
'like_count': like_count
}
else:
mp3_url = self._search_regex(
r'\s+[^//]file\s*:\s*(["\'])(?P<url>.+?)\1',
webpage, 'mp3 url', group='url')
return {
'id': video_id,
'ext': 'mp3',
'url': mp3_url,
'title': title,
'duration': duration,
'description': description,
'view_count': view_count,
'like_count': like_count
}