[lrt] Handle audio-only (MP3) links gracefully

Some lrt.lt links contain only audio (MP3) files, so make the extractor handle these cases: when no m3u8 stream URL is found on the page, extract only the direct link to the MP3 file.
2025-01-25 04:35:47 +08:00 · 2016-09-05 20:45:35 +03:00 · 2016-09-05 20:45:35 +03:00 · 0e87eec65f
commit 0e87eec65f
parent f3eeaacb4e
1 changed file with 45 additions and 19 deletions
--- a/youtube_dl/extractor/lrt.py
+++ b/youtube_dl/extractor/lrt.py
@ -12,7 +12,7 @@ from ..utils import (
 class LRTIE(InfoExtractor):
    IE_NAME = 'lrt.lt'
    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
        'info_dict': {
            'id': '54391',
@ -26,19 +26,23 @@ class LRTIE(InfoExtractor):
        'params': {
            'skip_download': True,  # m3u8 download
            },
-    }
+        },
+        {
+            'url': 'http://www.lrt.lt/mediateka/irasas/1013074524',
+            'info_dict': {
+                'id': '1013074524',
+                'ext': 'mp3',
+                'title': 'Kita tema 2016-09-05 15:05',
+                'duration': 3008,
+                'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
+                },
+            }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = remove_end(self._og_search_title(webpage), ' - LRT')
-        m3u8_url = self._search_regex(
-            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
-            webpage, 'm3u8 url', group='url')
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
-        self._sort_formats(formats)
-
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)
        duration = parse_duration(self._search_regex(
@ -52,6 +56,14 @@ class LRTIE(InfoExtractor):
            r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
            webpage, 'like count', fatal=False, group='count'))

+
+        m3u8_url = self._search_regex(
+            r'\s+[^//]file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
+            webpage, 'm3u8 url', group='url', default=None)
+        if m3u8_url:
+            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+            self._sort_formats(formats)
+
            return {
                    'id': video_id,
                    'title': title,
@ -60,5 +72,19 @@ class LRTIE(InfoExtractor):
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
-            'like_count': like_count,
+                    'like_count': like_count
+            }
+        else:
+            mp3_url = self._search_regex(
+                    r'\s+[^//]file\s*:\s*(["\'])(?P<url>.+?)\1',
+                    webpage, 'mp3 url', group='url')
+            return {
+                    'id': video_id,
+                    'ext': 'mp3',
+                    'url': mp3_url,
+                    'title': title,
+                    'duration': duration,
+                    'description': description,
+                    'view_count': view_count,
+                    'like_count': like_count
            }