[showroomlive] Improve (closes #11458)

2024-11-20 17:52:51 +08:00 · 2016-12-30 00:12:35 +07:00 · 2016-12-30 00:12:35 +07:00 · df086e74e2
commit df086e74e2
parent 963bd5ecfc
1 changed file with 62 additions and 58 deletions
--- a/youtube_dl/extractor/showroomlive.py
+++ b/youtube_dl/extractor/showroomlive.py
@@ -2,79 +2,83 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urlparse
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    urljoin,
+)


-class ShowroomLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?P<id>[0-9a-zA-Z_]+)'
+class ShowRoomLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.showroom-live.com/48_Nana_Okada',
-        'skip': 'Only live broadcasts, can\'t predict test case.',
-        'info_dict': {
-            'id': '48_Nana_Okada',
-            'ext': 'mp4',
-            'uploader_id': '48_Nana_Okada',
-        }
+        'only_matching': True,
    }

    def _real_extract(self, url):
        broadcaster_id = self._match_id(url)

-        # There is no showroom on these pages.
-        if broadcaster_id in ['onlive', 'timetable', 'event', 'campaign', 'news', 'ranking']:
-            raise ExtractorError('URL %s does not contain a showroom' % url)
-
-        # Retrieve the information we need
        webpage = self._download_webpage(url, broadcaster_id)
-        room_id = self._search_regex(r'profile\?room_id\=(\d+)', webpage, 'room_id')
-        room_url = compat_urlparse.urljoin(url, "/api/room/profile?room_id=%s") % room_id
-        room = self._download_json(room_url, broadcaster_id)
+
+        room_id = self._search_regex(
+            (r'SrGlobal\.roomId\s*=\s*(\d+)',
+             r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
+
+        room = self._download_json(
+            urljoin(url, '/api/room/profile?room_id=%s' % room_id),
+            broadcaster_id)

        is_live = room.get('is_onlive')
-        if not is_live:
-            raise ExtractorError('%s their showroom is not live' % broadcaster_id)
+        if is_live is not True:
+            raise ExtractorError('%s is offline' % broadcaster_id, expected=True)

-        # Prepare and return the information
-        uploader = room.get('performer_name') or broadcaster_id  # performer_name can be an empty string.
-        title = room.get('room_name', room.get('main_name', "%s's Showroom" % uploader))
+        uploader = room.get('performer_name') or broadcaster_id
+        title = room.get('room_name') or room.get('main_name') or uploader
+
+        streaming_url_list = self._download_json(
+            urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id),
+            broadcaster_id)['streaming_url_list']
+
+        formats = []
+        for stream in streaming_url_list:
+            stream_url = stream.get('url')
+            if not stream_url:
+                continue
+            stream_type = stream.get('type')
+            if stream_type == 'hls':
+                m3u8_formats = self._extract_m3u8_formats(
+                    stream_url, broadcaster_id, ext='mp4', m3u8_id='hls',
+                    live=True)
+                for f in m3u8_formats:
+                    f['quality'] = int_or_none(stream.get('quality', 100))
+                formats.extend(m3u8_formats)
+            elif stream_type == 'rtmp':
+                stream_name = stream.get('stream_name')
+                if not stream_name:
+                    continue
+                formats.append({
+                    'url': stream_url,
+                    'play_path': stream_name,
+                    'page_url': url,
+                    'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf',
+                    'rtmp_live': True,
+                    'ext': 'flv',
+                    'format_id': 'rtmp',
+                    'format_note': stream.get('label'),
+                    'quality': int_or_none(stream.get('quality', 100)),
+                })
+        self._sort_formats(formats)

        return {
-            'is_live': is_live,
-            'id': str(room.get('live_id')),
-            'timestamp': room.get('current_live_started_at'),
+            'id': compat_str(room.get('live_id') or broadcaster_id),
+            'title': self._live_title(title),
+            'description': room.get('description'),
+            'timestamp': int_or_none(room.get('current_live_started_at')),
            'uploader': uploader,
            'uploader_id': broadcaster_id,
-            'title': title,
-            'description': room.get('description'),
-            'formats': self._extract_formats(url, broadcaster_id, room_id)
+            'view_count': int_or_none(room.get('view_num')),
+            'formats': formats,
+            'is_live': True,
        }
-
-    def _extract_formats(self, url, broadcaster_id, room_id):
-        formats = []
-
-        stream_url = compat_urlparse.urljoin(url, "/api/live/streaming_url?room_id=%s") % room_id
-        streaming_url_list = self._download_json(stream_url, broadcaster_id).get('streaming_url_list', [])
-
-        for stream in streaming_url_list:
-            if stream.get('type') == "hls":
-                formats.extend(self._extract_m3u8_formats(
-                    stream.get('url'),
-                    broadcaster_id,
-                    ext='mp4',
-                    m3u8_id='hls',
-                    preference=stream.get('quality', 100),
-                    live=True
-                ))
-            elif stream.get('type') == 'rtmp':
-                url = stream.get('url') + '/' + stream.get('stream_name')
-                formats.append({
-                    'url': url,
-                    'format_id': 'rtmp',
-                    'protocol': 'rtmp',
-                    'ext': 'flv',
-                    'preference': stream.get('quality', 100),
-                    'format_note': stream.get('label')
-                })
-
-        self._sort_formats(formats)
-        return formats