mirror of
https://github.com/l1ving/youtube-dl
synced 2025-01-23 12:02:51 +08:00
[RT] Attempt to fix delegation and regexps
Attempts to address the issues raised in https://github.com/rg3/youtube-dl/pull/17594#issuecomment-422050733
This commit is contained in:
parent
5b42aa585c
commit
a98310bbf8
@ -4,10 +4,11 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .ora import OraTVIE
|
from .ora import OraTVIE
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
|
from .generic import GenericIE
|
||||||
|
|
||||||
|
|
||||||
class RTIE(InfoExtractor):
|
class RTIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?rt\.com/.*/(?P<id>\d+)-.*'
|
_VALID_URL = r'https?://(?:www\.)?rt\.com/.+/(?P<id>\d+)-.+'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://www.rt.com/shows/alex-salmond-show/438343-britain-railway-london-communities/',
|
'url': 'https://www.rt.com/shows/alex-salmond-show/438343-britain-railway-london-communities/',
|
||||||
@ -27,7 +28,7 @@ class RTIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '57786',
|
'id': '57786',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Andie MacDowell on ageism in Hollywood, fame, & forest protection',
|
'title': 'md5:fa0da906fbfc7974da14ca53424b1a3a',
|
||||||
'description': 'md5:07b6bce4ad4043b136e21ef9539d46c5'
|
'description': 'md5:07b6bce4ad4043b136e21ef9539d46c5'
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -40,7 +41,7 @@ class RTIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'SHxygmDAkNE',
|
'id': 'SHxygmDAkNE',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'ICYMI: Is Elon Musk Tony Stark, or just stark raving mad?',
|
'title': 'md5:004bcbc650d8294c5cdefcc470c3cd3d',
|
||||||
'description': 'md5:99e8c3456f6904383399aeeb10784c8b',
|
'description': 'md5:99e8c3456f6904383399aeeb10784c8b',
|
||||||
'upload_date': '20180914',
|
'upload_date': '20180914',
|
||||||
'uploader_id': 'UCdgFmrDeP9nWj_eDKW6j9kQ',
|
'uploader_id': 'UCdgFmrDeP9nWj_eDKW6j9kQ',
|
||||||
@ -49,44 +50,61 @@ class RTIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': False,
|
'skip_download': False,
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.rt.com/news/438686-syria-russia-s200-il20/',
|
||||||
|
'md5': '03acfb2a27a13fb74eb5c192e53bf7e0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'YEioP7zJzMc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:e703b7c8d88725c1530661d61a626303',
|
||||||
|
'description': 'md5:8ab844abcd296d15f4a99b089e1e1347',
|
||||||
|
'upload_date': '20180918',
|
||||||
|
'uploader_id': 'RussiaToday',
|
||||||
|
'uploader': 'RT'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': False,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_title = self._html_search_regex(
|
||||||
video_title = self._search_regex(
|
r'<title>(.+?)</title>', webpage, 'title', fatal=False) or self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||||
r'<title>(.+?)</title>', webpage, 'title')
|
|
||||||
# default RT's CDN
|
|
||||||
video_url = self._search_regex(
|
|
||||||
r'file: "(https://cdnv.+?)",', webpage, 'url', fatal=False, default=None)
|
|
||||||
|
|
||||||
if video_url is None:
|
|
||||||
|
|
||||||
oratv = self._search_regex(
|
oratv = self._search_regex(
|
||||||
r'src="(//www\.ora\.tv.+?)"', webpage, 'oratv', fatal=False, default=None)
|
r'src=["\']((https?:)?//(?:www\.)ora\.tv[^\'"]+)', webpage, 'oratv', fatal=False, default=None)
|
||||||
|
|
||||||
if oratv is not None:
|
|
||||||
# some videos are embedded from ORATV
|
# some videos are embedded from ORATV
|
||||||
|
if oratv is not None:
|
||||||
|
|
||||||
oratv_embedded_webpage = self._download_webpage(oratv, video_id)
|
oratv_embedded_webpage = self._download_webpage(oratv, video_id)
|
||||||
ora_website_url = self._search_regex(
|
ora_website_url = self._search_regex(
|
||||||
r'<link rel="canonical" href="(.+?)"', oratv_embedded_webpage, 'orawebsite')
|
r'<link[^>]rel=["\']canonical["\'].+href=["\']([^\'"]+)', oratv_embedded_webpage, 'orawebsite')
|
||||||
oratvie = OraTVIE()
|
|
||||||
oratvie._downloader = self._downloader
|
|
||||||
return oratvie._real_extract(ora_website_url)
|
|
||||||
else:
|
|
||||||
# some videos are embedded from youtube
|
|
||||||
|
|
||||||
|
return self.url_result(ora_website_url, ie=OraTVIE.ie_key())
|
||||||
|
|
||||||
|
# some videos are embedded from youtube
|
||||||
yturl = self._search_regex(
|
yturl = self._search_regex(
|
||||||
r'data-url="(//www\.youtube\.com/embed.+?)"', webpage, 'youtube', fatal=False, default=None)
|
r'<div[^>]+\bdata-url=["\']((https?:)?//(?:www\.)youtube\.[^\'"]+)', webpage, 'youtube', fatal=False, default=None) or self._search_regex(
|
||||||
ytie = YoutubeIE()
|
r'<iframe[^>]+\bsrc=["\']((https?:)?//(?:www\.)youtube\.[^\'"]+)', webpage, 'youtube', fatal=False, default=None)
|
||||||
ytie._downloader = self._downloader
|
|
||||||
return ytie._real_extract(yturl)
|
if yturl is not None:
|
||||||
|
return self.url_result(yturl, ie=YoutubeIE.ie_key())
|
||||||
|
|
||||||
|
# default RT's CDN
|
||||||
|
video_url = self._search_regex(
|
||||||
|
r'file:\s*["\'](https?://[^\'"]+)', webpage, 'url', fatal=False, default=None)
|
||||||
|
|
||||||
|
if video_url is not None:
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'url': video_url
|
'url': video_url
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# attempt to use generic
|
||||||
|
return self.url_result(url, ie=GenericIE.ie_key())
|
||||||
|
Loading…
Reference in New Issue
Block a user