From 3d20fc10bffd75b68f2a691766c7c4ccc605ede0 Mon Sep 17 00:00:00 2001
From: andrejsky <andrej.veselovsky+github@gmail.com>
Date: Mon, 17 Sep 2018 15:58:22 +0200
Subject: [PATCH 1/3] [RT] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/rt.py         | 92 ++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 youtube_dl/extractor/rt.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 7dc569724..2f2111b02 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -936,6 +936,7 @@ from .roosterteeth import RoosterTeethIE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rozhlas import RozhlasIE
+from .rt import RTIE
 from .rtbf import RTBFIE
 from .rte import RteIE, RteRadioIE
 from .rtlnl import RtlNlIE
diff --git a/youtube_dl/extractor/rt.py b/youtube_dl/extractor/rt.py
new file mode 100644
index 000000000..b6d4d97e6
--- /dev/null
+++ b/youtube_dl/extractor/rt.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .ora import OraTVIE
+from .youtube import YoutubeIE
+
+
+class RTIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rt\.com/.*/(?P<id>\d+)-.*'
+    _TESTS = [
+        {
+            'url': 'https://www.rt.com/shows/alex-salmond-show/438343-britain-railway-london-communities/',
+            'md5': '0d8f6f86673ee8d72215c8d060170a5e',
+            'info_dict': {
+                'id': '438343',
+                'ext': 'mp4',
+                'title': 'HS2: The human cost… RT — The Alex Salmond Show'
+            },
+            'params': {
+                'skip_download': False,
+            }
+        },
+        {
+            'url': 'https://www.rt.com/shows/larry-king-now/438502-andie-macdowell-on-ageism-in/',
+            'md5': '5852a10576b4add6b250f864546033f4',
+            'info_dict': {
+                'id': '57786',
+                'ext': 'mp4',
+                'title': 'Andie MacDowell on ageism in Hollywood, fame, & forest protection',
+                'description': 'md5:07b6bce4ad4043b136e21ef9539d46c5'
+            },
+            'params': {
+                'skip_download': False,
+            }
+        },
+        {
+            'url': 'https://www.rt.com/shows/icymi-with-polly-boiko/438450-musk-smoke-marijuana-radio/',
+            'md5': '2c2fe0f78f1ca225e82fb7b27c8fd3f5',
+            'info_dict': {
+                'id': 'SHxygmDAkNE',
+                'ext': 'mp4',
+                'title': u'ICYMI: Is Elon Musk Tony Stark, or just stark raving mad?',
+                'description': 'md5:99e8c3456f6904383399aeeb10784c8b',
+                'upload_date': '20180914',
+                'uploader_id': 'UCdgFmrDeP9nWj_eDKW6j9kQ',
+                'uploader': 'ICYMI'
+            },
+            'params': {
+                'skip_download': False,
+            }
+        }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_title = self._search_regex(
+            r'<title>(.+?)</title>', webpage, 'title')
+        # default RT's CDN
+        video_url = self._search_regex(
+            r'file: "(https://cdnv.+?)",', webpage, 'url', fatal=False, default=None)
+
+        if video_url is None:
+
+            oratv = self._search_regex(
+                r'src="(//www\.ora\.tv.+?)"', webpage, 'oratv', fatal=False, default=None)
+
+            if oratv is not None:
+                # some videos are embedded from ORATV
+
+                oratv_embedded_webpage = self._download_webpage(oratv, video_id)
+                ora_website_url = self._search_regex(
+                    r'<link rel="canonical" href="(.+?)"', oratv_embedded_webpage, 'orawebsite')
+                oratvie = OraTVIE()
+                oratvie._downloader = self._downloader
+                return oratvie._real_extract(ora_website_url)
+            else:
+                # some videos are embedded from youtube
+
+                yturl = self._search_regex(
+                    r'data-url="(//www\.youtube\.com/embed.+?)"', webpage, 'youtube', fatal=False, default=None)
+                ytie = YoutubeIE()
+                ytie._downloader = self._downloader
+                return ytie._real_extract(yturl)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url
+        }

From 5b42aa585c2d04c26a7ca17d06d97367a0a29aa3 Mon Sep 17 00:00:00 2001
From: andrejsky <andrej.veselovsky+github@gmail.com>
Date: Mon, 17 Sep 2018 16:59:05 +0200
Subject: [PATCH 2/3] [RT] Remove redundant u'' prefix from test title

---
 youtube_dl/extractor/rt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/rt.py b/youtube_dl/extractor/rt.py
index b6d4d97e6..220e6fc99 100644
--- a/youtube_dl/extractor/rt.py
+++ b/youtube_dl/extractor/rt.py
@@ -40,7 +40,7 @@ class RTIE(InfoExtractor):
             'info_dict': {
                 'id': 'SHxygmDAkNE',
                 'ext': 'mp4',
-                'title': u'ICYMI: Is Elon Musk Tony Stark, or just stark raving mad?',
+                'title': 'ICYMI: Is Elon Musk Tony Stark, or just stark raving mad?',
                 'description': 'md5:99e8c3456f6904383399aeeb10784c8b',
                 'upload_date': '20180914',
                 'uploader_id': 'UCdgFmrDeP9nWj_eDKW6j9kQ',

From a98310bbf863aafdd68d93994c43d5ee922b3dab Mon Sep 17 00:00:00 2001
From: andrejsky <andrej.veselovsky+github@gmail.com>
Date: Tue, 18 Sep 2018 16:07:25 +0200
Subject: [PATCH 3/3] [RT] Attempt to fix delegation and regexps

Attempts to address the issues raised in https://github.com/rg3/youtube-dl/pull/17594#issuecomment-422050733
---
 youtube_dl/extractor/rt.py | 84 +++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 33 deletions(-)

diff --git a/youtube_dl/extractor/rt.py b/youtube_dl/extractor/rt.py
index 220e6fc99..89b2ddc16 100644
--- a/youtube_dl/extractor/rt.py
+++ b/youtube_dl/extractor/rt.py
@@ -4,10 +4,11 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from .ora import OraTVIE
 from .youtube import YoutubeIE
+from .generic import GenericIE
 
 
 class RTIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rt\.com/.*/(?P<id>\d+)-.*'
+    _VALID_URL = r'https?://(?:www\.)?rt\.com/.+/(?P<id>\d+)-.+'
     _TESTS = [
         {
             'url': 'https://www.rt.com/shows/alex-salmond-show/438343-britain-railway-london-communities/',
@@ -27,7 +28,7 @@ class RTIE(InfoExtractor):
             'info_dict': {
                 'id': '57786',
                 'ext': 'mp4',
-                'title': 'Andie MacDowell on ageism in Hollywood, fame, & forest protection',
+                'title': 'md5:fa0da906fbfc7974da14ca53424b1a3a',
                 'description': 'md5:07b6bce4ad4043b136e21ef9539d46c5'
             },
             'params': {
@@ -40,7 +41,7 @@ class RTIE(InfoExtractor):
             'info_dict': {
                 'id': 'SHxygmDAkNE',
                 'ext': 'mp4',
-                'title': 'ICYMI: Is Elon Musk Tony Stark, or just stark raving mad?',
+                'title': 'md5:004bcbc650d8294c5cdefcc470c3cd3d',
                 'description': 'md5:99e8c3456f6904383399aeeb10784c8b',
                 'upload_date': '20180914',
                 'uploader_id': 'UCdgFmrDeP9nWj_eDKW6j9kQ',
@@ -49,44 +50,61 @@ class RTIE(InfoExtractor):
             'params': {
                 'skip_download': False,
             }
+        },
+        {
+            'url': 'https://www.rt.com/news/438686-syria-russia-s200-il20/',
+            'md5': '03acfb2a27a13fb74eb5c192e53bf7e0',
+            'info_dict': {
+                'id': 'YEioP7zJzMc',
+                'ext': 'mp4',
+                'title': 'md5:e703b7c8d88725c1530661d61a626303',
+                'description': 'md5:8ab844abcd296d15f4a99b089e1e1347',
+                'upload_date': '20180918',
+                'uploader_id': 'RussiaToday',
+                'uploader': 'RT'
+            },
+            'params': {
+                'skip_download': False,
+            }
         }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-
         webpage = self._download_webpage(url, video_id)
+        # prefer <title>; og:/twitter: meta tags are the fallback, so the first
+        # lookup uses default=None to stay silent when <title> is absent
+        video_title = self._html_search_regex(
+            r'<title>(.+?)</title>', webpage, 'title', default=None) or self._html_search_meta(['og:title', 'twitter:title'], webpage)
+
+        # some videos are embedded from ORATV
+        oratv = self._search_regex(
+            r'src=["\']((?:https?:)?//(?:www\.)?ora\.tv[^\'"]+)', webpage, 'oratv', default=None)
+        if oratv is not None:
+            oratv_embedded_webpage = self._download_webpage(oratv, video_id)
+            # follow the embed page's canonical link and hand that URL to
+            # OraTVIE; the match is confined to a single <link ...> tag
+            ora_website_url = self._search_regex(
+                r'<link[^>]+\brel=["\']canonical["\'][^>]+\bhref=["\']([^\'"]+)', oratv_embedded_webpage, 'orawebsite')
+            return self.url_result(ora_website_url, ie=OraTVIE.ie_key())
+
+        # some videos are embedded from youtube
+        yturl = self._search_regex(
+            r'<div[^>]+\bdata-url=["\']((?:https?:)?//(?:www\.)?youtube\.[^\'"]+)', webpage, 'youtube', default=None) or self._search_regex(
+            r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youtube\.[^\'"]+)', webpage, 'youtube', default=None)
+        if yturl is not None:
+            return self.url_result(yturl, ie=YoutubeIE.ie_key())
 
-        video_title = self._search_regex(
-            r'<title>(.+?)</title>', webpage, 'title')
         # default RT's CDN
         video_url = self._search_regex(
-            r'file: "(https://cdnv.+?)",', webpage, 'url', fatal=False, default=None)
+            r'file:\s*["\'](https?://[^\'"]+)', webpage, 'url', default=None)
 
-        if video_url is None:
+        if video_url is not None:
 
-            oratv = self._search_regex(
-                r'src="(//www\.ora\.tv.+?)"', webpage, 'oratv', fatal=False, default=None)
+            return {
+                'id': video_id,
+                'title': video_title,
+                'url': video_url
+            }
 
-            if oratv is not None:
-                # some videos are embedded from ORATV
-
-                oratv_embedded_webpage = self._download_webpage(oratv, video_id)
-                ora_website_url = self._search_regex(
-                    r'<link rel="canonical" href="(.+?)"', oratv_embedded_webpage, 'orawebsite')
-                oratvie = OraTVIE()
-                oratvie._downloader = self._downloader
-                return oratvie._real_extract(ora_website_url)
-            else:
-                # some videos are embedded from youtube
-
-                yturl = self._search_regex(
-                    r'data-url="(//www\.youtube\.com/embed.+?)"', webpage, 'youtube', fatal=False, default=None)
-                ytie = YoutubeIE()
-                ytie._downloader = self._downloader
-                return ytie._real_extract(yturl)
-
-        return {
-            'id': video_id,
-            'title': video_title,
-            'url': video_url
-        }
+        # attempt to use generic
+        return self.url_result(url, ie=GenericIE.ie_key())