From 62962240b1d17c4060fd125f50505c249c7288b4 Mon Sep 17 00:00:00 2001
From: metalgamer <dennis.fink@c3l.lu>
Date: Tue, 20 Jan 2015 11:02:01 +0100
Subject: [PATCH 1/4] Added rtl.lu extractor

---
 youtube_dl/extractor/__init__.py |   1 +
 youtube_dl/extractor/rtllu.py    | 108 +++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 youtube_dl/extractor/rtllu.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 0902eb437..a603fc900 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -346,6 +346,7 @@ from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
 from .rte import RteIE
+from .rtllu import RtlluIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
 from .rtp import RTPIE
diff --git a/youtube_dl/extractor/rtllu.py b/youtube_dl/extractor/rtllu.py
new file mode 100644
index 000000000..7220b226f
--- /dev/null
+++ b/youtube_dl/extractor/rtllu.py
@@ -0,0 +1,108 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class RtlluIE(InfoExtractor):
+    IE_NAME = 'rtl.lu'
+
+    _VALID_URL = r'https?://(www|tele|radio|5minutes)\.rtl\.lu\/.*?\/(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://radio.rtl.lu/emissiounen/background/599319.html',
+        'md5': 'TODO:',
+        'info_dict': {
+            'id': '599319',
+            'ext': 'mp4',
+        },
+    }
+
+    def _real_extract(self, url):
+        match = self._VALID_URL_RE.match(url)
+        id = match.group('id')
+
+        webpage = self._download_webpage(url, id)
+
+        javascript_regex = r'<script language="Javascript">((\n*?.*?)*?)</script>'
+        javascript = self._html_search_regex(javascript_regex, webpage, 'javascript')
+
+        try:
+            javascript_sources_regex = r'object.*\.sources = \'(?P<value>.*?)\';'
+            sources = json.loads(re.search(javascript_sources_regex, javascript).group('value'))
+
+            javascript_thumbnail_regex = r'object.*\.title = \'(?P<value>.*?)\';'
+            javascript_thumbnail = re.search(javascript_thumbnail_regex, javascript).group('value')
+
+            javascript_videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';'
+            javascript_videoid = re.search(javascript_videoid_regex, javascript).group('value')
+
+            javascript_publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';'
+            javascript_publicdate = re.search(javascript_publicdate_regex, javascript).group('value')
+
+            formats = [
+                {
+                    'url': sources['httplq']['src'],
+                    'format': 'Low Quality',
+                    'format_id': 'lq',
+                    'protocol': 'http',
+                },
+                {
+                    'url': sources['http']['src'],
+                    'format': 'Standard Quality',
+                    'format_id': 'sd',
+                    'protocol': 'http',
+                },
+                {
+                    'url': sources['httphq']['src'],
+                    'format': 'High Quality',
+                    'format_id': 'hq',
+                    'protocol': 'http',
+                },
+            ]
+
+            return {
+                'id': javascript_videoid or id,
+                'title': self.get_video_title(webpage, javascript),
+                'formats': formats,
+                'thumbnail': javascript_thumbnail,
+                'upload_date': javascript_publicdate,
+            }
+        except AttributeError:
+            javascript_mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",'
+            javascript_mp3 = re.search(javascript_mp3_regex, javascript).group('value')
+            print(javascript_mp3)
+
+            return {
+                'id': id,
+                'title': self.get_audio_title(webpage),
+                'url': javascript_mp3,
+            }
+
+    def get_video_title(self, webpage, javascript):
+
+        title_regex = r'</div>.*<h1>(?P<title>.*?)</h1>.*?<p class="sub">'
+        title = re.findall(title_regex, webpage, flags=re.S)
+
+        if title:
+            title = title[-1]
+
+        javascript_title_regex = r'object.*\.title = \'(?P<value>.*?)\';'
+        javascript_title = re.search(javascript_title_regex, javascript).group('value')
+        return javascript_title or title or self._og_search_title(webpage)
+
+    def get_audio_title(self, webpage):
+        """Build the audio title from the page header, then <h5>, then og:title."""
+        title_regex = r'<header><h1><span>(?P<span>.*?)</span>(?P<title>.*?)</h1>'
+        title = self._html_search_regex(title_regex, webpage, 'title', group='title', fatal=False)
+        span = self._html_search_regex(title_regex, webpage, 'span', group='span', fatal=False)
+        # Drop empty or missing parts so no dangling ' - ' separator is left.
+        if title or span:
+            title = ' - '.join(part for part in (span, title) if part)
+
+        h5_title_regex = r'<h5>(?P<title>.*?)</h5>'
+        h5_title = self._html_search_regex(h5_title_regex, webpage, 'title', group='title', fatal=False)
+
+        return title or h5_title or self._og_search_title(webpage)

From 53ca134ca9e91639330723543c7deefd7369a0c6 Mon Sep 17 00:00:00 2001
From: Dennis Fink <dennis.fink@c3l.lu>
Date: Mon, 3 Oct 2016 00:29:17 +0200
Subject: [PATCH 2/4] [Rtllu] Register extractor, fix thumbnail regex and add RTMP format

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/rtllu.py      | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e8928307c..7910ed978 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -748,6 +748,7 @@ from .roxwel import RoxwelIE
 from .rozhlas import RozhlasIE
 from .rtbf import RTBFIE
 from .rte import RteIE, RteRadioIE
+from .rtllu import RtlluIE
 from .rtlnl import RtlNlIE
 from .rtl2 import RTL2IE
 from .rtp import RTPIE
diff --git a/youtube_dl/extractor/rtllu.py b/youtube_dl/extractor/rtllu.py
index 7220b226f..454005388 100644
--- a/youtube_dl/extractor/rtllu.py
+++ b/youtube_dl/extractor/rtllu.py
@@ -12,11 +12,12 @@ class RtlluIE(InfoExtractor):
     _VALID_URL = r'https?://(www|tele|radio|5minutes)\.rtl\.lu\/.*?\/(?P<id>[0-9]+)'
 
     _TEST = {
-        'url': 'http://radio.rtl.lu/emissiounen/background/599319.html',
-        'md5': 'TODO:',
+        'url': 'http://tele.rtl.lu/emissiounen/documentaire-routwaissgro/lu/890363.html',
+        'md5': '38a2d2286ff4b8ccc300e847294cb90a',
         'info_dict': {
             'id': '599319',
             'ext': 'mp4',
+            'title': '"Vënz de Prënz" (18.03.2016)',
         },
     }
 
@@ -33,16 +34,23 @@ class RtlluIE(InfoExtractor):
             javascript_sources_regex = r'object.*\.sources = \'(?P<value>.*?)\';'
             sources = json.loads(re.search(javascript_sources_regex, javascript).group('value'))
 
-            javascript_thumbnail_regex = r'object.*\.title = \'(?P<value>.*?)\';'
-            javascript_thumbnail = re.search(javascript_thumbnail_regex, javascript).group('value')
-
             javascript_videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';'
             javascript_videoid = re.search(javascript_videoid_regex, javascript).group('value')
 
             javascript_publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';'
             javascript_publicdate = re.search(javascript_publicdate_regex, javascript).group('value')
 
+            javascript_thumbnail_regex = r'object.*\.thumbnail = \'(?P<value>.*?)\';'
+            javascript_thumbnail = re.search(javascript_thumbnail_regex, javascript).group('value')
+
             formats = [
+                {
+                    'url': sources['rtmp']['src'],
+                    'format': 'RTMP Stream',
+                    'format_id': 'rtmp',
+                    'protocol': 'rtmp',
+                },
+
                 {
                     'url': sources['httplq']['src'],
                     'format': 'Low Quality',
@@ -73,7 +81,6 @@ class RtlluIE(InfoExtractor):
         except AttributeError:
             javascript_mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",'
             javascript_mp3 = re.search(javascript_mp3_regex, javascript).group('value')
-            print(javascript_mp3)
 
             return {
                 'id': id,

From 36ce480413f3e316023565e4e6b3d686913fc29c Mon Sep 17 00:00:00 2001
From: Dennis Fink <dennis.fink@c3l.lu>
Date: Mon, 3 Oct 2016 18:19:29 +0200
Subject: [PATCH 3/4] [Rtllu] Fixed travis-ci issues: use _match_id/_search_regex, skip missing sources

---
 youtube_dl/extractor/rtllu.py | 101 +++++++++++++++++-----------------
 1 file changed, 51 insertions(+), 50 deletions(-)

diff --git a/youtube_dl/extractor/rtllu.py b/youtube_dl/extractor/rtllu.py
index 454005388..24f95cd2a 100644
--- a/youtube_dl/extractor/rtllu.py
+++ b/youtube_dl/extractor/rtllu.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -22,9 +23,7 @@ class RtlluIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        match = self._VALID_URL_RE.match(url)
-        id = match.group('id')
-
+        id = self._match_id(url)
         webpage = self._download_webpage(url, id)
 
         javascript_regex = r'<script language="Javascript">((\n*?.*?)*?)</script>'
@@ -32,60 +31,62 @@ class RtlluIE(InfoExtractor):
 
-        try:
-            javascript_sources_regex = r'object.*\.sources = \'(?P<value>.*?)\';'
-            sources = json.loads(re.search(javascript_sources_regex, javascript).group('value'))
+        # Video pages assign a JSON blob to `object<N>.sources`; audio pages do
+        # not.  Look it up non-fatally and branch on the result: a failed
+        # `_search_regex` raises RegexNotFoundError, never AttributeError, so
+        # an `except AttributeError` fallback would not be reached.
+        javascript_sources_regex = r'object.*\.sources = \'(?P<value>.*?)\';'
+        sources = self._search_regex(javascript_sources_regex, javascript, 'sources', default=None)
+
+        if sources is not None:
+            sources = json.loads(sources)
 
-            javascript_videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';'
-            javascript_videoid = re.search(javascript_videoid_regex, javascript).group('value')
+            videoid_regex = r'object.*\.videoid = \'(?P<value>.*?)\';'
+            videoid = self._search_regex(videoid_regex, javascript, 'videoid', fatal=False, default=id)
 
-            javascript_publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';'
-            javascript_publicdate = re.search(javascript_publicdate_regex, javascript).group('value')
+            publicdate_regex = r'object.*\.publicdate = \'(?P<value>.*?)\';'
+            publicdate = self._search_regex(publicdate_regex, javascript, 'publicdate', fatal=False)
 
-            javascript_thumbnail_regex = r'object.*\.thumbnail = \'(?P<value>.*?)\';'
-            javascript_thumbnail = re.search(javascript_thumbnail_regex, javascript).group('value')
+            thumbnail_regex = r'object.*\.thumbnail = \'(?P<value>.*?)\';'
+            thumbnail = self._search_regex(thumbnail_regex, javascript, 'thumbnail', fatal=False)
 
-            formats = [
-                {
-                    'url': sources['rtmp']['src'],
-                    'format': 'RTMP Stream',
-                    'format_id': 'rtmp',
-                    'protocol': 'rtmp',
-                },
+            formats = []
 
-                {
-                    'url': sources['httplq']['src'],
-                    'format': 'Low Quality',
-                    'format_id': 'lq',
-                    'protocol': 'http',
-                },
-                {
-                    'url': sources['http']['src'],
-                    'format': 'Standard Quality',
-                    'format_id': 'sd',
-                    'protocol': 'http',
-                },
-                {
-                    'url': sources['httphq']['src'],
-                    'format': 'High Quality',
-                    'format_id': 'hq',
-                    'protocol': 'http',
-                },
-            ]
+            # (source key, display name, format id, protocol), in the order the
+            # formats are offered; sources missing from the blob are skipped.
+            format_specs = (
+                ('rtmp', 'RTMP Stream', 'rtmp', 'rtmp'),
+                ('httplq', 'Low Quality', 'lq', 'http'),
+                ('http', 'Standard Quality', 'sd', 'http'),
+                ('httphq', 'High Quality', 'hq', 'http'),
+            )
+            for source_key, format_name, format_id, protocol in format_specs:
+                source = sources.get(source_key)
+                source_url = source.get('src') if source is not None else None
+                if source_url is None:
+                    continue
+                formats.append({
+                    'url': source_url,
+                    'format': format_name,
+                    'format_id': format_id,
+                    'protocol': protocol,
+                })
 
             return {
-                'id': javascript_videoid or id,
+                'id': videoid,
                 'title': self.get_video_title(webpage, javascript),
                 'formats': formats,
-                'thumbnail': javascript_thumbnail,
-                'upload_date': javascript_publicdate,
+                'thumbnail': thumbnail,
+                'upload_date': publicdate,
             }
-        except AttributeError:
-            javascript_mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",'
-            javascript_mp3 = re.search(javascript_mp3_regex, javascript).group('value')
 
-            return {
-                'id': id,
-                'title': self.get_audio_title(webpage),
-                'url': javascript_mp3,
-            }
+        # No video sources on the page: treat it as an audio page and take the
+        # MP3 URL from the `play_mp3(...)` call.
+        mp3_regex = r'play_mp3\("object[0-9]*", "(?P<value>.*?)",'
+        mp3_url = self._search_regex(mp3_regex, javascript, 'mp3_url')
+
+        return {
+            'id': id,
+            'title': self.get_audio_title(webpage),
+            'url': mp3_url,
+        }
 
     def get_video_title(self, webpage, javascript):
@@ -97,7 +98,7 @@ class RtlluIE(InfoExtractor):
             title = title[-1]
 
         javascript_title_regex = r'object.*\.title = \'(?P<value>.*?)\';'
-        javascript_title = re.search(javascript_title_regex, javascript).group('value')
+        javascript_title = self._search_regex(javascript_title_regex, javascript, 'javascript_title', fatal=False)
         return javascript_title or title or self._og_search_title(webpage)
 
     def get_audio_title(self, webpage):

From 20155ed96a8778641e22582f1bfa153105ca6f14 Mon Sep 17 00:00:00 2001
From: Dennis Fink <dennis.fink@c3l.lu>
Date: Mon, 3 Oct 2016 22:15:34 +0200
Subject: [PATCH 4/4] [Rtllu] Fixed test expectations (md5, id, upload_date)

---
 youtube_dl/extractor/rtllu.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/rtllu.py b/youtube_dl/extractor/rtllu.py
index 24f95cd2a..df9ff3e54 100644
--- a/youtube_dl/extractor/rtllu.py
+++ b/youtube_dl/extractor/rtllu.py
@@ -14,11 +14,12 @@ class RtlluIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://tele.rtl.lu/emissiounen/documentaire-routwaissgro/lu/890363.html',
-        'md5': '38a2d2286ff4b8ccc300e847294cb90a',
+        'md5': 'a9f34b9c8a20a61c2332b1f2f8c084d6',
         'info_dict': {
-            'id': '599319',
+            'id': '3057497',
             'ext': 'mp4',
             'title': '"Vënz de Prënz" (18.03.2016)',
+            'upload_date': '20160318',
         },
     }