From 3c3e04c97541daa0937d38b405d600d454e4f5a1 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Mon, 9 Jan 2017 21:19:55 +0100
Subject: [PATCH 1/9] [twentymin] Began to fix 20min.ch extractor.

---
 youtube_dl/extractor/twentymin.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index b721ecb0a..2f93bdb36 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -60,6 +60,7 @@ class TwentyMinutenIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         display_id = mobj.group('display_id') or video_id
+        print('DISPLAY_ID: {}'.format(display_id))
 
         webpage = self._download_webpage(url, display_id)
 
@@ -75,13 +76,23 @@ class TwentyMinutenIE(InfoExtractor):
         if not title:
             title = remove_end(re.sub(
                 r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
+        print('TITLE: {}'.format(title))
 
+        # if not video_id:
+        #     video_id = self._search_regex(
+        #         r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
         if not video_id:
-            video_id = self._search_regex(
-                r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
+            videoplayer_url = self._html_search_regex(
+                r'<iframe[^>]+src="((?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=*?[^"]+)"',
+                webpage, '20min embed URL', default=None)
+            vid = re.match(r'videoID@\d+', videoplayer_url)
+            print(vid)
+
+
 
         description = self._html_search_meta(
             'description', webpage, 'description')
+        print('DESCRIPTION: {}'.format(description))
         thumbnail = self._og_search_thumbnail(webpage)
 
         return {

From 69807d59ed02b6adf393cc2c116b6ef27e8decca Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Tue, 10 Jan 2017 11:01:14 +0100
Subject: [PATCH 2/9] [twentymin] Updated the 20min extractor to support the
 site after the update.

---
 youtube_dl/extractor/twentymin.py | 32 +++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index 2f93bdb36..bc7fb8c13 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -13,7 +13,7 @@ class TwentyMinutenIE(InfoExtractor):
     _TESTS = [{
         # regular video
         'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
-        'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
+        'md5': 'e7264320db31eed8c38364150c12496e',
         'info_dict': {
             'id': '469148',
             'ext': 'flv',
@@ -34,6 +34,18 @@ class TwentyMinutenIE(InfoExtractor):
             'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
         },
         'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
+    }, {
+        # news article with video
+        'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+        'md5': '807f9e1e06a69b77440a9b315e52e580',
+        'info_dict': {
+            'id': '523629',
+            'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+            'ext': 'mp4',
+            'title': 'So kommen Sie bei Eis und Schnee sicher an',
+            'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.',
+            'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/81/teaserbreit.jpg',
+        }
     }, {
         # YouTube embed
         'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
@@ -78,27 +90,27 @@ class TwentyMinutenIE(InfoExtractor):
                 r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
         print('TITLE: {}'.format(title))
 
-        # if not video_id:
-        #     video_id = self._search_regex(
-        #         r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
         if not video_id:
-            videoplayer_url = self._html_search_regex(
-                r'<iframe[^>]+src="((?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=*?[^"]+)"',
+            params = self._html_search_regex(
+                r'<iframe[^>]+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"',
                 webpage, '20min embed URL', default=None)
-            vid = re.match(r'videoID@\d+', videoplayer_url)
-            print(vid)
-
+            print('PARMAS: {}'.format(params))
+            video_id = self._search_regex(
+                r'.*videoId@(\d+)',
+                params, 'Video Id', default=None) if params is not None else ''
+            print('VIDEO ID: {}'.format(video_id))
 
 
         description = self._html_search_meta(
             'description', webpage, 'description')
         print('DESCRIPTION: {}'.format(description))
         thumbnail = self._og_search_thumbnail(webpage)
+        print('THUMBNAIL: {}'.format(thumbnail))
 
         return {
             'id': video_id,
             'display_id': display_id,
-            'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
+            'url': 'http://podcast.20min-tv.ch/podcast/20min/%sh.mp4' % video_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,

From eaffc609a7fe6db645e35c36a5969806f4a040cd Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Wed, 11 Jan 2017 19:17:05 +0100
Subject: [PATCH 3/9] [twentymin] Fixed 20min information extractor. Tried to
 add a better video description handler, but the site seems to be too
 inconsistent...

---
 youtube_dl/extractor/twentymin.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index bc7fb8c13..2fe6d3a22 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -4,7 +4,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import remove_end
+from ..utils import (
+    remove_end,
+    ExtractorError,
+    clean_html,
+    get_element_by_class
+)
 
 
 class TwentyMinutenIE(InfoExtractor):
@@ -99,10 +104,16 @@ class TwentyMinutenIE(InfoExtractor):
                 r'.*videoId@(\d+)',
                 params, 'Video Id', default=None) if params is not None else ''
             print('VIDEO ID: {}'.format(video_id))
+            if not video_id: # the article does not contain a video
+                raise ExtractorError('No media links found on %s.' % url, expected=True)
 
-
-        description = self._html_search_meta(
-            'description', webpage, 'description')
+        # # Try to use the real video description:
+        # description = clean_html(get_element_by_class('caption', webpage))
+        # # Otherwise, use the lead text of the article as the video description:
+        # if not description:
+        #     description = self._html_search_meta(
+        #         'description', webpage, 'description')
+        description = self._html_search_meta('description', webpage, 'description')
         print('DESCRIPTION: {}'.format(description))
         thumbnail = self._og_search_thumbnail(webpage)
         print('THUMBNAIL: {}'.format(thumbnail))

From 451ae1f8b1bab294e978c61f576dcf8409dff1c9 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Wed, 11 Jan 2017 19:27:44 +0100
Subject: [PATCH 4/9] [twentymin] Clean up.

---
 youtube_dl/extractor/twentymin.py | 24 +++++-------------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index 2fe6d3a22..f9e40d7c1 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -6,9 +6,7 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     remove_end,
-    ExtractorError,
-    clean_html,
-    get_element_by_class
+    ExtractorError
 )
 
 
@@ -21,7 +19,7 @@ class TwentyMinutenIE(InfoExtractor):
         'md5': 'e7264320db31eed8c38364150c12496e',
         'info_dict': {
             'id': '469148',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': '85 000 Franken für 15 perfekte Minuten',
             'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
             'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
@@ -49,19 +47,19 @@ class TwentyMinutenIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'So kommen Sie bei Eis und Schnee sicher an',
             'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.',
-            'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/81/teaserbreit.jpg',
+            'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg',
         }
     }, {
         # YouTube embed
         'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
-        'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
+        'md5': 'e7e237fd98da2a3cc1422ce683df234d',
         'info_dict': {
             'id': 'ivM7A7SpDOs',
             'ext': 'mp4',
             'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
             'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
             'upload_date': '20160424',
-            'uploader': 'RTVCM Castilla-La Mancha',
+            'uploader': 'CMM Castilla-La Mancha Media',
             'uploader_id': 'RTVCM',
         },
         'add_ie': ['Youtube'],
@@ -77,7 +75,6 @@ class TwentyMinutenIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         display_id = mobj.group('display_id') or video_id
-        print('DISPLAY_ID: {}'.format(display_id))
 
         webpage = self._download_webpage(url, display_id)
 
@@ -93,30 +90,19 @@ class TwentyMinutenIE(InfoExtractor):
         if not title:
             title = remove_end(re.sub(
                 r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
-        print('TITLE: {}'.format(title))
 
         if not video_id:
             params = self._html_search_regex(
                 r'<iframe[^>]+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"',
                 webpage, '20min embed URL', default=None)
-            print('PARMAS: {}'.format(params))
             video_id = self._search_regex(
                 r'.*videoId@(\d+)',
                 params, 'Video Id', default=None) if params is not None else ''
-            print('VIDEO ID: {}'.format(video_id))
             if not video_id: # the article does not contain a video
                 raise ExtractorError('No media links found on %s.' % url, expected=True)
 
-        # # Try to use the real video description:
-        # description = clean_html(get_element_by_class('caption', webpage))
-        # # Otherwise, use the lead text of the article as the video description:
-        # if not description:
-        #     description = self._html_search_meta(
-        #         'description', webpage, 'description')
         description = self._html_search_meta('description', webpage, 'description')
-        print('DESCRIPTION: {}'.format(description))
         thumbnail = self._og_search_thumbnail(webpage)
-        print('THUMBNAIL: {}'.format(thumbnail))
 
         return {
             'id': video_id,

From 77eac436cbc266af5be132141c3c36a3d099d10a Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Wed, 11 Jan 2017 19:34:04 +0100
Subject: [PATCH 5/9] [twentymin] flake8

---
 youtube_dl/extractor/twentymin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index f9e40d7c1..ab35d6be2 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -98,7 +98,7 @@ class TwentyMinutenIE(InfoExtractor):
             video_id = self._search_regex(
                 r'.*videoId@(\d+)',
                 params, 'Video Id', default=None) if params is not None else ''
-            if not video_id: # the article does not contain a video
+            if not video_id:  # the article does not contain a video
                 raise ExtractorError('No media links found on %s.' % url, expected=True)
 
         description = self._html_search_meta('description', webpage, 'description')

From 8bba00cfcb403e3ad5884e079ea255a1cee737b6 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Wed, 11 Jan 2017 19:45:51 +0100
Subject: [PATCH 6/9] [twentymin] Revert to old style

---
 youtube_dl/extractor/twentymin.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index ab35d6be2..3e57c5ce7 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -101,7 +101,8 @@ class TwentyMinutenIE(InfoExtractor):
             if not video_id:  # the article does not contain a video
                 raise ExtractorError('No media links found on %s.' % url, expected=True)
 
-        description = self._html_search_meta('description', webpage, 'description')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
         thumbnail = self._og_search_thumbnail(webpage)
 
         return {

From 61f5aff95e890056d02d546e9ae1ad0c53d446a1 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Tue, 17 Jan 2017 17:03:03 +0100
Subject: [PATCH 7/9] [twentymin] Revert to old behavior if article does not
 contain any videos, and include both video formats.

---
 youtube_dl/extractor/twentymin.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index 3e57c5ce7..4f42747dd 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -4,10 +4,7 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    remove_end,
-    ExtractorError
-)
+from ..utils import remove_end
 
 
 class TwentyMinutenIE(InfoExtractor):
@@ -40,7 +37,7 @@ class TwentyMinutenIE(InfoExtractor):
     }, {
         # news article with video
         'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
-        'md5': '807f9e1e06a69b77440a9b315e52e580',
+        'md5': '372917ba85ed969e176d287ae54b2f94',
         'info_dict': {
             'id': '523629',
             'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
@@ -98,8 +95,6 @@ class TwentyMinutenIE(InfoExtractor):
             video_id = self._search_regex(
                 r'.*videoId@(\d+)',
                 params, 'Video Id', default=None) if params is not None else ''
-            if not video_id:  # the article does not contain a video
-                raise ExtractorError('No media links found on %s.' % url, expected=True)
 
         description = self._html_search_meta(
             'description', webpage, 'description')
@@ -108,8 +103,16 @@ class TwentyMinutenIE(InfoExtractor):
         return {
             'id': video_id,
             'display_id': display_id,
-            'url': 'http://podcast.20min-tv.ch/podcast/20min/%sh.mp4' % video_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
+            'formats': [{
+                'format_id': 'sd',
+                'url': 'http://podcast.20min-tv.ch/podcast/20min/%s.mp4' % video_id,
+                'preference': -2
+            }, {
+                'format_id': 'hd',
+                'url': 'http://podcast.20min-tv.ch/podcast/20min/%sh.mp4' % video_id,
+                'preference': -1
+            }]
         }

From b14da99b3c5f379e09adcb5db6b889939e13b2f2 Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Tue, 17 Jan 2017 17:39:35 +0100
Subject: [PATCH 8/9] [twentymin] Code cleanup for regular expression search in
 HTML.

---
 youtube_dl/extractor/twentymin.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index 4f42747dd..d7fc4c4b5 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -91,10 +91,10 @@ class TwentyMinutenIE(InfoExtractor):
         if not video_id:
             params = self._html_search_regex(
                 r'<iframe[^>]+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"',
-                webpage, '20min embed URL', default=None)
+                webpage, '20min embed URL', default='')
             video_id = self._search_regex(
                 r'.*videoId@(\d+)',
-                params, 'Video Id', default=None) if params is not None else ''
+                params, 'Video Id', default='')
 
         description = self._html_search_meta(
             'description', webpage, 'description')

From 7b2fd0f5b2e58c2b9beab1531d19f3e7dd0ab03a Mon Sep 17 00:00:00 2001
From: Alex Seiler <seileralex@gmail.com>
Date: Tue, 17 Jan 2017 18:15:20 +0100
Subject: [PATCH 9/9] [twentymin] Removed default values in regular expression
 search and avoided code duplication for video formats.

---
 youtube_dl/extractor/twentymin.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py
index d7fc4c4b5..68d5a0cb5 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dl/extractor/twentymin.py
@@ -91,28 +91,29 @@ class TwentyMinutenIE(InfoExtractor):
         if not video_id:
             params = self._html_search_regex(
                 r'<iframe[^>]+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"',
-                webpage, '20min embed URL', default='')
+                webpage, '20min embed URL')
             video_id = self._search_regex(
                 r'.*videoId@(\d+)',
-                params, 'Video Id', default='')
+                params, 'Video Id')
 
         description = self._html_search_meta(
             'description', webpage, 'description')
         thumbnail = self._og_search_thumbnail(webpage)
 
+        formats = []
+        format_preferences = [('sd', ''), ('hd', 'h')]
+        for format_id, url_extension in format_preferences:
+            format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension)
+            formats.append({
+                'format_id': format_id,
+                'url': format_url,
+            })
+
         return {
             'id': video_id,
             'display_id': display_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
-            'formats': [{
-                'format_id': 'sd',
-                'url': 'http://podcast.20min-tv.ch/podcast/20min/%s.mp4' % video_id,
-                'preference': -2
-            }, {
-                'format_id': 'hd',
-                'url': 'http://podcast.20min-tv.ch/podcast/20min/%sh.mp4' % video_id,
-                'preference': -1
-            }]
+            'formats': formats,
         }