From 60098e149109a8e612a6571704503c256e5a3de9 Mon Sep 17 00:00:00 2001
From: Christophe de Vienne <christophe@cdevienne.info>
Date: Thu, 11 Jan 2018 23:02:20 +0100
Subject: [PATCH 1/7] [mytaratata] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/mytaratata.py | 68 ++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 youtube_dl/extractor/mytaratata.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 37624d37a..706bf3b7a 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -631,6 +631,7 @@ from .musicplayon import MusicPlayOnIE
 from .mwave import MwaveIE, MwaveMeetGreetIE
 from .myspace import MySpaceIE, MySpaceAlbumIE
 from .myspass import MySpassIE
+from .mytaratata import MyTaratataIE
 from .myvi import MyviIE
 from .myvidster import MyVidsterIE
 from .nationalgeographic import (
diff --git a/youtube_dl/extractor/mytaratata.py b/youtube_dl/extractor/mytaratata.py
new file mode 100644
index 000000000..44e32a42f
--- /dev/null
+++ b/youtube_dl/extractor/mytaratata.py
@@ -0,0 +1,68 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class MyTaratataIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mytaratata\.com/taratata/(?P<id>[a-zA-Z0-9_\-/]+)'
+    _TEST = {
+        'url': 'http://mytaratata.com/taratata/519/shaka-ponk-camille-et-julie-berthollet-smells-like-teen-spirit-nirvana',
+        'md5': 'c2876e18716b350c9de69cfda2662919',
+        'info_dict': {
+            'id': '519/shaka-ponk-camille-et-julie-berthollet-smells-like-teen-spirit-nirvana',
+            'ext': 'mp4',
+            'title': 'Taratata - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
+            'uploader': 'Taratata',
+            'description': 'Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
+            # 'thumbnail': r're:^https?://.*\.jpg$',
+            # TODO more properties, either as:
+            # * A value
+            # * MD5 checksum; start the string with md5:
+            # * A regular expression; start the string with re:
+            # * Any Python type (for example int or float)
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # TODO more code goes here, for example ...
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+
+        formats = []
+
+        video_source_re = re.compile(
+            r'data-source="(?P<url>http://videos.air-productions.cdn.sfr.net'
+            r'/mytaratata/Taratata[^"]+\.mp4)"'
+        )
+
+        last_vid = None
+        for url in video_source_re.findall(webpage):
+            info_m = re.match(r'.*(?P<vid>[0-9]+)-[a-f0-9]+-(?P<w>[0-9]+)x(?P<h>[0-9]+)\.mp4', url)
+            if info_m is None:
+                continue
+            vid = info_m.group('vid')
+            w = info_m.group('w')
+            h = info_m.group('h')
+            if last_vid is None:
+                last_vid = vid
+            if vid != last_vid:
+                break
+
+            formats.append({'url': url, 'width': int(w), 'height': int(h)})
+
+        formats = list(sorted(formats, key=lambda f: f['width']))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'uploader': "Taratata",
+            # TODO more properties (see youtube_dl/extractor/common.py)
+            'formats': formats,
+        }

From 63c3a458cb3df4e182338d0ec4b5b700edf4c3f7 Mon Sep 17 00:00:00 2001
From: Christophe de Vienne <christophe@cdevienne.info>
Date: Fri, 12 Jan 2018 13:20:11 +0100
Subject: [PATCH 2/7] [mytaratata] Improve title & id extraction, add thumbnail

---
 youtube_dl/extractor/mytaratata.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/mytaratata.py b/youtube_dl/extractor/mytaratata.py
index 44e32a42f..b4a7e49c4 100644
--- a/youtube_dl/extractor/mytaratata.py
+++ b/youtube_dl/extractor/mytaratata.py
@@ -11,11 +11,11 @@ class MyTaratataIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?mytaratata\.com/taratata/(?P<id>[a-zA-Z0-9_\-/]+)'
     _TEST = {
         'url': 'http://mytaratata.com/taratata/519/shaka-ponk-camille-et-julie-berthollet-smells-like-teen-spirit-nirvana',
-        'md5': 'c2876e18716b350c9de69cfda2662919',
+        'md5': '99657330eb7dec6d63a329d7f26ec93e',
         'info_dict': {
-            'id': '519/shaka-ponk-camille-et-julie-berthollet-smells-like-teen-spirit-nirvana',
+            'id': '7174',
             'ext': 'mp4',
-            'title': 'Taratata - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
+            'title': u'TARATATA N°519 - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
             'uploader': 'Taratata',
             'description': 'Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
             # 'thumbnail': r're:^https?://.*\.jpg$',
@@ -31,8 +31,8 @@ class MyTaratataIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        # TODO more code goes here, for example ...
-        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
 
         formats = []
 
@@ -43,7 +43,7 @@ class MyTaratataIE(InfoExtractor):
 
         last_vid = None
         for url in video_source_re.findall(webpage):
-            info_m = re.match(r'.*(?P<vid>[0-9]+)-[a-f0-9]+-(?P<w>[0-9]+)x(?P<h>[0-9]+)\.mp4', url)
+            info_m = re.match(r'.*/(?P<vid>[0-9]+)-[a-f0-9]+-(?P<w>[0-9]+)x(?P<h>[0-9]+)\.mp4', url)
             if info_m is None:
                 continue
             vid = info_m.group('vid')
@@ -54,15 +54,22 @@ class MyTaratataIE(InfoExtractor):
             if vid != last_vid:
                 break
 
-            formats.append({'url': url, 'width': int(w), 'height': int(h)})
+            formats.append({
+                'url': url,
+                'width': int(w),
+                'height': int(h),
+            })
 
         formats = list(sorted(formats, key=lambda f: f['width']))
 
         return {
-            'id': video_id,
-            'title': title,
-            'description': self._og_search_description(webpage),
+            'id': last_vid,
+            'title': '%s - %s' % (title, description),
+            'description': description,
+            # TODO Improve the filename, id, title.
             'uploader': "Taratata",
-            # TODO more properties (see youtube_dl/extractor/common.py)
             'formats': formats,
+            'thumbnails': [
+                {'url': self._og_search_thumbnail(webpage)},
+            ],
         }

From 52f319ddd0485cf8ba602925be63edf2cebcc828 Mon Sep 17 00:00:00 2001
From: Christophe de Vienne <christophe@cdevienne.info>
Date: Fri, 12 Jan 2018 13:40:06 +0100
Subject: [PATCH 3/7] Cleanup

---
 youtube_dl/extractor/mytaratata.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/extractor/mytaratata.py b/youtube_dl/extractor/mytaratata.py
index b4a7e49c4..52733594d 100644
--- a/youtube_dl/extractor/mytaratata.py
+++ b/youtube_dl/extractor/mytaratata.py
@@ -18,12 +18,7 @@ class MyTaratataIE(InfoExtractor):
             'title': u'TARATATA N°519 - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
             'uploader': 'Taratata',
             'description': 'Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
-            # 'thumbnail': r're:^https?://.*\.jpg$',
-            # TODO more properties, either as:
-            # * A value
-            # * MD5 checksum; start the string with md5:
-            # * A regular expression; start the string with re:
-            # * Any Python type (for example int or float)
+            'thumbnail': 'http://static.mytaratata.com/content/image/5a2562a1a5ee5.jpeg',
         }
     }
 
@@ -41,6 +36,8 @@ class MyTaratataIE(InfoExtractor):
             r'/mytaratata/Taratata[^"]+\.mp4)"'
         )
 
+        # The first videos are the live videos, coming in 2 formats. The next videos are
+        # bonuses, multi-cams... that we won't download.
         last_vid = None
         for url in video_source_re.findall(webpage):
             info_m = re.match(r'.*/(?P<vid>[0-9]+)-[a-f0-9]+-(?P<w>[0-9]+)x(?P<h>[0-9]+)\.mp4', url)
@@ -49,9 +46,12 @@ class MyTaratataIE(InfoExtractor):
             vid = info_m.group('vid')
             w = info_m.group('w')
             h = info_m.group('h')
+
             if last_vid is None:
                 last_vid = vid
+
             if vid != last_vid:
+                # We found a new video, not another format of the same one. Stop here.
                 break
 
             formats.append({
@@ -66,10 +66,7 @@ class MyTaratataIE(InfoExtractor):
             'id': last_vid,
             'title': '%s - %s' % (title, description),
             'description': description,
-            # TODO Improve the filename, id, title.
             'uploader': "Taratata",
             'formats': formats,
-            'thumbnails': [
-                {'url': self._og_search_thumbnail(webpage)},
-            ],
+            'thumbnail': self._og_search_thumbnail(webpage),
         }

From 553b8b28f97636dd34b029e71bd324cda4b79452 Mon Sep 17 00:00:00 2001
From: Christophe de Vienne <christophe@cdevienne.info>
Date: Sun, 14 Jan 2018 23:07:16 +0100
Subject: [PATCH 4/7] [mytaratata] Cleanup

Clean up a few things based on the PR review.
---
 youtube_dl/extractor/mytaratata.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/mytaratata.py b/youtube_dl/extractor/mytaratata.py
index 52733594d..e21972249 100644
--- a/youtube_dl/extractor/mytaratata.py
+++ b/youtube_dl/extractor/mytaratata.py
@@ -15,7 +15,7 @@ class MyTaratataIE(InfoExtractor):
         'info_dict': {
             'id': '7174',
             'ext': 'mp4',
-            'title': u'TARATATA N°519 - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
+            'title': 'TARATATA N°519 - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
             'uploader': 'Taratata',
             'description': 'Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
             'thumbnail': 'http://static.mytaratata.com/content/image/5a2562a1a5ee5.jpeg',
@@ -26,21 +26,25 @@ class MyTaratataIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
+        # The title contains only the program name and episode number.
+        # Each episode contains many videos.
         title = self._og_search_title(webpage)
+        # The description is the title of the video within the episode.
         description = self._og_search_description(webpage)
 
         formats = []
 
         video_source_re = re.compile(
-            r'data-source="(?P<url>http://videos.air-productions.cdn.sfr.net'
-            r'/mytaratata/Taratata[^"]+\.mp4)"'
+            r'data-source="(?P<url>http://[^/]*/mytaratata/Taratata[^"]+\.mp4)"'
         )
 
         # The first videos are the live videos, coming in 2 formats. The next videos are
         # bonuses, multi-cams... that we won't download.
         last_vid = None
-        for url in video_source_re.findall(webpage):
-            info_m = re.match(r'.*/(?P<vid>[0-9]+)-[a-f0-9]+-(?P<w>[0-9]+)x(?P<h>[0-9]+)\.mp4', url)
+        for video_url in video_source_re.findall(webpage):
+            info_m = re.match(
+                r'.*/(?P<vid>[0-9]+)-[a-f0-9]+-(?P<w>[0-9]+)x(?P<h>[0-9]+)\.mp4',
+                video_url)
             if info_m is None:
                 continue
             vid = info_m.group('vid')
@@ -55,12 +59,12 @@ class MyTaratataIE(InfoExtractor):
                 break
 
             formats.append({
-                'url': url,
+                'url': video_url,
                 'width': int(w),
                 'height': int(h),
             })
 
-        formats = list(sorted(formats, key=lambda f: f['width']))
+        self._sort_formats(formats)
 
         return {
             'id': last_vid,

From 202846d5c70d2eb1a1bd3817eb577e6049ea10bb Mon Sep 17 00:00:00 2001
From: Christophe de Vienne <christophe@cdevienne.info>
Date: Mon, 15 Jan 2018 09:40:44 +0100
Subject: [PATCH 5/7] [mytaratata] Use more relaxed regex

---
 youtube_dl/extractor/mytaratata.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/mytaratata.py b/youtube_dl/extractor/mytaratata.py
index e21972249..542653a7d 100644
--- a/youtube_dl/extractor/mytaratata.py
+++ b/youtube_dl/extractor/mytaratata.py
@@ -18,7 +18,7 @@ class MyTaratataIE(InfoExtractor):
             'title': 'TARATATA N°519 - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
             'uploader': 'Taratata',
             'description': 'Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
-            'thumbnail': 'http://static.mytaratata.com/content/image/5a2562a1a5ee5.jpeg',
+            'thumbnail': 're:https?://.*\.jpeg$',
         }
     }
 
@@ -35,7 +35,7 @@ class MyTaratataIE(InfoExtractor):
         formats = []
 
         video_source_re = re.compile(
-            r'data-source="(?P<url>http://[^/]*/mytaratata/Taratata[^"]+\.mp4)"'
+            r'<div [^>]*class="jwplayer" [^\>]*data-source="(?P<url>https?://.*/Taratata[^"]+\.mp4)"'
         )
 
         # The first videos are the live videos, coming in 2 formats. The next videos are

From 8d1e047e861ca0d27a2a3e609b7e6fe47f1f76b8 Mon Sep 17 00:00:00 2001
From: Christophe de Vienne <christophe@cdevienne.info>
Date: Mon, 22 Jan 2018 16:56:18 +0100
Subject: [PATCH 6/7] Use a more relaxed regex for video links

---
 youtube_dl/extractor/mytaratata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/mytaratata.py b/youtube_dl/extractor/mytaratata.py
index 542653a7d..0e6f67626 100644
--- a/youtube_dl/extractor/mytaratata.py
+++ b/youtube_dl/extractor/mytaratata.py
@@ -35,7 +35,7 @@ class MyTaratataIE(InfoExtractor):
         formats = []
 
         video_source_re = re.compile(
-            r'<div [^>]*class="jwplayer" [^\>]*data-source="(?P<url>https?://.*/Taratata[^"]+\.mp4)"'
+            r'<div [^>]*class="jwplayer" [^\>]*data-source="(?P<url>[^"]+)"'
         )
 
         # The first videos are the live videos, coming in 2 formats. The next videos are

From 275853a4d2741afe88bc36637de263c51507c250 Mon Sep 17 00:00:00 2001
From: Christophe de Vienne <christophe@cdevienne.info>
Date: Mon, 22 Jan 2018 17:00:07 +0100
Subject: [PATCH 7/7] Remove the 'uploader' static value

---
 youtube_dl/extractor/mytaratata.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/youtube_dl/extractor/mytaratata.py b/youtube_dl/extractor/mytaratata.py
index 0e6f67626..281e919da 100644
--- a/youtube_dl/extractor/mytaratata.py
+++ b/youtube_dl/extractor/mytaratata.py
@@ -16,7 +16,6 @@ class MyTaratataIE(InfoExtractor):
             'id': '7174',
             'ext': 'mp4',
             'title': 'TARATATA N°519 - Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
-            'uploader': 'Taratata',
             'description': 'Shaka Ponk / Camille et Julie Berthollet "Smells Like Teen Spirit" (Nirvana)',
             'thumbnail': 're:https?://.*\.jpeg$',
         }
@@ -70,7 +69,6 @@ class MyTaratataIE(InfoExtractor):
             'id': last_vid,
             'title': '%s - %s' % (title, description),
             'description': description,
-            'uploader': "Taratata",
             'formats': formats,
             'thumbnail': self._og_search_thumbnail(webpage),
         }