From fa675891347fd3344a25dec2039599902959dba9 Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Sat, 27 Feb 2016 18:21:42 +0100
Subject: [PATCH 01/11] [LCP] Add new extractor

---
 youtube_dl/extractor/__init__.py |   1 +
 youtube_dl/extractor/lcp.py      | 137 +++++++++++++++++++++++++++++++
 2 files changed, 138 insertions(+)
 create mode 100644 youtube_dl/extractor/lcp.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 1ae606f1e..8bb2631a7 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -349,6 +349,7 @@ from .kuwo import (
 )
 from .la7 import LA7IE
 from .laola1tv import Laola1TvIE
+from .lcp import LcpIE
 from .lecture2go import Lecture2GoIE
 from .lemonde import LemondeIE
 from .letv import (
diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
new file mode 100644
index 000000000..be586e1f1
--- /dev/null
+++ b/youtube_dl/extractor/lcp.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none
+)
+
+
+class LcpIE(InfoExtractor):
+    IE_NAME = 'LCP'
+    _VALID_URL = r'https?:\/\/(?:www\.)?lcp\.fr\/(?:[^\/]+/)*(?P<id>[^/]+)'
+
+    _TESTS = [{
+        'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
+        'md5': 'aecf5a330cfc1061445a9af5b2df392d',
+        'info_dict': {
+            'id': 'd56d03e9',
+            'url': 're:http://httpod.scdn.arkena.com/11970/d56d03e9_[0-9]+.mp4',
+            'ext': 'mp4',
+            'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche'
+        }
+    }, {
+        'url': 'http://www.lcp.fr/emissions/politique-matin/271085-politique-matin',
+        'md5': '6cea4f7d13810464ef8485a924fc3333',
+        'info_dict': {
+            'id': '327336',
+            'url': 're:http://httpod.scdn.arkena.com/11970/327336_[0-9]+.mp4',
+            'ext': 'mp4',
+            'title': 'Politique Matin - Politique matin'
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Extracts the information for a given url and returns it in a dictionary"""
+        display_id = self._match_id(url)
+
+        # Extract the web page
+        webpage = self._download_webpage(url, display_id)
+
+        # Extract the required info of the media files
+        media_files_info = self.__extract_from_webpage(display_id, webpage)
+        # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URL
+        if media_files_info is None:
+            return self.url_result(self.__extract_embed_url(webpage))
+
+        # Extract the video formats from the media info
+        video_formats = self.__get_video_formats(media_files_info)
+        # Extract the thumbnails from the media info
+        video_thumbnails = self.__get_thumbnails(media_files_info)
+
+        # Return the dictionary with the information about the video to download
+        return {
+            'id': media_files_info['EntryName'],
+            'title': self._og_search_title(webpage),
+            'formats': video_formats,
+            'thumbnails': video_thumbnails
+        }
+
+    def __extract_from_webpage(self, display_id, webpage):
+        """Extracts the media info JSON object for the video for the provided web page."""
+        embed_url = self.__extract_embed_url(webpage)
+        embed_regex = r'(?:[a-zA-Z0-9]+\.)?lcp\.fr\/embed\/(?P<clip_id>[A-za-z0-9]+)\/(?P<player_id>[A-za-z0-9]+)\/(?P<skin_name>[^\/]+)'
+
+        # Extract the identifying attributes from the embed url of the web page
+        clip_id = self._search_regex(embed_regex, embed_url, 'clip id', group='clip_id', fatal=False)
+        player_id = self._search_regex(embed_regex, embed_url, 'player id', group='player_id', fatal=False)
+        skin_name = self._search_regex(embed_regex, embed_url, 'skin name', group='skin_name', fatal=False)
+
+        # Check whether the extraction of the clip id, player id or skin name
+        if (clip_id is None) or (player_id is None) or (skin_name is None):
+            return None
+
+        # Extract the video url from the embedded player
+        return self.__extract_from_player(display_id, clip_id, player_id, skin_name)
+
+    def __extract_embed_url(self, webpage):
+        """Extracts the embedded player url for the video."""
+        return self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
+            webpage, 'embed url', group='url')
+
+    def __extract_from_player(self, display_id, clip_id, player_id, skin_name):
+        """Extracts the JSON object containing the required media info from the embedded arkena player"""
+        arkena_url = 'http://play.arkena.com/config/avp/v1/player/media/{0}/{1}/{2}/?callbackMethod=?'.format(clip_id,
+                                                                                                              skin_name,
+                                                                                                              player_id)
+        arkena_info = self._download_webpage(arkena_url, 'clip_info_' + clip_id)
+
+        # Extract the json containing information about the video files
+        arkena_info_regex = r'\?\((?P<json>.*)\);'
+        info_json = self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
+                                     display_id)
+
+        # All videos are part of a playlist, a single video is in a playlist of size 1
+        media_files_info = info_json.get('Playlist')
+        if media_files_info is not None:
+            media_files_info = media_files_info[0]
+        return media_files_info
+
+    def __get_thumbnails(self, media_files_info):
+        """Retrieves the thumbnails contained in the media info"""
+        thumbnails = []
+        media_thumbnail_info = media_files_info.get('MediaInfo', {}).get('Poster')
+        if media_thumbnail_info is not None:
+            for thumbnail in media_thumbnail_info:
+                thumbnails.append({
+                    'url': thumbnail.get('Url'),
+                    'width': int_or_none(thumbnail.get('Size'))
+                })
+        return thumbnails
+
+    def __get_video_formats(self, media_files_info):
+        """Retrieves the video formats contained in the media file info"""
+        formats = []
+        media_files = media_files_info.get('MediaFiles')
+
+        if media_files is not None:
+            formats.extend(self.__get_mp4_video_formats(media_files))
+            self._sort_formats(formats)
+
+        return formats
+
+    def __get_mp4_video_formats(self, media_files_json):
+        """Retrieves all mp4 video formats contained in the media file info"""
+        formats = []
+        mp4_files_json = media_files_json.get('Mp4')
+        if mp4_files_json is not None:
+            for video_info in mp4_files_json:
+                bitrate = int_or_none(video_info.get('Bitrate'))
+                if bitrate is not None:
+                    bitrate /= 1000  # Set bitrate to KBit/s
+                formats.append({
+                    'url': video_info.get('Url'),
+                    'ext': 'mp4',
+                    'tbr': bitrate
+                })
+        return formats

From ccdbe1e8faac13fa78950f3001d6cdbc02798c70 Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Sat, 27 Feb 2016 19:54:38 +0100
Subject: [PATCH 02/11] Removed redundant comments, needless escaping of
 slashes in regular expressions, using scale argument of int_or_none and
 direct use of the generic extractor for already supported embeds

---
 youtube_dl/extractor/lcp.py | 31 ++++++++-----------------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index be586e1f1..eb3f16698 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -8,7 +8,7 @@ from ..utils import (
 
 class LcpIE(InfoExtractor):
     IE_NAME = 'LCP'
-    _VALID_URL = r'https?:\/\/(?:www\.)?lcp\.fr\/(?:[^\/]+/)*(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^\/]+/)*(?P<id>[^/]+)'
 
     _TESTS = [{
         'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
@@ -31,24 +31,18 @@ class LcpIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        """Extracts the information for a given url and returns it in a dictionary"""
         display_id = self._match_id(url)
-
-        # Extract the web page
         webpage = self._download_webpage(url, display_id)
 
-        # Extract the required info of the media files
+        # Extract the required info of the media files gathered in a dictionary
         media_files_info = self.__extract_from_webpage(display_id, webpage)
-        # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URL
+        # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URLs
         if media_files_info is None:
-            return self.url_result(self.__extract_embed_url(webpage))
+            return self.url_result(url, 'Generic')
 
-        # Extract the video formats from the media info
         video_formats = self.__get_video_formats(media_files_info)
-        # Extract the thumbnails from the media info
         video_thumbnails = self.__get_thumbnails(media_files_info)
 
-        # Return the dictionary with the information about the video to download
         return {
             'id': media_files_info['EntryName'],
             'title': self._og_search_title(webpage),
@@ -59,22 +53,19 @@ class LcpIE(InfoExtractor):
     def __extract_from_webpage(self, display_id, webpage):
         """Extracts the media info JSON object for the video for the provided web page."""
         embed_url = self.__extract_embed_url(webpage)
-        embed_regex = r'(?:[a-zA-Z0-9]+\.)?lcp\.fr\/embed\/(?P<clip_id>[A-za-z0-9]+)\/(?P<player_id>[A-za-z0-9]+)\/(?P<skin_name>[^\/]+)'
+        embed_regex = r'(?:[a-zA-Z0-9]+\.)?lcp\.fr/embed/(?P<clip_id>[A-za-z0-9]+)/(?P<player_id>[A-za-z0-9]+)/(?P<skin_name>[^\/]+)'
 
-        # Extract the identifying attributes from the embed url of the web page
         clip_id = self._search_regex(embed_regex, embed_url, 'clip id', group='clip_id', fatal=False)
         player_id = self._search_regex(embed_regex, embed_url, 'player id', group='player_id', fatal=False)
         skin_name = self._search_regex(embed_regex, embed_url, 'skin name', group='skin_name', fatal=False)
 
-        # Check whether the extraction of the clip id, player id or skin name
+        # Check whether the matches failed, which might be when dealing with other players (e.g., dailymotion stream)
         if (clip_id is None) or (player_id is None) or (skin_name is None):
             return None
 
-        # Extract the video url from the embedded player
         return self.__extract_from_player(display_id, clip_id, player_id, skin_name)
 
     def __extract_embed_url(self, webpage):
-        """Extracts the embedded player url for the video."""
         return self._search_regex(
             r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
             webpage, 'embed url', group='url')
@@ -86,19 +77,17 @@ class LcpIE(InfoExtractor):
                                                                                                               player_id)
         arkena_info = self._download_webpage(arkena_url, 'clip_info_' + clip_id)
 
-        # Extract the json containing information about the video files
         arkena_info_regex = r'\?\((?P<json>.*)\);'
         info_json = self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
                                      display_id)
 
-        # All videos are part of a playlist, a single video is in a playlist of size 1
+        # All videos are part of a playlist, a single video is also put in a playlist
         media_files_info = info_json.get('Playlist')
         if media_files_info is not None:
             media_files_info = media_files_info[0]
         return media_files_info
 
     def __get_thumbnails(self, media_files_info):
-        """Retrieves the thumbnails contained in the media info"""
         thumbnails = []
         media_thumbnail_info = media_files_info.get('MediaInfo', {}).get('Poster')
         if media_thumbnail_info is not None:
@@ -110,7 +99,6 @@ class LcpIE(InfoExtractor):
         return thumbnails
 
     def __get_video_formats(self, media_files_info):
-        """Retrieves the video formats contained in the media file info"""
         formats = []
         media_files = media_files_info.get('MediaFiles')
 
@@ -121,14 +109,11 @@ class LcpIE(InfoExtractor):
         return formats
 
     def __get_mp4_video_formats(self, media_files_json):
-        """Retrieves all mp4 video formats contained in the media file info"""
         formats = []
         mp4_files_json = media_files_json.get('Mp4')
         if mp4_files_json is not None:
             for video_info in mp4_files_json:
-                bitrate = int_or_none(video_info.get('Bitrate'))
-                if bitrate is not None:
-                    bitrate /= 1000  # Set bitrate to KBit/s
+                bitrate = int_or_none(video_info.get('Bitrate'), scale=0.001) # Scale bitrate to KBit/s
                 formats.append({
                     'url': video_info.get('Url'),
                     'ext': 'mp4',

From e13bd53a94fe0153c7cce2a38e5c072b9e9123e0 Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Sat, 27 Feb 2016 21:40:05 +0100
Subject: [PATCH 03/11] Check for None through truth value checks, added test
 for the scenario of delegating the URL

---
 youtube_dl/extractor/lcp.py | 63 +++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 27 deletions(-)

diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index eb3f16698..9bc0e8f0b 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -28,6 +28,13 @@ class LcpIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Politique Matin - Politique matin'
         }
+    }, {
+        'url': 'http://www.lcp.fr/le-direct',
+        'info_dict': {
+            'title': 'Le direct | LCP Assembl\xe9e nationale',
+            'id': 'le-direct',
+        },
+        'playlist_mincount': 1
     }]
 
     def _real_extract(self, url):
@@ -35,9 +42,9 @@ class LcpIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         # Extract the required info of the media files gathered in a dictionary
-        media_files_info = self.__extract_from_webpage(display_id, webpage)
+        media_files_info = None #self.__extract_from_webpage(display_id, webpage)
         # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URLs
-        if media_files_info is None:
+        if not media_files_info:
             return self.url_result(url, 'Generic')
 
         video_formats = self.__get_video_formats(media_files_info)
@@ -55,12 +62,12 @@ class LcpIE(InfoExtractor):
         embed_url = self.__extract_embed_url(webpage)
         embed_regex = r'(?:[a-zA-Z0-9]+\.)?lcp\.fr/embed/(?P<clip_id>[A-za-z0-9]+)/(?P<player_id>[A-za-z0-9]+)/(?P<skin_name>[^\/]+)'
 
-        clip_id = self._search_regex(embed_regex, embed_url, 'clip id', group='clip_id', fatal=False)
-        player_id = self._search_regex(embed_regex, embed_url, 'player id', group='player_id', fatal=False)
-        skin_name = self._search_regex(embed_regex, embed_url, 'skin name', group='skin_name', fatal=False)
+        clip_id = self._search_regex(embed_regex, embed_url, 'clip id', group='clip_id', default=None)
+        player_id = self._search_regex(embed_regex, embed_url, 'player id', group='player_id', default=None)
+        skin_name = self._search_regex(embed_regex, embed_url, 'skin name', group='skin_name', default=None)
 
         # Check whether the matches failed, which might be when dealing with other players (e.g., dailymotion stream)
-        if (clip_id is None) or (player_id is None) or (skin_name is None):
+        if not clip_id or not player_id or not skin_name:
             return None
 
         return self.__extract_from_player(display_id, clip_id, player_id, skin_name)
@@ -83,40 +90,42 @@ class LcpIE(InfoExtractor):
 
         # All videos are part of a playlist, a single video is also put in a playlist
         media_files_info = info_json.get('Playlist')
-        if media_files_info is not None:
-            media_files_info = media_files_info[0]
-        return media_files_info
+        if not media_files_info:
+            return None
+        return media_files_info[0]
 
     def __get_thumbnails(self, media_files_info):
         thumbnails = []
         media_thumbnail_info = media_files_info.get('MediaInfo', {}).get('Poster')
-        if media_thumbnail_info is not None:
-            for thumbnail in media_thumbnail_info:
-                thumbnails.append({
-                    'url': thumbnail.get('Url'),
-                    'width': int_or_none(thumbnail.get('Size'))
-                })
+        if not media_thumbnail_info:
+            return None
+        for thumbnail in media_thumbnail_info:
+            thumbnails.append({
+                'url': thumbnail.get('Url'),
+                'width': int_or_none(thumbnail.get('Size'))
+            })
         return thumbnails
 
     def __get_video_formats(self, media_files_info):
         formats = []
         media_files = media_files_info.get('MediaFiles')
+        if not media_files:
+            return None
 
-        if media_files is not None:
-            formats.extend(self.__get_mp4_video_formats(media_files))
-            self._sort_formats(formats)
-
+        formats.extend(self.__get_mp4_video_formats(media_files))
+        self._sort_formats(formats)
         return formats
 
     def __get_mp4_video_formats(self, media_files_json):
         formats = []
         mp4_files_json = media_files_json.get('Mp4')
-        if mp4_files_json is not None:
-            for video_info in mp4_files_json:
-                bitrate = int_or_none(video_info.get('Bitrate'), scale=0.001) # Scale bitrate to KBit/s
-                formats.append({
-                    'url': video_info.get('Url'),
-                    'ext': 'mp4',
-                    'tbr': bitrate
-                })
+        if not mp4_files_json:
+            return None
+        for video_info in mp4_files_json:
+            bitrate = int_or_none(video_info.get('Bitrate'), scale=1000) # Scale bitrate to KBit/s
+            formats.append({
+                'url': video_info.get('Url'),
+                'ext': 'mp4',
+                'tbr': bitrate
+            })
         return formats

From c7293db7b7c3fa365374789e4374bc54e039eaeb Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Sat, 27 Feb 2016 22:07:38 +0100
Subject: [PATCH 04/11] Skip formats and thumbnails missing Url

---
 youtube_dl/extractor/lcp.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index 9bc0e8f0b..eb4a14056 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -100,8 +100,11 @@ class LcpIE(InfoExtractor):
         if not media_thumbnail_info:
             return None
         for thumbnail in media_thumbnail_info:
+            thumbnail_url = thumbnail.get('Url')
+            if not thumbnail_url:
+                continue
             thumbnails.append({
-                'url': thumbnail.get('Url'),
+                'url': thumbnail_url,
                 'width': int_or_none(thumbnail.get('Size'))
             })
         return thumbnails
@@ -123,8 +126,11 @@ class LcpIE(InfoExtractor):
             return None
         for video_info in mp4_files_json:
             bitrate = int_or_none(video_info.get('Bitrate'), scale=1000) # Scale bitrate to KBit/s
+            video_url = video_info.get('Url')
+            if not video_url:
+                continue
             formats.append({
-                'url': video_info.get('Url'),
+                'url': video_url,
                 'ext': 'mp4',
                 'tbr': bitrate
             })

From 4e23fd0d368e0685f67d15581d9f3863850518a6 Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Sat, 27 Feb 2016 22:11:48 +0100
Subject: [PATCH 05/11] Restored line of code with function call to retrieve
 media info

---
 youtube_dl/extractor/lcp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index eb4a14056..315e9ab44 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -42,7 +42,7 @@ class LcpIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         # Extract the required info of the media files gathered in a dictionary
-        media_files_info = None #self.__extract_from_webpage(display_id, webpage)
+        media_files_info = self.__extract_from_webpage(display_id, webpage)
         # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URLs
         if not media_files_info:
             return self.url_result(url, 'Generic')

From a4b7e38dc4a7780c9b0e82d90011d3f2a33a5a8f Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Sat, 27 Feb 2016 23:20:45 +0100
Subject: [PATCH 06/11] Added timestamp and description, which is the title of
 the webpage when the description of the page is unavailable

---
 youtube_dl/extractor/lcp.py | 48 +++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index 315e9ab44..608b14923 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -2,7 +2,8 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
-    int_or_none
+    int_or_none,
+    parse_iso8601
 )
 
 
@@ -17,7 +18,10 @@ class LcpIE(InfoExtractor):
             'id': 'd56d03e9',
             'url': 're:http://httpod.scdn.arkena.com/11970/d56d03e9_[0-9]+.mp4',
             'ext': 'mp4',
-            'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche'
+            'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
+            'upload_date': '20160226',
+            'description': 'Le président du groupe parlementaire radical, républicain, démocrate et progressiste (RRDP) y voit une bonne occasion pour le président de la République de se "relégitimer".',
+            'timestamp': 1456488895
         }
     }, {
         'url': 'http://www.lcp.fr/emissions/politique-matin/271085-politique-matin',
@@ -26,7 +30,10 @@ class LcpIE(InfoExtractor):
             'id': '327336',
             'url': 're:http://httpod.scdn.arkena.com/11970/327336_[0-9]+.mp4',
             'ext': 'mp4',
-            'title': 'Politique Matin - Politique matin'
+            'title': 'Politique Matin - Politique matin',
+            'upload_date': '20160225',
+            'description': 'Politique Matin - Politique matin',
+            'timestamp': 1456391602
         }
     }, {
         'url': 'http://www.lcp.fr/le-direct',
@@ -42,19 +49,31 @@ class LcpIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         # Extract the required info of the media files gathered in a dictionary
-        media_files_info = self.__extract_from_webpage(display_id, webpage)
+        media_dict = self.__extract_from_webpage(display_id, webpage)
         # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URLs
-        if not media_files_info:
+        if not media_dict:
             return self.url_result(url, 'Generic')
 
+        # All videos are part of a playlist, a single video is also put in a playlist
+        playlist_files_info = media_dict.get('Playlist')
+        if not playlist_files_info:
+            return self.url_result(url, 'Generic')
+
+        media_files_info = playlist_files_info[0]
         video_formats = self.__get_video_formats(media_files_info)
         video_thumbnails = self.__get_thumbnails(media_files_info)
+        video_timestamp = parse_iso8601(media_files_info.get('MediaInfo', {}).get('PublishDate'))
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content', default=title)
 
         return {
-            'id': media_files_info['EntryName'],
-            'title': self._og_search_title(webpage),
+            'id': media_files_info.get('EntryName'),
+            'title': title,
             'formats': video_formats,
-            'thumbnails': video_thumbnails
+            'thumbnails': video_thumbnails,
+            'description': description,
+            'timestamp': video_timestamp
         }
 
     def __extract_from_webpage(self, display_id, webpage):
@@ -83,16 +102,9 @@ class LcpIE(InfoExtractor):
                                                                                                               skin_name,
                                                                                                               player_id)
         arkena_info = self._download_webpage(arkena_url, 'clip_info_' + clip_id)
-
         arkena_info_regex = r'\?\((?P<json>.*)\);'
-        info_json = self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
-                                     display_id)
-
-        # All videos are part of a playlist, a single video is also put in a playlist
-        media_files_info = info_json.get('Playlist')
-        if not media_files_info:
-            return None
-        return media_files_info[0]
+        return self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
+                                display_id)
 
     def __get_thumbnails(self, media_files_info):
         thumbnails = []
@@ -125,7 +137,7 @@ class LcpIE(InfoExtractor):
         if not mp4_files_json:
             return None
         for video_info in mp4_files_json:
-            bitrate = int_or_none(video_info.get('Bitrate'), scale=1000) # Scale bitrate to KBit/s
+            bitrate = int_or_none(video_info.get('Bitrate'), scale=1000)  # Scale bitrate to KBit/s
             video_url = video_info.get('Url')
             if not video_url:
                 continue

From 620856702658e6525e93cc6b612dc611911d4f0a Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Sun, 28 Feb 2016 00:29:09 +0100
Subject: [PATCH 07/11] Changed _html_search_regex to _html_search_meta for
 retrieving description when available, otherwise leave it empty

---
 youtube_dl/extractor/lcp.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index 608b14923..e49744e91 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -32,7 +32,6 @@ class LcpIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Politique Matin - Politique matin',
             'upload_date': '20160225',
-            'description': 'Politique Matin - Politique matin',
             'timestamp': 1456391602
         }
     }, {
@@ -65,7 +64,7 @@ class LcpIE(InfoExtractor):
         video_timestamp = parse_iso8601(media_files_info.get('MediaInfo', {}).get('PublishDate'))
 
         title = self._og_search_title(webpage)
-        description = self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content', default=title)
+        description = self._html_search_meta('description', webpage, default=None)
 
         return {
             'id': media_files_info.get('EntryName'),

From 26bab54ef5239dcfa3f2ff180e472f828225c08a Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Mon, 7 Mar 2016 01:14:18 +0100
Subject: [PATCH 08/11] Added ArkenaPlay extractor [Extraction for different
 formats needs to be implemented]

---
 youtube_dl/extractor/__init__.py   |   1 +
 youtube_dl/extractor/arkenaplay.py | 129 +++++++++++++++++++++++++++++
 youtube_dl/extractor/lcp.py        | 124 +++------------------------
 3 files changed, 143 insertions(+), 111 deletions(-)
 create mode 100644 youtube_dl/extractor/arkenaplay.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 8bb2631a7..a15572d7e 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -37,6 +37,7 @@ from .ard import (
     ARDMediathekIE,
     SportschauIE,
 )
+from .arkenaplay import ArkenaPlayIE
 from .arte import (
     ArteTvIE,
     ArteTVPlus7IE,
diff --git a/youtube_dl/extractor/arkenaplay.py b/youtube_dl/extractor/arkenaplay.py
new file mode 100644
index 000000000..ea6faac31
--- /dev/null
+++ b/youtube_dl/extractor/arkenaplay.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601
+)
+
+
+class ArkenaPlayIE(InfoExtractor):
+    IE_NAME = 'ArkenaPlay'
+    _VALID_URL = r'(?P<host>https?://(?:www\.)?play\..*\..*)/embed/.*(?P<id>\d+)?/.*'
+
+    _TESTS = [{
+        'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
+        'md5': '6cea4f7d13810464ef8485a924fc3333',
+        'info_dict': {
+            'id': '327336',
+            'url': 're:http://httpod.scdn.arkena.com/11970/327336_[0-9]+.mp4',
+            'ext': 'mp4',
+            'title': '327336',
+            'upload_date': '20160225',
+            'timestamp': 1456391602
+        }
+    }, {
+        'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
+        'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
+        'info_dict': {
+            'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
+            'url': 'http://88e04ec095b07cd1aa3ea588be47e870.httpcache0.90034-httpcache0.dna.qbrick.com/90034-httpcache0/4bf759a1-00090034/bbb_sunflower_2160p_60fps_normal_720p.mp4',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny',
+            'description': 'Royalty free test video',
+            'upload_date': '20150528',
+            'timestamp': 1432816365
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._search_regex(self._VALID_URL, url, 'host_name', group='id')
+        webpage = self._download_webpage(url, display_id)
+
+        media_url_regex = '"(?P<mediainfo>(?P<host>.*)/config/avp/.*/\?callbackMethod=\?)"'
+        media_url = self._html_search_regex(media_url_regex, webpage, 'arkena_media_info_url')
+        hostname = self._html_search_regex(media_url_regex, webpage, 'arkena_media_host', group='host')
+        if not hostname:
+            hostname = self._search_regex(self._VALID_URL, url, 'host_name', group='host')
+            media_url = hostname + media_url
+
+        # Extract the required info of the media files gathered in a dictionary
+        arkena_info = self._download_webpage(media_url, 'arkena_info_')
+        arkena_info_regex = r'\?\((?P<json>.*)\);'
+        media_dict = self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
+                                      display_id)
+
+        # All videos are part of a playlist, a single video is also put in a playlist
+        playlist_items = media_dict.get('Playlist', [])
+        if len(playlist_items) == 0:
+            return self.url_result(url, 'Generic')
+        elif len(playlist_items) == 1:
+            arkena_media_info = playlist_items[0]
+            return self.__extract_from_playlistentry(arkena_media_info)
+        else:
+            entries_info = []
+            for arkena_playlist_item in playlist_items:
+                entries_info.append(self.__extract_from_playlistentry(arkena_playlist_item))
+            return {
+                'id': display_id,
+                'entries': entries_info
+            }
+
+    def __extract_from_playlistentry(self, arkena_playlistentry_info):
+        formats = self.__get_video_formats(arkena_playlistentry_info)
+        media_info = arkena_playlistentry_info.get('MediaInfo', {})
+        thumbnails = self.__get_thumbnails(media_info)
+        title = media_info.get('Title')
+        description = media_info.get('Description')
+        timestamp = parse_iso8601(media_info.get('PublishDate'))
+        return {
+            'id': arkena_playlistentry_info.get('EntryName'),
+            'title': title,
+            'formats': formats,
+            'thumbnails': thumbnails,
+            'description': description,
+            'timestamp': timestamp
+        }
+
+    def __get_thumbnails(self, arkena_mediainfo):
+        thumbnails = []
+        thumbnails_info = arkena_mediainfo.get('Poster')
+        if not thumbnails_info:
+            return None
+        for thumbnail in thumbnails_info:
+            thumbnail_url = thumbnail.get('Url')
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int_or_none(thumbnail.get('Size'))
+            })
+        return thumbnails
+
+    def __get_video_formats(self, media_files_info):
+        formats = []
+        media_files = media_files_info.get('MediaFiles')
+        if not media_files:
+            return None
+
+        formats.extend(self.__get_mp4_video_formats(media_files))
+        # TODO <Other video formats>
+        self._sort_formats(formats)
+        return formats
+
+    def __get_mp4_video_formats(self, media_files_json):
+        formats = []
+        mp4_files_json = media_files_json.get('Mp4')
+        if not mp4_files_json:
+            return None
+        for video_info in mp4_files_json:
+            bitrate = int_or_none(video_info.get('Bitrate'), scale=1000)  # Scale bitrate to KBit/s
+            video_url = video_info.get('Url')
+            if not video_url:
+                continue
+            formats.append({
+                'url': video_url,
+                'ext': 'mp4',
+                'tbr': bitrate
+            })
+        return formats
diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index e49744e91..22c9642a6 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -1,11 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    parse_iso8601
-)
-
 
 class LcpIE(InfoExtractor):
     IE_NAME = 'LCP'
@@ -18,21 +13,20 @@ class LcpIE(InfoExtractor):
             'id': 'd56d03e9',
             'url': 're:http://httpod.scdn.arkena.com/11970/d56d03e9_[0-9]+.mp4',
             'ext': 'mp4',
-            'title': 'Schwartzenberg (PRG) préconise à François Hollande de participer à une primaire à gauche',
+            'title': 'd56d03e9',
             'upload_date': '20160226',
-            'description': 'Le président du groupe parlementaire radical, républicain, démocrate et progressiste (RRDP) y voit une bonne occasion pour le président de la République de se "relégitimer".',
             'timestamp': 1456488895
         }
     }, {
-        'url': 'http://www.lcp.fr/emissions/politique-matin/271085-politique-matin',
-        'md5': '6cea4f7d13810464ef8485a924fc3333',
+        'url': 'http://www.lcp.fr/emissions/parlementair',
+        'md5': '9b63769445cbe5f26952bef71f281e8c',
         'info_dict': {
-            'id': '327336',
-            'url': 're:http://httpod.scdn.arkena.com/11970/327336_[0-9]+.mp4',
+            'id': '327499',
+            'url': 're:http://httpod.scdn.arkena.com/11970/327499_[0-9]+.mp4',
             'ext': 'mp4',
-            'title': 'Politique Matin - Politique matin',
-            'upload_date': '20160225',
-            'timestamp': 1456391602
+            'title': '327499',
+            'upload_date': '20160304',
+            'timestamp': 1457098658
         }
     }, {
         'url': 'http://www.lcp.fr/le-direct',
@@ -47,102 +41,10 @@ class LcpIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        # Extract the required info of the media files gathered in a dictionary
-        media_dict = self.__extract_from_webpage(display_id, webpage)
-        # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URLs
-        if not media_dict:
+        embed_url_regex = r'"(?P<url>(?:https?://(?:www\.)?)?play\.lcp\.fr/embed/[A-za-z0-9]+/[A-za-z0-9]+/[A-za-z0-9]+/[A-za-z0-9]+)"'
+        embed_url = self._html_search_regex(embed_url_regex, webpage, 'player_url', default=None, fatal=False)
+        if not embed_url:
             return self.url_result(url, 'Generic')
 
-        # All videos are part of a playlist, a single video is also put in a playlist
-        playlist_files_info = media_dict.get('Playlist')
-        if not playlist_files_info:
-            return self.url_result(url, 'Generic')
-
-        media_files_info = playlist_files_info[0]
-        video_formats = self.__get_video_formats(media_files_info)
-        video_thumbnails = self.__get_thumbnails(media_files_info)
-        video_timestamp = parse_iso8601(media_files_info.get('MediaInfo', {}).get('PublishDate'))
-
-        title = self._og_search_title(webpage)
-        description = self._html_search_meta('description', webpage, default=None)
-
-        return {
-            'id': media_files_info.get('EntryName'),
-            'title': title,
-            'formats': video_formats,
-            'thumbnails': video_thumbnails,
-            'description': description,
-            'timestamp': video_timestamp
-        }
-
-    def __extract_from_webpage(self, display_id, webpage):
-        """Extracts the media info JSON object for the video for the provided web page."""
-        embed_url = self.__extract_embed_url(webpage)
-        embed_regex = r'(?:[a-zA-Z0-9]+\.)?lcp\.fr/embed/(?P<clip_id>[A-za-z0-9]+)/(?P<player_id>[A-za-z0-9]+)/(?P<skin_name>[^\/]+)'
-
-        clip_id = self._search_regex(embed_regex, embed_url, 'clip id', group='clip_id', default=None)
-        player_id = self._search_regex(embed_regex, embed_url, 'player id', group='player_id', default=None)
-        skin_name = self._search_regex(embed_regex, embed_url, 'skin name', group='skin_name', default=None)
-
-        # Check whether the matches failed, which might be when dealing with other players (e.g., dailymotion stream)
-        if not clip_id or not player_id or not skin_name:
-            return None
-
-        return self.__extract_from_player(display_id, clip_id, player_id, skin_name)
-
-    def __extract_embed_url(self, webpage):
-        return self._search_regex(
-            r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
-            webpage, 'embed url', group='url')
-
-    def __extract_from_player(self, display_id, clip_id, player_id, skin_name):
-        """Extracts the JSON object containing the required media info from the embedded arkena player"""
-        arkena_url = 'http://play.arkena.com/config/avp/v1/player/media/{0}/{1}/{2}/?callbackMethod=?'.format(clip_id,
-                                                                                                              skin_name,
-                                                                                                              player_id)
-        arkena_info = self._download_webpage(arkena_url, 'clip_info_' + clip_id)
-        arkena_info_regex = r'\?\((?P<json>.*)\);'
-        return self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'),
-                                display_id)
-
-    def __get_thumbnails(self, media_files_info):
-        thumbnails = []
-        media_thumbnail_info = media_files_info.get('MediaInfo', {}).get('Poster')
-        if not media_thumbnail_info:
-            return None
-        for thumbnail in media_thumbnail_info:
-            thumbnail_url = thumbnail.get('Url')
-            if not thumbnail_url:
-                continue
-            thumbnails.append({
-                'url': thumbnail_url,
-                'width': int_or_none(thumbnail.get('Size'))
-            })
-        return thumbnails
-
-    def __get_video_formats(self, media_files_info):
-        formats = []
-        media_files = media_files_info.get('MediaFiles')
-        if not media_files:
-            return None
-
-        formats.extend(self.__get_mp4_video_formats(media_files))
-        self._sort_formats(formats)
-        return formats
-
-    def __get_mp4_video_formats(self, media_files_json):
-        formats = []
-        mp4_files_json = media_files_json.get('Mp4')
-        if not mp4_files_json:
-            return None
-        for video_info in mp4_files_json:
-            bitrate = int_or_none(video_info.get('Bitrate'), scale=1000)  # Scale bitrate to KBit/s
-            video_url = video_info.get('Url')
-            if not video_url:
-                continue
-            formats.append({
-                'url': video_url,
-                'ext': 'mp4',
-                'tbr': bitrate
-            })
-        return formats
+        title = self._og_search_title(webpage, default=None)
+        return self.url_result(embed_url, 'ArkenaPlay', video_id=display_id, video_title=title)

From 32719d16c375a7df702a36969599ea3a01e460c1 Mon Sep 17 00:00:00 2001
From: Sander van den Oever <sandervdo@gmail.com>
Date: Wed, 9 Mar 2016 17:26:10 +0100
Subject: [PATCH 09/11] Add m3u8 and flash support to ArkenaPlay extractor

---
 youtube_dl/extractor/arkenaplay.py | 33 +++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/arkenaplay.py b/youtube_dl/extractor/arkenaplay.py
index ea6faac31..5f4f65fe2 100644
--- a/youtube_dl/extractor/arkenaplay.py
+++ b/youtube_dl/extractor/arkenaplay.py
@@ -70,14 +70,15 @@ class ArkenaPlayIE(InfoExtractor):
             }
 
     def __extract_from_playlistentry(self, arkena_playlistentry_info):
-        formats = self.__get_video_formats(arkena_playlistentry_info)
         media_info = arkena_playlistentry_info.get('MediaInfo', {})
         thumbnails = self.__get_thumbnails(media_info)
         title = media_info.get('Title')
         description = media_info.get('Description')
+        video_id = media_info.get('VideoId')
         timestamp = parse_iso8601(media_info.get('PublishDate'))
+        formats = self.__get_video_formats(arkena_playlistentry_info, video_id)
         return {
-            'id': arkena_playlistentry_info.get('EntryName'),
+            'id': video_id,
             'title': title,
             'formats': formats,
             'thumbnails': thumbnails,
@@ -100,13 +101,15 @@ class ArkenaPlayIE(InfoExtractor):
             })
         return thumbnails
 
-    def __get_video_formats(self, media_files_info):
+    def __get_video_formats(self, media_files_info, video_id):
         formats = []
         media_files = media_files_info.get('MediaFiles')
         if not media_files:
             return None
 
         formats.extend(self.__get_mp4_video_formats(media_files))
+        formats.extend(self.__get_m3u8_video_formats(media_files, video_id))
+        formats.extend(self.__get_f4m_video_formats(media_files, video_id))
         # TODO <Other video formats>
         self._sort_formats(formats)
         return formats
@@ -127,3 +130,27 @@ class ArkenaPlayIE(InfoExtractor):
                 'tbr': bitrate
             })
         return formats
+
+    def __get_m3u8_video_formats(self, media_files_json, video_id):
+        formats = []
+        m3u8_files_json = media_files_json.get("M3u8")
+        if not m3u8_files_json:
+            return None
+        for video_info in m3u8_files_json:
+            video_url = video_info.get('Url')
+            if not video_url:
+                continue
+            formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+        return formats
+
+    def __get_f4m_video_formats(self, media_files_json, video_id):
+        formats = []
+        f4m_files_json = media_files_json.get("Flash")
+        if not f4m_files_json:
+            return None
+        for video_info in f4m_files_json:
+            video_url = video_info.get("Url")
+            if not video_url:
+                continue
+            formats = self._extract_f4m_formats(video_url, video_id, 'f4m', f4m_id='hds', fatal=False)
+        return formats

From ff32acc9bd2dad5714ee4fc93ee1b12436af985a Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Thu, 10 Mar 2016 01:33:10 +0100
Subject: [PATCH 10/11] Fixed some bugs in ArkenaPlay Extractor and made
 distinction between flv and f4m

---
 youtube_dl/extractor/arkenaplay.py | 33 ++++++++++++++++++------------
 youtube_dl/extractor/lcp.py        | 15 ++------------
 2 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/youtube_dl/extractor/arkenaplay.py b/youtube_dl/extractor/arkenaplay.py
index 5f4f65fe2..81c6d6e35 100644
--- a/youtube_dl/extractor/arkenaplay.py
+++ b/youtube_dl/extractor/arkenaplay.py
@@ -13,10 +13,10 @@ class ArkenaPlayIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
-        'md5': '6cea4f7d13810464ef8485a924fc3333',
+        'md5': '7d857b1af491ec0f6c2610e52df1ff82',
         'info_dict': {
             'id': '327336',
-            'url': 're:http://httpod.scdn.arkena.com/11970/327336_[0-9]+.mp4',
+            'url': 're:http://httpod.scdn.arkena.com/11970/327336.*',
             'ext': 'mp4',
             'title': '327336',
             'upload_date': '20160225',
@@ -40,7 +40,7 @@ class ArkenaPlayIE(InfoExtractor):
         display_id = self._search_regex(self._VALID_URL, url, 'host_name', group='id')
         webpage = self._download_webpage(url, display_id)
 
-        media_url_regex = '"(?P<mediainfo>(?P<host>.*)/config/avp/.*/\?callbackMethod=\?)"'
+        media_url_regex = '"(?P<mediainfo>(?P<host>.*)/(c|C)onfig/.*\?callbackMethod=\?)"'
         media_url = self._html_search_regex(media_url_regex, webpage, 'arkena_media_info_url')
         hostname = self._html_search_regex(media_url_regex, webpage, 'arkena_media_host', group='host')
         if not hostname:
@@ -109,8 +109,8 @@ class ArkenaPlayIE(InfoExtractor):
 
         formats.extend(self.__get_mp4_video_formats(media_files))
         formats.extend(self.__get_m3u8_video_formats(media_files, video_id))
-        formats.extend(self.__get_f4m_video_formats(media_files, video_id))
-        # TODO <Other video formats>
+        formats.extend(self.__get_flash_video_formats(media_files, video_id))
+        # TODO <DASH (mpd) formats>
         self._sort_formats(formats)
         return formats
 
@@ -133,24 +133,31 @@ class ArkenaPlayIE(InfoExtractor):
 
     def __get_m3u8_video_formats(self, media_files_json, video_id):
         formats = []
-        m3u8_files_json = media_files_json.get("M3u8")
+        m3u8_files_json = media_files_json.get('M3u8')
         if not m3u8_files_json:
             return None
         for video_info in m3u8_files_json:
             video_url = video_info.get('Url')
             if not video_url:
                 continue
-            formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+            formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
         return formats
 
-    def __get_f4m_video_formats(self, media_files_json, video_id):
+    def __get_flash_video_formats(self, media_files_json, video_id):
         formats = []
-        f4m_files_json = media_files_json.get("Flash")
-        if not f4m_files_json:
+        flash_files_json = media_files_json.get('Flash')
+        if not flash_files_json:
             return None
-        for video_info in f4m_files_json:
-            video_url = video_info.get("Url")
+        for video_info in flash_files_json:
+            video_url = video_info.get('Url')
             if not video_url:
                 continue
-            formats = self._extract_f4m_formats(video_url, video_id, 'f4m', f4m_id='hds', fatal=False)
+            video_type = video_info.get('Type')
+            if video_type == 'application/hds+xml':
+                formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
+            elif video_type == 'video/x-flv':
+                formats.append({
+                    'url': video_url,
+                    'ext': 'flv'
+                })
         return formats
diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py
index 22c9642a6..38d7502df 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dl/extractor/lcp.py
@@ -8,26 +8,15 @@ class LcpIE(InfoExtractor):
 
     _TESTS = [{
         'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire',
-        'md5': 'aecf5a330cfc1061445a9af5b2df392d',
+        'md5': 'ab96c4dae94322ece1e98d97c8dc7807',
         'info_dict': {
             'id': 'd56d03e9',
-            'url': 're:http://httpod.scdn.arkena.com/11970/d56d03e9_[0-9]+.mp4',
+            'url': 're:http://httpod.scdn.arkena.com/11970/d56d03e9_.*',
             'ext': 'mp4',
             'title': 'd56d03e9',
             'upload_date': '20160226',
             'timestamp': 1456488895
         }
-    }, {
-        'url': 'http://www.lcp.fr/emissions/parlementair',
-        'md5': '9b63769445cbe5f26952bef71f281e8c',
-        'info_dict': {
-            'id': '327499',
-            'url': 're:http://httpod.scdn.arkena.com/11970/327499_[0-9]+.mp4',
-            'ext': 'mp4',
-            'title': '327499',
-            'upload_date': '20160304',
-            'timestamp': 1457098658
-        }
     }, {
         'url': 'http://www.lcp.fr/le-direct',
         'info_dict': {

From d4951f7489e9b84b3b76842a6c8bfa1f3b47cd44 Mon Sep 17 00:00:00 2001
From: Rob van Bekkum <rob_van_bekkum@hotmail.com>
Date: Fri, 11 Mar 2016 01:24:51 +0100
Subject: [PATCH 11/11] Added support for different formats (DASH mpd, webm)
 and refactored ArkenaPlay extractor code

---
 youtube_dl/extractor/arkenaplay.py | 116 +++++++++++++----------------
 1 file changed, 52 insertions(+), 64 deletions(-)

diff --git a/youtube_dl/extractor/arkenaplay.py b/youtube_dl/extractor/arkenaplay.py
index 81c6d6e35..0061ea196 100644
--- a/youtube_dl/extractor/arkenaplay.py
+++ b/youtube_dl/extractor/arkenaplay.py
@@ -5,15 +5,16 @@ from ..utils import (
     int_or_none,
     parse_iso8601
 )
+import re
 
 
 class ArkenaPlayIE(InfoExtractor):
     IE_NAME = 'ArkenaPlay'
-    _VALID_URL = r'(?P<host>https?://(?:www\.)?play\..*\..*)/embed/.*(?P<id>\d+)?/.*'
+    _VALID_URL = r'(?P<shortcut>arkena:(?P<version>[0-9]+):(?P<mediatype>[A-Za-z0-9]+):(?P<mediaId>[^:]+):(?P<widgetsettingId>[A-Za-z0-9]+):(?P<accountId>[A-Za-z0-9]+))|(?:(?P<host>https?://(?:www\.)?play\..*\..*)/embed/(?:avp/v[0-9]+/player/[A-Za-z0-9]+/)?(?P<id>.*)?)'
 
     _TESTS = [{
         'url': 'http://play.lcp.fr/embed/327336/131064/darkmatter/0',
-        'md5': '7d857b1af491ec0f6c2610e52df1ff82',
+        'md5': '6cea4f7d13810464ef8485a924fc3333',
         'info_dict': {
             'id': '327336',
             'url': 're:http://httpod.scdn.arkena.com/11970/327336.*',
@@ -23,7 +24,8 @@ class ArkenaPlayIE(InfoExtractor):
             'timestamp': 1456391602
         }
     }, {
-        'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
+        # Shortcut for: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+        'url': 'arkena:2:media:b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe:1:129411',
         'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
         'info_dict': {
             'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
@@ -37,15 +39,26 @@ class ArkenaPlayIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        display_id = self._search_regex(self._VALID_URL, url, 'host_name', group='id')
-        webpage = self._download_webpage(url, display_id)
+        mobj = re.match(self._VALID_URL, url)
+        if mobj.group('shortcut'):
+            version = mobj.group('version')
+            mediatype = mobj.group('mediatype')
+            mediaid = mobj.group('mediaId')
+            widgetsettingid = mobj.group('widgetsettingId')
+            accountid = mobj.group('accountId')
+            display_id = '{0}:{1}:{2}:{3}'.format(mediatype, mediaid, widgetsettingid, accountid)
+            media_url = 'https://play.arkena.com/config/avp/v{0}/player/{1}/{2}/{3}/{4}/?callbackMethod=?'.format(
+                version, mediatype, mediaid, widgetsettingid, accountid)
+        else:
+            display_id = self._search_regex(self._VALID_URL, url, 'host_name', group='id')
+            webpage = self._download_webpage(url, display_id)
 
-        media_url_regex = '"(?P<mediainfo>(?P<host>.*)/(c|C)onfig/.*\?callbackMethod=\?)"'
-        media_url = self._html_search_regex(media_url_regex, webpage, 'arkena_media_info_url')
-        hostname = self._html_search_regex(media_url_regex, webpage, 'arkena_media_host', group='host')
-        if not hostname:
-            hostname = self._search_regex(self._VALID_URL, url, 'host_name', group='host')
-            media_url = hostname + media_url
+            media_url_regex = '"(?P<mediainfo>(?P<host>.*)/(c|C)onfig/.*\?callbackMethod=\?)"'
+            media_url = self._html_search_regex(media_url_regex, webpage, 'arkena_media_info_url')
+            hostname = self._html_search_regex(media_url_regex, webpage, 'arkena_media_host', group='host')
+            if not hostname:
+                hostname = self._search_regex(self._VALID_URL, url, 'host_name', group='host')
+                media_url = hostname + media_url
 
         # Extract the required info of the media files gathered in a dictionary
         arkena_info = self._download_webpage(media_url, 'arkena_info_')
@@ -107,57 +120,32 @@ class ArkenaPlayIE(InfoExtractor):
         if not media_files:
             return None
 
-        formats.extend(self.__get_mp4_video_formats(media_files))
-        formats.extend(self.__get_m3u8_video_formats(media_files, video_id))
-        formats.extend(self.__get_flash_video_formats(media_files, video_id))
-        # TODO <DASH (mpd) formats>
+        for type_name, video_files_json in media_files.iteritems():
+            for video_info in video_files_json:
+                video_url = video_info.get('Url')
+                if not video_url:
+                    continue
+                type = video_info.get('Type')
+                if type_name in ['Mp4', 'WebM', 'Flash']:
+                    bitrate = int_or_none(video_info.get('Bitrate'), scale=1000)
+                    ext = None
+                    if type == 'video/mp4':
+                        ext = 'mp4'
+                    elif type == 'video/webm':
+                        ext = 'webm'
+                    elif type == 'video/x-flv':
+                        ext = 'flv'
+                    formats.append({
+                        'url': video_url,
+                        'ext': ext,
+                        'tbr': bitrate
+                    })
+                elif type_name == 'M3u8' and type == 'application/x-mpegURL':
+                    formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                elif type_name == 'Flash' and type == 'application/hds+xml':
+                    formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
+                elif type_name == 'Dash' and type == 'application/dash+xml':
+                    formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash', fatal=False))
+
         self._sort_formats(formats)
-        return formats
-
-    def __get_mp4_video_formats(self, media_files_json):
-        formats = []
-        mp4_files_json = media_files_json.get('Mp4')
-        if not mp4_files_json:
-            return None
-        for video_info in mp4_files_json:
-            bitrate = int_or_none(video_info.get('Bitrate'), scale=1000)  # Scale bitrate to KBit/s
-            video_url = video_info.get('Url')
-            if not video_url:
-                continue
-            formats.append({
-                'url': video_url,
-                'ext': 'mp4',
-                'tbr': bitrate
-            })
-        return formats
-
-    def __get_m3u8_video_formats(self, media_files_json, video_id):
-        formats = []
-        m3u8_files_json = media_files_json.get('M3u8')
-        if not m3u8_files_json:
-            return None
-        for video_info in m3u8_files_json:
-            video_url = video_info.get('Url')
-            if not video_url:
-                continue
-            formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
-        return formats
-
-    def __get_flash_video_formats(self, media_files_json, video_id):
-        formats = []
-        flash_files_json = media_files_json.get('Flash')
-        if not flash_files_json:
-            return None
-        for video_info in flash_files_json:
-            video_url = video_info.get('Url')
-            if not video_url:
-                continue
-            video_type = video_info.get('Type')
-            if video_type == 'application/hds+xml':
-                formats.extend(self._extract_f4m_formats(video_url, video_id, f4m_id='hds', fatal=False))
-            elif video_type == 'video/x-flv':
-                formats.append({
-                    'url': video_url,
-                    'ext': 'flv'
-                })
-        return formats
+        return formats
\ No newline at end of file