From 54ece30cc7adf8567190f71cfdcd2ccf9a6bfbd0 Mon Sep 17 00:00:00 2001
From: Leonardo Taccari <iamleot@gmail.com>
Date: Fri, 15 Apr 2016 11:52:14 +0200
Subject: [PATCH] [la7] Update to fix the extractor to reflect the current
 reality

La7.it no longer uses the richplayer and the parsing logic is now
completely changed. Completely rework it to fix that.

XXX: Unfortunately I was able to test it only using a connection from Italy.
XXX: I am not sure if it is georestricted or not.
---
 youtube_dl/extractor/la7.py | 57 +++++++++++--------------------------
 1 file changed, 17 insertions(+), 40 deletions(-)
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py
index b08f6e3c9..6b9d6633c 100644
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -1,60 +1,37 @@
+# encoding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    parse_duration,
-)
 
 
 class LA7IE(InfoExtractor):
     IE_NAME = 'la7.tv'
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?la7\.tv/
-        (?:
-            richplayer/\?assetid=|
-            \?contentId=
-        )
-        (?P<id>[0-9]+)'''
+    _VALID_URL = r'https?://(?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/(?P<id>.+)'
 
     _TEST = {
-        'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
-        'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
+        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
+        'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
         'info_dict': {
-            'id': '50355319',
+            'id': '0_42j6wd36',
             'ext': 'mp4',
-            'title': 'IL DIVO',
-            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
-            'duration': 6254,
+            'title': 'Inc.Cool8',
+            'thumbnail': 're:http://.*\.jpg',
+            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto  atletico',
         },
-        'skip': 'Blocked in the US',
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
-        doc = self._download_xml(xml_url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
-        video_title = doc.find('title').text
-        description = doc.find('description').text
-        duration = parse_duration(doc.find('duration').text)
-        thumbnail = doc.find('img').text
-        view_count = int(doc.find('views').text)
-
-        prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
-
-        formats = [{
-            'format': vnode.find('quality').text,
-            'tbr': int(vnode.find('quality').text),
-            'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
-        } for vnode in doc.findall('.//videos/video')]
-        self._sort_formats(formats)
+        video_id = self._search_regex(r'"entry_id"(?:\s*):(?:\s*)"([^"]+)"', webpage, 'video ID')
+        video_url = self._search_regex(r'src_mp4(?:\s*):(?:\s*)"([^"]+)"', webpage, 'video URL')
 
         return {
             'id': video_id,
-            'title': video_title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-            'view_count': view_count,
+            'display_id': display_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
         }