[la7] Update the extractor to match the current la7.it site

La7.it no longer uses the richplayer, so the old parsing logic no longer works. Rework the extractor completely to match the new page layout. XXX: Unfortunately, I was only able to test it over a connection from Italy, so I am not sure whether the content is geo-restricted.
2026-06-06 23:53:46 +08:00 · 2016-04-15 11:52:14 +02:00
parent 66d40ae3a5
commit 54ece30cc7
1 changed files with 17 additions and 40 deletions
@@ -1,60 +1,37 @@
+# encoding: utf-8
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import (
-    parse_duration,
-)


 class LA7IE(InfoExtractor):
    IE_NAME = 'la7.tv'
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?la7\.tv/
-        (?:
-            richplayer/\?assetid=|
-            \?contentId=
-        )
-        (?P<id>[0-9]+)'''
+    _VALID_URL = r'https?://(?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/(?P<id>.+)'

    _TEST = {
-        'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
-        'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
+        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
+        'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
        'info_dict': {
-            'id': '50355319',
+            'id': '0_42j6wd36',
            'ext': 'mp4',
-            'title': 'IL DIVO',
-            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',
-            'duration': 6254,
+            'title': 'Inc.Cool8',
+            'thumbnail': 're:http://.*\.jpg',
+            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto  atletico',
        },
-        'skip': 'Blocked in the US',
    }

    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
-        doc = self._download_xml(xml_url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)

-        video_title = doc.find('title').text
-        description = doc.find('description').text
-        duration = parse_duration(doc.find('duration').text)
-        thumbnail = doc.find('img').text
-        view_count = int(doc.find('views').text)
-
-        prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
-
-        formats = [{
-            'format': vnode.find('quality').text,
-            'tbr': int(vnode.find('quality').text),
-            'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
-        } for vnode in doc.findall('.//videos/video')]
-        self._sort_formats(formats)
+        video_id = self._search_regex(r'"entry_id"(?:\s*):(?:\s*)"([^"]+)"', webpage, 'video ID')
+        video_url = self._search_regex(r'src_mp4(?:\s*):(?:\s*)"([^"]+)"', webpage, 'video URL')

        return {
            'id': video_id,
-            'title': video_title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-            'view_count': view_count,
+            'display_id': display_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
        }