From 54ece30cc7adf8567190f71cfdcd2ccf9a6bfbd0 Mon Sep 17 00:00:00 2001
From: Leonardo Taccari
Date: Fri, 15 Apr 2016 11:52:14 +0200
Subject: [PATCH] [la7] Update to fix the extractor to reflect the current reality

La7.it no longer uses the richplayer and the parsing logic is now
completely changed. Completely rework it to fix that.

XXX: Unfortunately I was able to test it only using a connection from Italy.
XXX: I am not sure if it is georestricted or not.
---
Review notes (text in this section is not part of the commit message):
- Restored the `id` named group in both the removed and the added
  _VALID_URL patterns; `_match_id()` relies on a group with that name.
- The thumbnail test pattern is now a raw string so that `\.` is not an
  invalid string escape.
- Dropped the no-op `(?:...)` wrappers around `\s*` and the unused capture
  group in _VALID_URL; the set of matched inputs is unchanged.
- Line counts are unchanged, so the diffstat and hunk header still apply.

 youtube_dl/extractor/la7.py | 57 +++++++++++-------------------------
 1 file changed, 17 insertions(+), 40 deletions(-)

diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py
index b08f6e3c9..6b9d6633c 100644
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -1,60 +1,37 @@
+# encoding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import (
-    parse_duration,
-)
 
 
 class LA7IE(InfoExtractor):
     IE_NAME = 'la7.tv'
-    _VALID_URL = r'''(?x)
-        https?://(?:www\.)?la7\.tv/
-        (?:
-            richplayer/\?assetid=|
-            \?contentId=
-        )
-        (?P<id>[0-9]+)'''
+    _VALID_URL = r'https?://(?:www\.)?la7\.it/[^/]+/(?:rivedila7|video)/(?P<id>.+)'
 
     _TEST = {
-        'url': 'http://www.la7.tv/richplayer/?assetid=50355319',
-        'md5': 'ec7d1f0224d20ba293ab56cf2259651f',
+        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
+        'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
         'info_dict': {
-            'id': '50355319',
+            'id': '0_42j6wd36',
             'ext': 'mp4',
-            'title': 'IL DIVO',
-            'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti e Flavio Bucci',
-            'duration': 6254,
+            'title': 'Inc.Cool8',
+            'thumbnail': r're:http://.*\.jpg',
+            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
         },
-        'skip': 'Blocked in the US',
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id
-        doc = self._download_xml(xml_url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
-        video_title = doc.find('title').text
-        description = doc.find('description').text
-        duration = parse_duration(doc.find('duration').text)
-        thumbnail = doc.find('img').text
-        view_count = int(doc.find('views').text)
-
-        prefix = doc.find('.//fqdn').text.strip().replace('auto:', 'http:')
-
-        formats = [{
-            'format': vnode.find('quality').text,
-            'tbr': int(vnode.find('quality').text),
-            'url': vnode.find('fms').text.strip().replace('mp4:', prefix),
-        } for vnode in doc.findall('.//videos/video')]
-        self._sort_formats(formats)
+        video_id = self._search_regex(r'"entry_id"\s*:\s*"([^"]+)"', webpage, 'video ID')
+        video_url = self._search_regex(r'src_mp4\s*:\s*"([^"]+)"', webpage, 'video URL')
 
         return {
             'id': video_id,
-            'title': video_title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'duration': duration,
-            'formats': formats,
-            'view_count': view_count,
+            'display_id': display_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
         }