1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-25 12:12:55 +08:00

[la7] Update the extractor to match the current la7.it site

La7.it no longer uses the richplayer, so the old parsing logic no longer
works. Rework the extractor completely to match the current site.

XXX: Unfortunately, I was only able to test this from a connection in Italy,
XXX: so I am not sure whether the content is geo-restricted.
This commit is contained in:
Leonardo Taccari 2016-04-15 11:52:14 +02:00
parent 66d40ae3a5
commit 54ece30cc7

View File

@ -1,60 +1,37 @@
# encoding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
parse_duration,
)
class LA7IE(InfoExtractor):
    """Extractor for video pages on la7.it.

    Matches ``/<section>/rivedila7/<slug>`` and ``/<section>/video/<slug>``
    URLs. The slug becomes the display ID; the media ID and the direct MP4
    URL are scraped from the page source, and title, description and
    thumbnail come from the page's Open Graph helpers.
    """

    # NOTE(review): the name still says la7.tv although _VALID_URL targets
    # la7.it; left unchanged so the extractor keeps its existing name.
    IE_NAME = 'la7.tv'
    _VALID_URL = r'https?://(?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/(?P<id>.+)'

    _TEST = {
        'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
        'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
        'info_dict': {
            'id': '0_42j6wd36',
            'ext': 'mp4',
            'title': 'Inc.Cool8',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg',
            'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
        },
        # NOTE(review): only verified from an Italian connection; it is not
        # confirmed whether la7.it is geo-restricted - revisit this skip.
        'skip': 'Blocked in the US',
    }

    def _real_extract(self, url):
        """Build the info dict for a single la7.it video page.

        :param url: page URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``display_id``, ``url``, ``title``,
            ``description`` and ``thumbnail``.
        """
        # The URL slug is only a human-readable identifier; the real media
        # ID is taken from the page itself below.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Both values are read from key/value pairs in the page source:
        # a quoted "entry_id" key and an unquoted src_mp4 key.
        video_id = self._search_regex(
            r'"entry_id"(?:\s*):(?:\s*)"([^"]+)"', webpage, 'video ID')
        video_url = self._search_regex(
            r'src_mp4(?:\s*):(?:\s*)"([^"]+)"', webpage, 'video URL')

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }