1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 14:00:11 +08:00
DarkZeros 176653fb9d WIP New IE for Mitele
Mitele has changed the website. This is an incomplete parser, that just
grabs the keys, and the data of the video. But still no real video
JSON/MMC downloading. I will continue when I have more time
2016-10-04 02:04:24 +01:00

186 lines
7.5 KiB
Python

# coding: utf-8
from __future__ import unicode_literals
import re
import uuid
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..utils import (
get_element_by_attribute,
int_or_none,
remove_start,
extract_attributes,
determine_ext,
)
class MiTeleBaseIE(InfoExtractor):
def _get_player_info(self, url, webpage):
player_data = extract_attributes(self._search_regex(
r'(?s)(<ms-video-player.+?</ms-video-player>)',
webpage, 'ms video player'))
video_id = player_data['data-media-id']
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
config = self._download_json(
config_url, video_id, 'Downloading config JSON')
mmc_url = config['services']['mmc']
duration = None
formats = []
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
mmc = self._download_json(
m_url, video_id, 'Downloading mmc JSON')
if not duration:
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
bas = location.get('bas')
loc = location.get('loc')
ogn = location.get('ogn')
if None in (gat, bas, loc, ogn):
continue
token_data = {
'bas': bas,
'icd': loc,
'ogn': ogn,
'sta': '0',
}
media = self._download_json(
'%s/?%s' % (gat, compat_urllib_parse_urlencode(token_data)),
video_id, 'Downloading %s JSON' % location['loc'])
file_ = media.get('file')
if not file_:
continue
ext = determine_ext(file_)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
'duration': duration,
}
def _mitele_extract(self, url, webpage):
#Extract video ID from URL
video_id = self._search_regex(
r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){2}([0-9a-zA-Z]+)/[^/]*',
url, 'video_id', default=None)
#Get the Gigya script for getting auth keys
gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), 'Downloading gigya script')
#Get a appKey/uuid for getting the session key
appKey = self._search_regex(r'value\("appGridApplicationKey","([0-9a-f]*)"\)', gigya_sc, 'appKey', default=None)
uid = str(uuid.uuid4())
#Extract session keys
session_url = r'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey,uid)
session_json = self._download_json(session_url, video_id, 'Downloading session keys')
sessionKey = str(session_json['sessionKey'])
#Extract paths
paths_url = r'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey='+sessionKey
paths = self._download_json(paths_url, video_id, 'Downloading paths JSON')
#Extract data from the provider with ID
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
data_p = 'http://'+ooyala_s['base_url']+ooyala_s['full_path']+ooyala_s['provider_id']+\
'/docs/'+video_id+'?include_titles=Series,Season&product_name=test&format=full'
data = self._download_json(data_p, id, 'Downloading data JSON')
import pdb; pdb.set_trace()
#TODO THE REST
info = self._get_player_info(url, webpage)
title = self._search_regex(
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>',
webpage, 'title', default=None)
mobj = re.search(r'''(?sx)
class="Destacado-text"[^>]*>.*?<h1>\s*
<span>(?P<series>[^<]+)</span>\s*
<span>(?P<season>[^<]+)</span>\s*
<span>(?P<episode>[^<]+)</span>''', webpage)
series, season, episode = mobj.groups() if mobj else [None] * 3
if not title:
if mobj:
title = '%s - %s - %s' % (series, season, episode)
else:
title = remove_start(self._search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ')
info.update({
'display_id': display_id,
'title': title,
'description': get_element_by_attribute('class', 'text', webpage),
'series': series,
'season': season,
'episode': episode,
})
return info
class MiTeleIE(MiTeleBaseIE):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){2}(?P<id>[^/]+)/[^/]*' # NEW FORMAT
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return self._mitele_extract(url, webpage)
class MiTeleOldIE(MiTeleIE):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' # OLD FORMAT
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
# MD5 is unstable
'info_dict': {
'id': '0NF1jJnxS1Wu3pHrmvFyw2',
'display_id': 'programa-144',
'ext': 'mp4',
'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
'series': 'Diario de',
'season': 'La redacción',
'episode': 'Programa 144',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913,
},
}, {
# no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/',
'info_dict': {
'id': 'eLZSwoEd1S3pVyUm8lc6F',
'display_id': 'programa-226',
'ext': 'mp4',
'title': 'Cuarto Milenio - Temporada 6 - Programa 226',
'description': 'md5:50daf9fadefa4e62d9fc866d0c015701',
'series': 'Cuarto Milenio',
'season': 'Temporada 6',
'episode': 'Programa 226',
'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 7312,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
resp = self._download_webpage_handle(url, display_id)
#If the address is "OLD" then will give us a redirected URL for the new address
#Continue with the new parser
return self._mitele_extract(resp[1].url, resp[0])