1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-02-10 12:22:54 +08:00

[tegnamedia] Add new information extractors

This commit is contained in:
Alex Seiler 2017-02-17 22:58:07 +01:00
parent cf3704c132
commit b602c16f61
2 changed files with 129 additions and 0 deletions

View File

@ -932,6 +932,11 @@ from .teamcoco import TeamcocoIE
from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tegnamedia import (
NineNewsIE,
THVElevenIE,
TwelveNewsIE,
)
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE

View File

@ -0,0 +1,124 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
str_or_none,
unified_timestamp,
)
class TegnaMediaIE(InfoExtractor):
    """Shared extraction logic for TEGNA Media station websites.

    Subclasses provide ``_VALID_URL`` and the station-specific
    ``SUBSCRIPTION_KEY`` that is appended to the video API request.
    """

    # Subscription key sent to the video API; overridden per station.
    SUBSCRIPTION_KEY = ''

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the page at *url*.

        The page is downloaded, the ``data-id`` and ``data-site``
        attributes are read from the ``js-jwloader`` div, and the video
        API is queried with them to obtain metadata and source URLs.
        ``Id`` and ``Title`` are required in the API response; all other
        metadata fields are treated as optional.
        """
        show_id = self._match_id(url)
        webpage = self._download_webpage(url, show_id)

        # Attribute string of the player div; the ids are parsed from it.
        player_info = self._html_search_regex(
            r'<div[^>]+class="js-jwloader"(?P<info>[^>]+)', webpage, 'player info')
        data_id = self._search_regex(
            r'data-id="(?P<id>\d+)"', player_info, 'video id')
        data_site = self._search_regex(
            r'data-site="(?P<data_site>\d+)"', player_info, 'data site')

        api_url = 'http://api.tegna-tv.com/video/v2/getAllVideoPathsById/%s/%s?subscription-key=%s' % (data_id, data_site, self.SUBSCRIPTION_KEY)
        video_json = self._download_json(api_url, show_id)

        # Mandatory fields: a missing key here should fail loudly.
        video_id = str_or_none(video_json['Id'])
        title = str_or_none(video_json['Title'])

        # Optional fields: a missing key must not abort the extraction.
        description = str_or_none(video_json.get('Description'))
        thumbnail = str_or_none(video_json.get('Image'))
        duration = parse_duration(str_or_none(video_json.get('VideoLength')))
        timestamp = unified_timestamp(str_or_none(video_json.get('DateCreated')))

        formats = []
        # 'Sources' may be absent or null; treat both as "no sources" so
        # that _sort_formats reports the problem instead of a TypeError.
        for source in video_json.get('Sources') or []:
            path = str_or_none(source.get('Path'))
            if not path:
                continue
            source_format = source.get('Format')
            if source_format == 'MP4':
                rate = source.get('EncodingRate')
                rate_str = str_or_none(rate)
                formats.append({
                    'url': path,
                    # Fall back to a plain id when the rate is unknown.
                    'format_id': ('mp4-' + rate_str) if rate_str else 'mp4',
                    'vbr': int_or_none(rate),
                })
            elif source_format == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    path, video_id, ext='mp4', entry_protocol='m3u8_native'))
            # HDS sources are deliberately skipped: the format is listed in
            # the JSON, but extracting it (e.g. via _extract_akamai_formats)
            # did not yield anything actually available.
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
class NineNewsIE(TegnaMediaIE):
    """Extractor for 9news.com pages; extraction is inherited from TegnaMediaIE."""

    _VALID_URL = r'https?://(?:www\.)?9news\.com/.+/(?P<id>[0-9]+)'
    SUBSCRIPTION_KEY = 'ae1d3e46c9914e9b87757fead91d7654'

    _TEST = {
        'url': 'http://www.9news.com/news/local/father-worries-about-immigration-status/408808900',
        'md5': 'e367c89e52eed4ff3bcc696d664e4f4b',
        'info_dict': {
            'id': '2512310',
            'ext': 'mp4',
            'title': 'Father worries about immigration status',
            'description': '9NEWS @ 9. 2/15/2017',
            'thumbnail': 'http://kusa-download.edgesuite.net/video/2512310/2512310_Still.jpg',
            'duration': 96.0,
            'timestamp': 1487218434,
            'upload_date': '20170216',
        },
    }
class TwelveNewsIE(TegnaMediaIE):
    """Extractor for 12news.com pages; extraction is inherited from TegnaMediaIE."""

    _VALID_URL = r'https?://(?:www\.)?12news\.com/.+/(?P<id>[0-9]+)'
    SUBSCRIPTION_KEY = 'd721cdf2210c493cb8a194d1e53b4ef5'

    _TEST = {
        'url': 'http://www.12news.com/news/local/valley/dps-stops-wrong-way-driver-after-several-miles/408864874',
        'info_dict': {
            'id': '2514219',
            'ext': 'mp4',
            'title': "Megan Melanson's initial court appearance",
            'description': 'md5:24188e754669c29700e8dd6d19e4943b',
            'timestamp': 1487360943,
            'upload_date': '20170217',
        },
        'params': {
            # The test only checks metadata; the media file is not fetched.
            'skip_download': True,
        },
    }
class THVElevenIE(TegnaMediaIE):
    """Extractor for thv11.com pages; extraction is inherited from TegnaMediaIE."""

    _VALID_URL = r'https?://(?:www\.)?thv11\.com/.+/(?P<id>[0-9]+)'
    SUBSCRIPTION_KEY = 'd8d2110b71e5490f8652a270ef1cc8c2'