1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-09 12:27:15 +08:00

First implementation for the TV5UnisCa extractor

This commit is contained in:
Frederic Bournival 2020-04-19 17:13:54 -04:00
parent 00eb865b3c
commit 5333bea24f
2 changed files with 52 additions and 0 deletions

View File

@ -1181,6 +1181,7 @@ from .tv2dk import (
from .tv2hu import TV2HuIE from .tv2hu import TV2HuIE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE from .tv5mondeplus import TV5MondePlusIE
from .tv5unisca import TV5UnisCaIE
from .tva import TVAIE from .tva import TVAIE
from .tvanouvelles import ( from .tvanouvelles import (
TVANouvellesIE, TVANouvellesIE,

View File

@ -0,0 +1,51 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
get_element_by_id
)
import re
class TV5UnisCaIE(InfoExtractor):
    """Information extractor for video pages on tv5unis.ca.

    The page embeds its data as JSON in the ``__NEXT_DATA__`` element;
    the ``props.apolloState`` mapping inside it holds both the JSON-LD
    metadata and the per-encoding stream URLs that are read here.
    """

    IE_DESC = 'TV5UNISCA'
    _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^?#]+)'
    _TESTS = []
    _GEO_BYPASS = False

    # Key used by the original implementation for the page metadata block.
    _META_KEY = '$ArtisanBlocksPageMetaData:50.blockConfiguration.pageMetaDataConfiguration'
    # Same key with any block number, in case it is not always 50.
    _META_KEY_RE = re.compile(
        r'\$ArtisanBlocksPageMetaData:\d+\.blockConfiguration\.pageMetaDataConfiguration$')
    # Keys of the apollo state entries that describe one video encoding.
    _ENCODING_KEY_RE = re.compile(r'\$Video:\d+\.encodings\.')

    def _find_json_ld(self, apollo_state):
        """Return the JSON-LD payload stored in *apollo_state*, or None."""
        candidates = [apollo_state.get(self._META_KEY)]
        candidates.extend(
            value for key, value in apollo_state.items()
            if self._META_KEY_RE.match(key))
        for meta in candidates:
            if isinstance(meta, dict) and meta.get('jsonLd'):
                return meta['jsonLd']
        return None

    def _real_extract(self, format_url):
        """Build the info dict for the page at *format_url*.

        Returns a dict carrying ``id``, ``display_id``, ``formats`` and
        whatever metadata the page's JSON-LD provides.
        """
        display_id = self._match_id(format_url)
        webpage = self._download_webpage(format_url, display_id)

        # A missing element yields None; hand _parse_json an empty string
        # instead so the failure is reported as a JSON parsing error
        # rather than a TypeError raised from json.loads(None).
        next_data = self._parse_json(
            get_element_by_id('__NEXT_DATA__', webpage) or '', display_id)
        apollo_state = (next_data.get('props') or {}).get('apolloState') or {}

        json_ld = self._find_json_ld(apollo_state)
        info_dict = self._json_ld(
            json_ld, display_id, fatal=False) if json_ld else {}
        if not info_dict.get('title'):
            # JSON-LD is only one metadata source; use og:title without it.
            info_dict['title'] = self._og_search_title(webpage)

        formats = []
        for key, encoding in apollo_state.items():
            if not self._ENCODING_KEY_RE.match(key):
                continue
            if not isinstance(encoding, dict):
                continue
            media_url = encoding.get('url')
            if not media_url:
                continue
            # Classify on the path only so a query string does not hide
            # the file extension.
            media_path = media_url.split('?', 1)[0]
            if media_path.endswith('.m3u8'):
                formats.extend(self._extract_m3u8_formats(
                    media_url, display_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif media_path.endswith('.ism/manifest'):
                formats.extend(self._extract_ism_formats(
                    media_url, display_id, ism_id='mss', fatal=False))
            elif media_path.endswith('.mp4'):
                formats.append({
                    'url': media_url,
                    'format_id': 'http',
                })
        # Orders the formats by quality; also reports an empty list.
        self._sort_formats(formats)

        info_dict['id'] = info_dict['display_id'] = display_id
        info_dict['formats'] = formats
        return info_dict