diff --git a/youtube_dl/extractor/tv5unisca.py b/youtube_dl/extractor/tv5unisca.py
index 04bb981a4..2897c4124 100644
--- a/youtube_dl/extractor/tv5unisca.py
+++ b/youtube_dl/extractor/tv5unisca.py
@@ -13,8 +13,33 @@ import re
 
 class TV5UnisCaIE(InfoExtractor):
     IE_DESC = 'TV5UNISCA'
     _VALID_URL = r'https?://(?:www\.)?tv5unis\.ca/videos/(?P<id>[^?#]+)'
-    _TESTS = []
-    _GEO_COUNTRIES = ['CA', 'FR']
+    _TESTS = [{
+        'url': 'https://www.tv5unis.ca/videos/expedition-kayak/saisons/1/episodes/2',
+        'info_dict': {
+            'id': 'expedition-kayak/saisons/1/episodes/2',
+            'episode_number': 2,
+            'season_number': 1,
+            'ext': 'm3u8',
+            'title': 'Expédition kayak - Gaspésie 2',
+            'description': 'md5:aecf01897141d3997f10685b3f2662ef',
+            'upload_date': '20200417',
+            'timestamp': 1587085203,
+        }
+    }, {
+        'url': 'https://www.tv5unis.ca/videos/la-bataille-de-notre-dame',
+        'info_dict': {
+            'id': 'la-bataille-de-notre-dame',
+            'ext': 'm3u8',
+            'title': 'La bataille de Notre-Dame',
+            'description': 'md5:b69a25dbe9b1880eadad219af7372a7c',
+            'upload_date': '20200414',
+            'timestamp': 1586824384,
+        },
+        'params': {
+            'skip_download': True,
+        }
+    }]
+    _GEO_BYPASS = False
 
     def _real_extract(self, url):
@@ -22,7 +47,7 @@ class TV5UnisCaIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         next_data_dict = self._parse_json(
-            get_element_by_id('__NEXT_DATA__', webpage), display_id)\
+            get_element_by_id('__NEXT_DATA__', webpage), display_id) \
             .get('props').get('apolloState')
 
         info_dict = self._json_ld(
@@ -30,6 +55,11 @@ class TV5UnisCaIE(InfoExtractor):
             display_id
         )
 
+        if info_dict.get('season', ''):
+            info_dict['title'] = ' - '.join((info_dict.get('season', ''), info_dict.get('episode', '')))
+
+        info_dict['id'] = info_dict['display_id'] = display_id
+
         formats = []
         for key in filter(lambda k: re.match(r'\$Video:\d+\.encodings\.', k), next_data_dict.keys()):
             url = next_data_dict[key].get('url')
@@ -37,15 +67,13 @@ class TV5UnisCaIE(InfoExtractor):
                 continue
             if url.endswith('.m3u8'):
                 formats.extend(self._extract_m3u8_formats(url, display_id))
-            if url.endswith('.ism/manifest'):
-                formats.extend(self._extract_ism_formats(url, display_id, ism_id='mss', fatal=False))
             if url.endswith('.mp4'):
                 formats.append({
                     'url': url,
                     'format_id': 'http'
                 })
 
-        info_dict['id'] = info_dict['display_id'] = display_id
+        self._sort_formats(formats)
 
         info_dict['formats'] = formats
         return info_dict