1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-02-02 22:52:52 +08:00
youtube-dl/youtube_dl/extractor/olympicchannel.py

97 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
class OlympicChannelIE(InfoExtractor):
    """Extractor for olympicchannel.com ``video`` and ``original-series`` pages.

    The page URL only carries a human-readable slug, which is used as the
    ``display_id``.  The actual video id is not present in the page URL; it is
    taken from the m3u8 playlist URL that the page exposes in a
    ``<meta name="video_url">`` tag.
    """

    IE_NAME = 'olympicchannel'
    # Any number of intermediate path segments (series name, season,
    # "episodes", ...) may precede the slug.  The slug itself is limited to a
    # single path segment so that a trailing slash or a query string never
    # leaks into display_id.
    _VALID_URL = r'https?://(?:www\.)?olympicchannel\.com/(?P<language>..)/(?:original-series|video)/detail/(?:[^/?#]+/)*(?P<display_id>[^/?#]+)'
    _TESTS = [
        {
            'url': 'https://www.olympicchannel.com/en/video/detail/news-of-the-week-with-ash-tulloch-x9414/',
            'md5': 'aee7e665ad4bf45936e0f5d861e56ac5',
            'info_dict': {
                '_type': 'video',
                'id': 'E18112220',
                'ext': 'mp4',
                'title': 'News of the Week with Ash Tulloch',
                'thumbnail': 'https://img.olympicchannel.com/images/image/private/t_social_share_thumb/primary/fzcmi2e6kji6cnjjs1xz',
                'description': 'Exclusive interviews with Rika Kihira after her Grand Prix finals win, Yuzuru Hanyu&#39;s coach on the injured star, and Valerie Adams on Tokyo.',
            }
        },
        {
            'url': 'https://www.olympicchannel.com/en/original-series/detail/fashion-behind-the-games/fashion-behind-the-games-season-1/episodes/past-and-present-field-hockey/',
            'md5': '0da7ace7ee712e777e56cdc736edfcb5',
            'info_dict': {
                '_type': 'video',
                'id': 'E17060701',
                'ext': 'mp4',
                'title': 'Past and Present Field hockey',
                'thumbnail': 'https://img.olympicchannel.com/images/image/private/t_social_share_thumb/primary/z61ca9vgb4h6t6r2se1f',
                'description': 'We reunite a pair of &quot;Las Leonas&quot; and travel back to the early days of field hockey.',
            }
        },
    ]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Raises whatever the base-class helpers raise when the playlist URL,
        the title or the video id cannot be found; a missing thumbnail is
        tolerated and reported as ``None``.
        """
        display_id = re.match(self._VALID_URL, url).group('display_id')
        webpage = self._download_webpage(url, display_id)

        # URL of the video's m3u8 playlist, as advertised by the page.
        m3u8_url = self._html_search_regex(
            r'<meta name="video_url" content="(.+?)" />',
            webpage, 'm3u8_url')

        # Both patterns are handed over in one call so that the <title>
        # fallback is actually tried: a fatal search raises on failure, which
        # means chaining two separate calls with ``or`` never reaches the
        # second one.
        title = self._html_search_regex(
            (r'<meta name="episode_title" content="(.+?)" />',
             r'<title>(.+?) \| Olympic Channel</title>'),
            webpage, 'title')

        # The unique video id is the 9-character token found between two
        # underscores in the playlist URL.
        video_id = self._search_regex(r'_(.........)_', m3u8_url, 'id')

        # The thumbnail is optional metadata; its absence must not abort the
        # extraction of an otherwise playable video.
        thumbnail_url = self._html_search_regex(
            r'<meta name="og:image" content="(.+?)" />',
            webpage, 'thumbnail', default=None)

        return {
            '_type': 'video',
            'display_id': display_id,
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'),
            'thumbnail': thumbnail_url,
        }
# NOTE(review): everything between the triple quotes below is a bare string
# literal, not live code -- the OlympicChannelOriginalIE class it contains is
# never defined.  It looks like an earlier draft of a separate extractor for
# "original-series" episode pages; those URLs are also accepted by
# OlympicChannelIE._VALID_URL.
# Known inconsistencies inside the draft, should it ever be revived:
#   * its _TEST url is a /video/detail/ page, which its own _VALID_URL
#     (requiring /original-series/detail/<series>/episodes/) does not match;
#   * it reports '_type': 'multi_video' while returning a single set of
#     formats rather than a list of entries -- TODO confirm intended type.
'''
class OlympicChannelOriginalIE(InfoExtractor):
IE_NAME = 'olympicchannel:original'
_VALID_URL = r'https?://(?:www\.)?olympicchannel\.com/(?P<language>..)/original-series/detail/(?P<series_name>.+)/episodes/(?P<display_id>.+)/?'
_TEST = {
'url': 'https://www.olympicchannel.com/en/video/detail/news-of-the-week-with-ash-tulloch-x9414/',
'md5': 'aee7e665ad4bf45936e0f5d861e56ac5',
'info_dict': {
'_type': 'video',
'id': 'E17060701',
'ext': 'm3u8',
'title': 'Past and Present Field hockey',
'thumbnail': 'https://img.olympicchannel.com/images/image/private/t_social_share_thumb/primary/z61ca9vgb4h6t6r2se1f',
'description': 'We reunite a pair of &quot;Las Leonas&quot; and travel back to the early days of field hockey.',
# TODO more properties, either as:
# * A value
# * MD5 checksum; start the string with md5:
# * A regular expression; start the string with re:
# * Any Python type (for example int or float)
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) # video id isn't included in URL, but is in the URL to the video
display_id = mobj.group('display_id') # display id is in URL, however
webpage = self._download_webpage(url, display_id)
m3u8_url = self._html_search_regex(r'<meta name="video_url" content="(.+?)" />', webpage, 'm3u8_url') # extract URL of video's m3u8 playlist
title = self._html_search_regex(r'<meta name="episode_title" content="(.+?)" />', webpage, 'title') or self._html_search_regex(r'<title>(.+?) \| Olympic Channel</title>', webpage, 'title') # extract title
video_id = self._search_regex(r'St1-_(.+)_', m3u8_url, 'id') # extract unique video id from m3u8
thumbnail_url = self._html_search_regex(r'<meta name="og:image" content="(.+?)" />', webpage, 'thumbnail')
return {
'_type': 'multi_video',
'display_id': display_id,
'id': video_id,
'title': title,
'description': self._og_search_description(webpage),
'formats': self._extract_m3u8_formats(m3u8_url, video_id),
'thumbnail': thumbnail_url,
}
'''