\(([\d,.]+)\)', webpage, 'comment')
- page_params = self._parse_json(self._search_regex(
- r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
- webpage, 'page parameters', group='data', default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
- tags = categories = None
- if page_params:
- tags = page_params.get('tags', '').split(',')
- categories = page_params.get('categories', '').split(',')
+ def extract_list(meta_key):
+ div = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+ % meta_key, webpage, meta_key, default=None)
+ if div:
+ return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
return {
'id': video_id,
@@ -325,8 +322,8 @@ class PornHubIE(PornHubBaseIE):
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
- 'tags': tags,
- 'categories': categories,
+ 'tags': extract_list('tags'),
+ 'categories': extract_list('categories'),
'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
index 302f67d96..dd95f99f2 100644
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@@ -4,16 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
- xpath_text,
- find_xpath_attr,
determine_ext,
+ ExtractorError,
int_or_none,
unified_strdate,
- xpath_element,
- ExtractorError,
- determine_protocol,
- unsmuggle_url,
)
@@ -61,107 +57,67 @@ class RadioCanadaIE(InfoExtractor):
'only_matching': True,
}
]
+ _GEO_COUNTRIES = ['CA']
+ _access_token = None
+ _claims = None
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- app_code, video_id = re.match(self._VALID_URL, url).groups()
-
- metadata = self._download_xml(
- 'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
- video_id, note='Downloading metadata XML', query={
+ def _call_api(self, path, video_id=None, app_code=None, query=None):
+ if not query:
+ query = {}
+ query.update({
+ 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
+ 'output': 'json',
+ })
+ if video_id:
+ query.update({
'appCode': app_code,
'idMedia': video_id,
})
+ if self._access_token:
+ query['access_token'] = self._access_token
+ try:
+ return self._download_json(
+ 'https://services.radio-canada.ca/media/' + path, video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
+ data = self._parse_json(e.cause.read().decode(), None)
+ error = data.get('error_description') or data['errorMessage']['text']
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _extract_info(self, app_code, video_id):
+ metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
def get_meta(name):
- el = find_xpath_attr(metadata, './/Meta', 'name', name)
- return el.text if el is not None else None
+ for meta in metas:
+ if meta.get('name') == name:
+ text = meta.get('text')
+ if text:
+ return text
# protectionType does not necessarily mean the video is DRM protected (see
# https://github.com/rg3/youtube-dl/pull/18609).
if get_meta('protectionType'):
self.report_warning('This video is probably DRM protected.')
- device_types = ['ipad']
- if not smuggled_data:
- device_types.append('flash')
- device_types.append('android')
-
- formats = []
- error = None
- # TODO: extract f4m formats
- # f4m formats can be extracted using flashhd device_type but they produce unplayable file
- for device_type in device_types:
- validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
- query = {
- 'appCode': app_code,
- 'idMedia': video_id,
- 'connectionType': 'broadband',
- 'multibitrate': 'true',
- 'deviceType': device_type,
- }
- if smuggled_data:
- validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
- query.update(smuggled_data)
- else:
- query.update({
- # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
- 'paysJ391wsHjbOJwvCs26toz': 'CA',
- 'bypasslock': 'NZt5K62gRqfc',
- })
- v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
- v_url = xpath_text(v_data, 'url')
- if not v_url:
- continue
- if v_url == 'null':
- error = xpath_text(v_data, 'message')
- continue
- ext = determine_ext(v_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- v_url, video_id, f4m_id='hds', fatal=False))
- else:
- ext = determine_ext(v_url)
- bitrates = xpath_element(v_data, 'bitrates')
- for url_e in bitrates.findall('url'):
- tbr = int_or_none(url_e.get('bitrate'))
- if not tbr:
- continue
- f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
- protocol = determine_protocol({'url': f_url})
- f = {
- 'format_id': '%s-%d' % (protocol, tbr),
- 'url': f_url,
- 'ext': 'flv' if protocol == 'rtmp' else ext,
- 'protocol': protocol,
- 'width': int_or_none(url_e.get('width')),
- 'height': int_or_none(url_e.get('height')),
- 'tbr': tbr,
- }
- mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
- if mobj:
- f.update({
- 'url': mobj.group('url') + mobj.group('auth'),
- 'play_path': mobj.group('playpath'),
- })
- formats.append(f)
- if protocol == 'rtsp':
- base_url = self._search_regex(
- r'rtsp://([^?]+)', f_url, 'base url', default=None)
- if base_url:
- base_url = 'http://' + base_url
- formats.extend(self._extract_m3u8_formats(
- base_url + '/playlist.m3u8', video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- base_url + '/manifest.f4m', video_id,
- f4m_id='hds', fatal=False))
- if not formats and error:
+ query = {
+ 'connectionType': 'hd',
+ 'deviceType': 'ipad',
+ 'multibitrate': 'true',
+ }
+ if self._claims:
+ query['claims'] = self._claims
+ v_data = self._call_api('validation/v2/', video_id, app_code, query)
+ v_url = v_data.get('url')
+ if not v_url:
+ error = v_data['message']
+ if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
+ raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+ if error == 'Le contenu sélectionné est disponible seulement en premium':
+ self.raise_login_required(error)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
+ formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
self._sort_formats(formats)
subtitles = {}
@@ -186,11 +142,14 @@ class RadioCanadaIE(InfoExtractor):
'formats': formats,
}
+ def _real_extract(self, url):
+ return self._extract_info(*re.match(self._VALID_URL, url).groups())
+
class RadioCanadaAudioVideoIE(InfoExtractor):
'radiocanada:audiovideo'
- _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
'info_dict': {
'id': '7527184',
@@ -203,7 +162,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 10ac8ed1f..8f54d5675 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -21,7 +21,17 @@ from ..utils import (
class RutubeBaseIE(InfoExtractor):
- def _extract_video(self, video, video_id=None, require_title=True):
+ def _download_api_info(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/video/%s/' % video_id,
+ video_id, 'Downloading video JSON',
+ 'Unable to download video JSON', query=query)
+
+ @staticmethod
+ def _extract_info(video, video_id=None, require_title=True):
title = video['title'] if require_title else video.get('title')
age_limit = video.get('is_adult')
@@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor):
category = try_get(video, lambda x: x['category']['name'])
return {
- 'id': video.get('id') or video_id,
+ 'id': video.get('id') or video_id if video_id else video['id'],
'title': title,
'description': video.get('description'),
'thumbnail': video.get('thumbnail_url'),
@@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor):
'is_live': bool_or_none(video.get('is_livestream')),
}
+ def _download_and_extract_info(self, video_id, query=None):
+ return self._extract_info(
+ self._download_api_info(video_id, query=query), video_id)
+
+ def _download_api_options(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/play/options/%s/' % video_id,
+ video_id, 'Downloading options JSON',
+ 'Unable to download options JSON',
+ headers=self.geo_verification_headers(), query=query)
+
+ def _extract_formats(self, options, video_id):
+ formats = []
+ for format_id, format_url in options['video_balancer'].items():
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+ return formats
+
+ def _download_and_extract_formats(self, video_id, query=None):
+ return self._extract_formats(
+ self._download_api_options(video_id, query=query), video_id)
+
class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
@@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE):
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
- 'md5': '79938ade01294ef7e27574890d0d3769',
+ 'md5': '1d24f180fac7a02f3900712e5a5764d6',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Раненный кенгуру забежал в аптеку',
'description': 'http://www.ntdtv.ru ',
- 'duration': 80,
+ 'duration': 81,
'uploader': 'NTDRussian',
'uploader_id': '29790',
'timestamp': 1381943602,
@@ -94,39 +140,12 @@ class RutubeIE(RutubeBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://rutube.ru/api/video/%s/?format=json' % video_id,
- video_id, 'Downloading video JSON')
-
- info = self._extract_video(video, video_id)
-
- options = self._download_json(
- 'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
- video_id, 'Downloading options JSON',
- headers=self.geo_verification_headers())
-
- formats = []
- for format_id, format_url in options['video_balancer'].items():
- ext = determine_ext(format_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- format_url, video_id, f4m_id=format_id, fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- info['formats'] = formats
+ info = self._download_and_extract_info(video_id)
+ info['formats'] = self._download_and_extract_formats(video_id)
return info
-class RutubeEmbedIE(InfoExtractor):
+class RutubeEmbedIE(RutubeBaseIE):
IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
@@ -135,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor):
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661',
- 'ext': 'flv',
+ 'ext': 'mp4',
'timestamp': 1387830582,
'upload_date': '20131223',
'uploader_id': '297833',
@@ -149,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor):
}, {
'url': 'http://rutube.ru/play/embed/8083783',
'only_matching': True,
+ }, {
+ # private video
+ 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
+ 'only_matching': True,
}]
def _real_extract(self, url):
embed_id = self._match_id(url)
- webpage = self._download_webpage(url, embed_id)
-
- canonical_url = self._html_search_regex(
- r'[\da-z]+)/video'
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
_TESTS = [{
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
'md5': '1cc433e1d6aa14bc376535b8679302f7',
@@ -41,13 +42,22 @@ class SpankBangIE(InfoExtractor):
# 4k
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2y3td/embed/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id, headers={
- 'Cookie': 'country=US'
- })
+ webpage = self._download_webpage(
+ url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
+ video_id, headers={'Cookie': 'country=US'})
if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
raise ExtractorError(
@@ -94,3 +104,33 @@ class SpankBangIE(InfoExtractor):
'formats': formats,
'age_limit': age_limit,
}
+
+
+class SpankBangPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
+ _TEST = {
+ 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
+ 'info_dict': {
+ 'id': 'ug0k',
+ 'title': 'Big Ass Titties',
+ },
+ 'playlist_mincount': 50,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
+
+ entries = [self.url_result(
+ 'https://spankbang.com/%s/video' % video_id,
+ ie=SpankBangIE.ie_key(), video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
+
+ title = self._html_search_regex(
+ r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
+ fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py
index 2e7876cc5..f1ab91cf2 100644
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -3,22 +3,19 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
+from .radiocanada import RadioCanadaIE
from ..utils import (
- int_or_none,
- js_to_json,
- urlencode_postdata,
extract_attributes,
- smuggle_url,
+ int_or_none,
+ merge_dicts,
+ urlencode_postdata,
)
-class TouTvIE(InfoExtractor):
+class TouTvIE(RadioCanadaIE):
_NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
- _access_token = None
- _claims = None
_TESTS = [{
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
@@ -46,18 +43,14 @@ class TouTvIE(InfoExtractor):
email, password = self._get_login_info()
if email is None:
return
- state = 'http://ici.tou.tv/'
- webpage = self._download_webpage(state, None, 'Downloading homepage')
- toutvlogin = self._parse_json(self._search_regex(
- r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
- authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
login_webpage = self._download_webpage(
- authorize_url, None, 'Downloading login page', query={
- 'client_id': toutvlogin['clientId'],
- 'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
+ 'https://services.radio-canada.ca/auth/oauth/v2/authorize',
+ None, 'Downloading login page', query={
+ 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36',
+ 'redirect_uri': 'https://ici.tou.tv/logincallback',
'response_type': 'token',
- 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
- 'state': state,
+ 'scope': 'id.write media-validation.read',
+ 'state': '/',
})
def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
@@ -86,12 +79,7 @@ class TouTvIE(InfoExtractor):
self._access_token = self._search_regex(
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
urlh.geturl(), 'access token')
- self._claims = self._download_json(
- 'https://services.radio-canada.ca/media/validation/v2/getClaims',
- None, 'Extracting Claims', query={
- 'token': self._access_token,
- 'access_token': self._access_token,
- })['claims']
+ self._claims = self._call_api('validation/v2/getClaims')['claims']
def _real_extract(self, url):
path = self._match_id(url)
@@ -102,19 +90,10 @@ class TouTvIE(InfoExtractor):
self.report_warning('This video is probably DRM protected.', path)
video_id = metadata['IdMedia']
details = metadata['Details']
- title = details['OriginalTitle']
- video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
- if self._access_token and self._claims:
- video_url = smuggle_url(video_url, {
- 'access_token': self._access_token,
- 'claims': self._claims,
- })
- return {
- '_type': 'url_transparent',
- 'url': video_url,
+ return merge_dicts({
'id': video_id,
- 'title': title,
+ 'title': details.get('OriginalTitle'),
'thumbnail': details.get('ImageUrl'),
'duration': int_or_none(details.get('LengthInSeconds')),
- }
+ }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
diff --git a/youtube_dl/extractor/trunews.py b/youtube_dl/extractor/trunews.py
new file mode 100644
index 000000000..b0c7caabf
--- /dev/null
+++ b/youtube_dl/extractor/trunews.py
@@ -0,0 +1,75 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ dict_get,
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+)
+
+
+class TruNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'md5': 'a19c024c3906ff954fac9b96ce66bb08',
+ 'info_dict': {
+ 'id': '5c5a21e65d3c196e1c0020cc',
+ 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'ext': 'mp4',
+ 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
+ 'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
+ 'duration': 3685,
+ 'timestamp': 1549411440,
+ 'upload_date': '20190206',
+ },
+ 'add_ie': ['Zype'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://api.zype.com/videos', display_id, query={
+ 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
+ 'per_page': 1,
+ 'active': 'true',
+ 'friendly_title': display_id,
+ })['response'][0]
+
+ zype_id = video['_id']
+
+ thumbnails = []
+ thumbnails_list = video.get('thumbnails')
+ if isinstance(thumbnails_list, list):
+ for thumbnail in thumbnails_list:
+ if not isinstance(thumbnail, dict):
+ continue
+ thumbnail_url = url_or_none(thumbnail.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ '_type': 'url_transparent',
+ 'url': update_url_query(
+ 'https://player.zype.com/embed/%s.js' % zype_id,
+ {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}),
+ 'ie_key': 'Zype',
+ 'id': zype_id,
+ 'display_id': display_id,
+ 'title': video.get('title'),
+ 'description': dict_get(video, ('description', 'ott_description', 'short_description')),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('published_at')),
+ 'average_rating': float_or_none(video.get('rating')),
+ 'view_count': int_or_none(video.get('request_count')),
+ 'thumbnails': thumbnails,
+ }
diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py
index 3a5782525..ce892c8c5 100644
--- a/youtube_dl/extractor/trutv.py
+++ b/youtube_dl/extractor/trutv.py
@@ -4,44 +4,72 @@ from __future__ import unicode_literals
import re
from .turner import TurnerBaseIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
class TruTVIE(TurnerBaseIE):
- _VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))'
+ _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
_TEST = {
- 'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html',
- 'md5': '2cdc844f317579fed1a7251b087ff417',
+ 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
'info_dict': {
- 'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets',
+ 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
'ext': 'mp4',
- 'title': 'You Won\'t Believe These Sports Bets',
- 'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.',
- 'upload_date': '20130305',
- }
+ 'title': 'Sunlight-Activated Flower',
+ 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
- path, video_id = re.match(self._VALID_URL, url).groups()
- auth_required = False
- if path:
- data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path
+ series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
+
+ if video_id:
+ path = 'episode'
+ display_id = video_id
else:
- webpage = self._download_webpage(url, video_id)
- video_id = self._search_regex(
- r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
- webpage, 'video id', default=video_id)
- auth_required = self._search_regex(
- r'TTV\.TVE\.authRequired\s*=\s*(true|false);',
- webpage, 'auth required', default='false') == 'true'
- data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id
- return self._extract_cvp_info(
- data_src, path, {
- 'secure': {
- 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big',
- 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
- },
- }, {
+ path = 'series/clip'
+ display_id = clip_slug
+
+ data = self._download_json(
+ 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
+ display_id)
+ video_data = data['episode'] if video_id else data['info']
+ media_id = video_data['mediaId']
+ title = video_data['title'].strip()
+
+ info = self._extract_ngtv_info(
+ media_id, {}, {
'url': url,
'site_name': 'truTV',
- 'auth_required': auth_required,
+ 'auth_required': video_data.get('isAuthRequired'),
})
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('srcUrl')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video_data.get('publicationDate')),
+ 'series': video_data.get('showTitle'),
+ 'season_number': int_or_none(video_data.get('seasonNum')),
+ 'episode_number': int_or_none(video_data.get('episodeNum')),
+ })
+ return info
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
index 8f1ff3b76..d82d48f94 100644
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -493,10 +493,9 @@ class TVPlayHomeIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(
- r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
- default=None)
+ r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
- if video_id:
+ if len(video_id) < 8:
return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
@@ -537,8 +536,9 @@ class TVPlayHomeIE(InfoExtractor):
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None))
episode = self._search_regex(
- r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
- default=None, group='value')
+ (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'episode', default=None, group='value')
episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None))
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 401615683..8c87f6dd3 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -136,7 +136,12 @@ class TwitchBaseIE(InfoExtractor):
source = next(f for f in formats if f['format_id'] == 'Source')
source['preference'] = 10
except StopIteration:
- pass # No Source stream present
+ for f in formats:
+ if '/chunked/' in f['url']:
+ f.update({
+ 'source_preference': 10,
+ 'format_note': 'Source',
+ })
self._sort_formats(formats)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d0cb65814..f5a0bb4b0 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -184,7 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)'
+JSON_LD_RE = r'(?is)'
def preferredencoding():
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 444fa83a6..63499ba7c 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = 'vc.2019.02.03.1'
+__version__ = 'vc.2019.02.14'