From 8dd2b6a5eff4d559b8ed75e623d11e0b2b8e5083 Mon Sep 17 00:00:00 2001 From: Pika Date: Tue, 24 Mar 2020 18:42:15 -0400 Subject: [PATCH 1/4] [trutv] fix extraction + downloading --- youtube_dl/extractor/trutv.py | 69 +++++++++++++++++----------------- youtube_dl/extractor/turner.py | 4 +- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index ce892c8c5..b72ad012d 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -6,19 +6,22 @@ import re from .turner import TurnerBaseIE from ..utils import ( int_or_none, - parse_iso8601, ) class TruTVIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P[0-9A-Za-z-]+)/(?:videos/(?P[0-9A-Za-z-]+)|(?P\d+))' + # https://www.trutv.com/shows/impractical-jokers/season-8/episode-2/the-closer + _VALID_URL = r'https?://(?:www\.)?trutv\.com/shows/[0-9A-Za-z-]+/season-\d+/episode-\d+/(?P[0-9A-Za-z-]+)' _TEST = { - 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html', + 'url': 'https://www.trutv.com/shows/impractical-jokers/season-8/episode-2/the-closer', 'info_dict': { - 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1', + 'id': '0b90803a0d4bba757085a61cc25be505358cd8b5', 'ext': 'mp4', - 'title': 'Sunlight-Activated Flower', - 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.", + 'title': 'The Closer', + 'description': 'Q, Joe, Sal and Murr get tech help from some confused tutors, then play Hot Potato in a shoe store. Plus, the big loser wishes he could press escape during a brutal coffee shop punishment.', + 'series': 'Impractical Jokers', + 'season_number': 8, + 'episode_number': 2, }, 'params': { # m3u8 download @@ -27,49 +30,47 @@ class TruTVIE(TurnerBaseIE): } def _real_extract(self, url): - series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups() + episode_slug = self._match_id(url) + + webpage = self._download_webpage(url, episode_slug) + + meta = self._parse_json(self._html_search_regex(r'', webpage, episode_slug), episode_slug) - if video_id: - path = 'episode' - display_id = video_id - else: - path = 'series/clip' - display_id = clip_slug + data = self._parse_json(self._html_search_regex(r'', webpage, episode_slug), episode_slug) + + eps = data['turner_playlist'] - data = self._download_json( - 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id), - display_id) - video_data = data['episode'] if video_id else data['info'] - media_id = video_data['mediaId'] + for ep in eps: + if ep['url'] in url: + video_data = ep + + media_id = video_data['mediaID'] title = video_data['title'].strip() info = self._extract_ngtv_info( media_id, {}, { 'url': url, 'site_name': 'truTV', - 'auth_required': video_data.get('isAuthRequired'), + 'auth_required': video_data.get('authRequired'), }) thumbnails = [] - for image in video_data.get('images', []): - image_url = image.get('srcUrl') - if not image_url: - continue - thumbnails.append({ - 'url': image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) + for images in meta.get('image', []): + for image in images: + if not image: + continue + thumbnails.append({ + 'url': image, + }) info.update({ 'id': media_id, - 'display_id': display_id, + 'display_id': video_data.get('videoId'), 'title': title, - 'description': video_data.get('description'), + 'description': video_data.get('shortDescription'), 'thumbnails': thumbnails, - 'timestamp': parse_iso8601(video_data.get('publicationDate')), - 'series': video_data.get('showTitle'), - 'season_number': int_or_none(video_data.get('seasonNum')), - 'episode_number': int_or_none(video_data.get('episodeNum')), + 'series': meta.get('partOfSeries').get('name'), + 'season_number': int_or_none(meta.get('partOfSeason').get('seasonNumber')), + 'episode_number': int_or_none(meta.get('episodeNumber')), }) return info diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py index 4a6cbfbb8..d5438f733 100644 --- a/youtube_dl/extractor/turner.py +++ b/youtube_dl/extractor/turner.py @@ -196,8 +196,8 @@ class TurnerBaseIE(AdobePassIE): def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None): streams_data = self._download_json( - 'http://medium.ngtv.io/media/%s/tv' % media_id, - media_id)['media']['tv'] + 'http://medium.ngtv.io/media/%s/desktop' % media_id, + media_id)['media']['desktop'] duration = None chapters = [] formats = [] From 86be0691b2d19c8c441d5496f52de3f90227749e Mon Sep 17 00:00:00 2001 From: Pika Date: Tue, 24 Mar 2020 18:51:20 -0400 Subject: [PATCH 2/4] fix flake8 errors --- youtube_dl/extractor/trutv.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index b72ad012d..58633963e 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .turner import TurnerBaseIE from ..utils import ( int_or_none, @@ -31,13 +29,13 @@ class TruTVIE(TurnerBaseIE): def _real_extract(self, url): episode_slug = self._match_id(url) - + webpage = self._download_webpage(url, episode_slug) - + meta = self._parse_json(self._html_search_regex(r'', webpage, episode_slug), episode_slug) data = self._parse_json(self._html_search_regex(r'', webpage, episode_slug), episode_slug) - + eps = data['turner_playlist'] for ep in eps: From 69b2c9184f8d17012c7135b95949fddab5fd0cf3 Mon Sep 17 00:00:00 2001 From: Pika Date: Tue, 24 Mar 2020 18:52:34 -0400 Subject: [PATCH 3/4] remove comment --- youtube_dl/extractor/trutv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index 58633963e..8c0135145 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -8,7 +8,6 @@ from ..utils import ( class TruTVIE(TurnerBaseIE): - # https://www.trutv.com/shows/impractical-jokers/season-8/episode-2/the-closer _VALID_URL = r'https?://(?:www\.)?trutv\.com/shows/[0-9A-Za-z-]+/season-\d+/episode-\d+/(?P[0-9A-Za-z-]+)' _TEST = { 'url': 'https://www.trutv.com/shows/impractical-jokers/season-8/episode-2/the-closer', From ab1788329d5e6350e3edf35459ffeb9e75686427 Mon Sep 17 00:00:00 2001 From: Pika Date: Tue, 24 Mar 2020 20:41:33 -0400 Subject: [PATCH 4/4] [trutv] port fixes from tbs --- youtube_dl/extractor/trutv.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py index 8c0135145..f7f1a4def 100644 --- a/youtube_dl/extractor/trutv.py +++ b/youtube_dl/extractor/trutv.py @@ -2,6 +2,10 @@ from __future__ import unicode_literals from .turner import TurnerBaseIE +from ..compat import ( + compat_urllib_parse_urlparse, + compat_parse_qs, +) from ..utils import ( int_or_none, ) @@ -44,11 +48,13 @@ class TruTVIE(TurnerBaseIE): media_id = video_data['mediaID'] title = video_data['title'].strip() + tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(data['ngtv_token_url']).query) + info = self._extract_ngtv_info( - media_id, {}, { + media_id, tokenizer_query, { 'url': url, 'site_name': 'truTV', - 'auth_required': video_data.get('authRequired'), + 'auth_required': video_data.get('authRequired') == '1', }) thumbnails = []