From 5ed0a91e6603e18649abb66e2a3fef896aaf48d1 Mon Sep 17 00:00:00 2001 From: Kaithar Date: Thu, 29 Jun 2017 00:29:04 +0100 Subject: [PATCH 1/2] Fixes for language matching and playlist handling to pull full shows --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/funimation.py | 181 ++++++++++++++++++++++------- 2 files changed, 145 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a263c88b3..6568e3a1c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -369,7 +369,10 @@ from .francetv import ( from .freesound import FreesoundIE from .freespeech import FreespeechIE from .freshlive import FreshLiveIE -from .funimation import FunimationIE +from .funimation import ( + FunimationIE, + FunimationShowPlaylistIE +) from .funnyordie import FunnyOrDieIE from .fusion import FusionIE from .fxnetworks import FXNetworksIE diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 8c37509ec..2870be886 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -10,14 +10,37 @@ from ..utils import ( ExtractorError, urlencode_postdata ) +import re - -class FunimationIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P[^/?#&]+)' - +class FunimationCommonIE(InfoExtractor): _NETRC_MACHINE = 'funimation' _TOKEN = None + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + try: + data = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', + None, 'Logging in as %s' % username, data=urlencode_postdata({ + 'username': username, + 'password': password, + })) + self._TOKEN = data['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['error'] + raise ExtractorError(error, expected=True) + raise + + def _real_initialize(self): + self._login() + +class FunimationIE(FunimationCommonIE): + _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/'+\ + r'(?P[^/?#&]+)/?(?Psimulcast|uncut)?/?(?:\?lang=(?Penglish|japanese))?' + _TESTS = [{ 'url': 'https://www.funimation.com/shows/hacksign/role-play/', 'info_dict': { @@ -50,29 +73,11 @@ class FunimationIE(InfoExtractor): 'only_matching': True, }] - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - try: - data = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in as %s' % username, data=urlencode_postdata({ - 'username': username, - 'password': password, - })) - self._TOKEN = data['token'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - error = self._parse_json(e.cause.read().decode(), None)['error'] - raise ExtractorError(error, expected=True) - raise - - def _real_initialize(self): - self._login() - def _real_extract(self, url): - display_id = self._match_id(url) + m = re.compile(self._VALID_URL).match(url) + display_id = m.group('id') + intended_alpha = m.group('alpha') or 'simulcast' + intended_language = m.group('lang') or 'english' webpage = self._download_webpage(url, display_id) def _search_kane(name): @@ -97,12 +102,60 @@ class FunimationIE(InfoExtractor): ], webpage, fatal=True) video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id') - title = episode = title_data.get('title') or _search_kane('videoTitle') or self._og_search_title(webpage) - series = _search_kane('showName') - if series: - title = '%s - %s' % (series, title) - description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True) - + try: + headers = {} + if self._TOKEN: + headers['Authorization'] = 'Token %s' % self._TOKEN + experience = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/title/experience/%s/' % video_id, + video_id, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + error = self._parse_json(e.cause.read(), video_id)['errors'][0] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error.get('detail') or error.get('title')), expected=True) + raise + showLanguage = _search_kane('showLanguage') + alpha = title_data['alpha'].lower() + target_video_id = int(video_id) + matched_episode = None + for season in experience['seasons']: + for episode in season['episodes']: + # We can use showLanguage to know what the video_id is expected to be, let's look for it + desiredcut = episode['languages'].get(showLanguage, {'alpha': {}}) + desiredcut = desiredcut['alpha'].get(alpha, {}) + if desiredcut.get('experienceId', None) == target_video_id: + # Winning! + matched_episode = episode + break + if matched_episode: + break + if not matched_episode: + raise ExtractorError('%s said: Failed to find the episode' % ( + self.IE_NAME), expected=False) + matched_alpha = None + matched_language = None + # Preferences + for il in [intended_language, 'english', 'japanese']: + if (il in episode['languages']): + for ia in [intended_alpha, 'uncut', 'simulcast', 'extras']: + if (ia in episode['languages'][il]['alpha'] and + episode['languages'][il]['alpha'][ia]['sources']): + matched_language = il + matched_alpha = ia + break + if (matched_alpha): + break + if not matched_alpha: + raise ExtractorError('%s could not find acceptable language and alpha'%self.IE_NAME, expected=False) + if matched_language != intended_language: + print("Falling back to %s"%matched_language) + if matched_alpha != intended_alpha: + print("Falling back to %s"%matched_alpha) + intended_language = matched_language + intended_alpha = matched_alpha + final_alpha = episode['languages'][intended_language]['alpha'][intended_alpha] + video_id = str(final_alpha['experienceId']) try: headers = {} if self._TOKEN: @@ -116,7 +169,6 @@ class FunimationIE(InfoExtractor): raise ExtractorError('%s said: %s' % ( self.IE_NAME, error.get('detail') or error.get('title')), expected=True) raise - formats = [] for source in sources: source_url = source.get('src') @@ -137,13 +189,62 @@ class FunimationIE(InfoExtractor): return { 'id': video_id, 'display_id': display_id, - 'title': title, - 'description': description, + 'title': "%s - %s"%(experience['showTitle'], episode['episodeTitle']), + 'description': episode['episodeSummary'], 'thumbnail': self._og_search_thumbnail(webpage), - 'series': series, - 'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')), - 'episode_number': int_or_none(title_data.get('episodeNum')), - 'episode': episode, - 'season_id': title_data.get('seriesId'), + 'series': experience['showTitle'], + 'season_number': int_or_none(season['seasonId']), + 'episode_number': int_or_none(episode['episodeId']), + 'episode': episode['episodeTitle'], + 'season_id': experience['showId'], 'formats': formats, + 'duration': final_alpha['duration'] + } + +class FunimationShowPlaylistIE(FunimationCommonIE): + IE_NAME = 'Funimation:playlist' + _VALID_URL = (r'(?Phttps?://(?:www\.)?funimation(?P\.com|now\.uk)/shows/'+ + r'(?P[^/?#&]+))/?(?:\?alpha=(?Psimulcast|uncut))?(?:[?&]lang=(?Penglish|japanese))?$') + + def _real_extract(self, url): + m = re.compile(self._VALID_URL).match(url) + display_id = m.group('id') + intended_alpha = m.group('alpha') or 'simulcast' + intended_language = m.group('lang') or 'english' + domext = m.group('ter') + ter = 'US' if (domext == '.com') else 'GB' + url = m.group('real_url') + webpage = self._download_webpage(url, display_id) + + title = self._html_search_regex( + r'(?s)

(.*?)

', + webpage, 'title') + + show_id = re.findall(r'var showId = (\d+)', webpage)[0] + try: + headers = {} + if self._TOKEN: + headers['Authorization'] = 'Token %s' % self._TOKEN + sources = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=-1&ter=%s&title_id=%s&sort=order&sort_direction=ASC' % (ter, show_id), + show_id, headers=headers)['items'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + error = self._parse_json(e.cause.read(), show_id)['errors'][0] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error.get('detail') or error.get('title')), expected=True) + raise + + entries = [ + self.url_result('https://www.funimation%s/shows/%s/%s/%s/?lang=%s'%( + domext, ep['item']['titleSlug'], ep['item']['episodeSlug'], intended_alpha, intended_language), + 'Funimation', ep['item']['episodeName']) + for ep in sources + ] + + return { + '_type': 'playlist', + 'id': display_id, + 'title': title, + 'entries': entries, } From 1390824390208a06cbcc1f88aec00dcaf2f5cd22 Mon Sep 17 00:00:00 2001 From: Kaithar Date: Sat, 29 Jul 2017 22:56:50 +0100 Subject: [PATCH 2/2] Add better handling of empty sources, add the language to the returned formats --- youtube_dl/extractor/funimation.py | 99 +++++++++++++++++++----------- 1 file changed, 63 insertions(+), 36 deletions(-) diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index 2870be886..00adcc028 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -37,6 +37,34 @@ class FunimationCommonIE(InfoExtractor): def _real_initialize(self): self._login() + def fetch_experience(self, video_id, showLanguage, alpha): + try: + headers = {} + if self._TOKEN: + headers['Authorization'] = 'Token %s' % self._TOKEN + experience = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/title/experience/%s/' % video_id, + video_id, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + error = self._parse_json(e.cause.read(), video_id)['errors'][0] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error.get('detail') or error.get('title')), expected=True) + raise + target_video_id = int(video_id) + matched_episode = None + for season in experience['seasons']: + for episode in season['episodes']: + # We can use showLanguage to know what the video_id is expected to be, let's look for it + desiredcut = episode['languages'].get(showLanguage, {'alpha': {}}) + desiredcut = desiredcut['alpha'].get(alpha, {}) + if desiredcut.get('experienceId', None) == target_video_id: + # Winning! + return (experience, season, episode) + raise ExtractorError('%s said: Failed to find the episode' % ( + self.IE_NAME), expected=False) + + class FunimationIE(FunimationCommonIE): _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/'+\ r'(?P[^/?#&]+)/?(?Psimulcast|uncut)?/?(?:\?lang=(?Penglish|japanese))?' @@ -102,45 +130,18 @@ class FunimationIE(FunimationCommonIE): ], webpage, fatal=True) video_id = self._search_regex(r'/player/(\d+)', player_url, 'video id') - try: - headers = {} - if self._TOKEN: - headers['Authorization'] = 'Token %s' % self._TOKEN - experience = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/title/experience/%s/' % video_id, - video_id, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - error = self._parse_json(e.cause.read(), video_id)['errors'][0] - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, error.get('detail') or error.get('title')), expected=True) - raise showLanguage = _search_kane('showLanguage') alpha = title_data['alpha'].lower() - target_video_id = int(video_id) - matched_episode = None - for season in experience['seasons']: - for episode in season['episodes']: - # We can use showLanguage to know what the video_id is expected to be, let's look for it - desiredcut = episode['languages'].get(showLanguage, {'alpha': {}}) - desiredcut = desiredcut['alpha'].get(alpha, {}) - if desiredcut.get('experienceId', None) == target_video_id: - # Winning! - matched_episode = episode - break - if matched_episode: - break - if not matched_episode: - raise ExtractorError('%s said: Failed to find the episode' % ( - self.IE_NAME), expected=False) + experience, season, episode = self.fetch_experience(video_id, showLanguage, alpha) + + # We're going to do two passes here... first we'll try for an exact match matched_alpha = None matched_language = None # Preferences for il in [intended_language, 'english', 'japanese']: if (il in episode['languages']): for ia in [intended_alpha, 'uncut', 'simulcast', 'extras']: - if (ia in episode['languages'][il]['alpha'] and - episode['languages'][il]['alpha'][ia]['sources']): + if (ia in episode['languages'][il]['alpha']): matched_language = il matched_alpha = ia break @@ -148,13 +149,32 @@ class FunimationIE(FunimationCommonIE): break if not matched_alpha: raise ExtractorError('%s could not find acceptable language and alpha'%self.IE_NAME, expected=False) + final_alpha = episode['languages'][matched_language]['alpha'][matched_alpha] + + # Now we want to repeat that if we don't have a source to work with + if (not final_alpha['sources']): + experience, season, episode = self.fetch_experience(final_alpha['experienceId'], matched_language, matched_alpha) + matched_alpha = None + matched_language = None + # Preferences + for il in [intended_language, 'english', 'japanese']: + if (il in episode['languages']): + for ia in [intended_alpha, 'uncut', 'simulcast', 'extras']: + if (ia in episode['languages'][il]['alpha'] and + episode['languages'][il]['alpha'][ia]['sources']): + matched_language = il + matched_alpha = ia + break + if (matched_alpha): + break + if not matched_alpha: + raise ExtractorError('%s could not find acceptable language and alpha'%self.IE_NAME, expected=False) + final_alpha = episode['languages'][matched_language]['alpha'][matched_alpha] + if matched_language != intended_language: print("Falling back to %s"%matched_language) if matched_alpha != intended_alpha: print("Falling back to %s"%matched_alpha) - intended_language = matched_language - intended_alpha = matched_alpha - final_alpha = episode['languages'][intended_language]['alpha'][intended_alpha] video_id = str(final_alpha['experienceId']) try: headers = {} @@ -170,19 +190,26 @@ class FunimationIE(FunimationCommonIE): self.IE_NAME, error.get('detail') or error.get('title')), expected=True) raise formats = [] + f_language = {'japanese': 'jp', 'english': 'en'}.get(matched_language) + f_language_preference = {intended_language: 10}.get(matched_language, -10) for source in sources: source_url = source.get('src') if not source_url: continue source_type = source.get('videoType') or determine_ext(source_url) if source_type == 'm3u8': - formats.extend(self._extract_m3u8_formats( + for f in self._extract_m3u8_formats( source_url, video_id, 'mp4', - m3u8_id='hls', fatal=False)) + m3u8_id='hls', fatal=False): + f['language'] = f_language + f['language_preference'] = f_language_preference + formats.append(f) else: formats.append({ 'format_id': source_type, 'url': source_url, + 'language': f_language, + 'language_preference': f_language_preference }) self._sort_formats(formats)