From 3d586c09b246352f11776b1287cba96d645756a1 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Fri, 28 Apr 2017 02:42:12 +0200 Subject: [PATCH 1/7] [vevo] support multiple featured artists --- youtube_dl/extractor/vevo.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 890a149ea..19cf34a6b 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -214,13 +214,14 @@ class VevoIE(VevoBaseIE): uploader = None artist = None - featured_artist = None + featured_artists = [] artists = video_info.get('artists') for curr_artist in artists: if curr_artist.get('role') == 'Featured': - featured_artist = curr_artist['name'] + featured_artists.append(curr_artist['name']) else: artist = uploader = curr_artist['name'] + featured_artists.sort() formats = [] for video_version in video_versions: @@ -269,8 +270,8 @@ class VevoIE(VevoBaseIE): self._sort_formats(formats) track = video_info['title'] - if featured_artist: - artist = '%s ft. %s' % (artist, featured_artist) + if len(featured_artists) > 0: + artist = '%s ft. %s' % (artist, ' & '.join(featured_artists)) title = '%s - %s' % (artist, track) if artist else track genres = video_info.get('genres') From 790646060697ee26a9d9943e0e1ee88b95a30799 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Fri, 28 Apr 2017 02:48:30 +0200 Subject: [PATCH 2/7] [vevo] raise error on geo restricted videos --- youtube_dl/extractor/vevo.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 19cf34a6b..b513ee7a9 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -228,6 +228,8 @@ class VevoIE(VevoBaseIE): version = self._VERSIONS.get(video_version.get('version'), 'generic') version_url = video_version.get('url') if not version_url: + if video_version.get('errorCode') == 'video-not-viewable-in-country': + raise self.raise_geo_restricted() continue if '.ism' in version_url: From 60de4c4410844d3bde42fc3f78e1836442047a43 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sun, 30 Apr 2017 00:04:16 +0200 Subject: [PATCH 3/7] [vevo] rewrite authentication - token is now sent in `Authorization` header - token is refreshed when needed - new token is not requested if there is one (playlist downloading) --- youtube_dl/extractor/vevo.py | 51 ++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index b513ee7a9..cb2ac19eb 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -153,41 +153,58 @@ class VevoIE(VevoBaseIE): 4: 'amazon', } - def _initialize_api(self, video_id): - webpage = self._download_webpage( - 'https://accounts.vevo.com/token', None, - note='Retrieving oauth token', - errnote='Unable to retrieve oauth token', - data=json.dumps({ - 'client_id': 'SPupX1tvqFEopQ1YS6SS', - 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous', - }).encode('utf-8'), - headers={ - 'Content-Type': 'application/json', + def _initialize_api(self, refresh=False): + data = {'client_id': 'SPupX1tvqFEopQ1YS6SS'} + if refresh: + data.update({ + 'grant_type': 'refresh_token', + 'refresh_token': self._REFRESH_TOKEN, }) + else: + data['grant_type'] = 'urn:vevo:params:oauth:grant-type:anonymous' + post_data = json.dumps(data).encode('utf-8') + + webpage = self._download_webpage( + 'https://accounts.vevo.com/token', 'token', + data=post_data, headers={'Content-Type': 'application/json'}, + note='Retrieving oauth token', + errnote='Unable to retrieve oauth token') if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): self.raise_geo_restricted( '%s said: This page is currently unavailable in your region' % self.IE_NAME) - auth_info = self._parse_json(webpage, video_id) - self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token'] + auth_info = self._parse_json(webpage, 'token') + + self._ACCESS_TOKEN = auth_info['legacy_token'] + self._REFRESH_TOKEN = auth_info['refresh_token'] def _call_api(self, path, *args, **kwargs): try: - data = self._download_json(self._api_url_template % path, *args, **kwargs) + data = self._download_json( + 'https://apiv2.vevo.com/%s' % path, headers={ + 'Authorization': 'Bearer %s' % self._ACCESS_TOKEN, + }, *args, **kwargs) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): errors = self._parse_json(e.cause.read().decode(), None)['errors'] - error_message = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + error_messages = [error['message'] for error in errors] + if 'Token is expired' in error_messages: + self._initialize_api(refresh=True) + return self._call_api(path, *args, **kwargs) + else: + error_message = ', '.join(error_messages) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_message), + expected=True) raise return data def _real_extract(self, url): video_id = self._match_id(url) - self._initialize_api(video_id) + if not hasattr(self, '_ACCESS_TOKEN'): + self._initialize_api() video_info = self._call_api( 'video/%s' % video_id, video_id, 'Downloading api video info', From f31c8182e72492c0317df81d99da3a07c1e192a7 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sun, 30 Apr 2017 00:52:47 +0200 Subject: [PATCH 4/7] [vevo] move authentication to VevoBase now it can be used by playlist extractor --- youtube_dl/extractor/vevo.py | 94 ++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index cb2ac19eb..61c7b16d0 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -24,6 +24,53 @@ class VevoBaseIE(InfoExtractor): webpage, 'initial store'), video_id) + def _initialize_api(self, refresh=False): + data = {'client_id': 'SPupX1tvqFEopQ1YS6SS'} + if refresh: + data.update({ + 'grant_type': 'refresh_token', + 'refresh_token': self._REFRESH_TOKEN, + }) + else: + data['grant_type'] = 'urn:vevo:params:oauth:grant-type:anonymous' + post_data = json.dumps(data).encode('utf-8') + + webpage = self._download_webpage( + 'https://accounts.vevo.com/token', 'token', + data=post_data, headers={'Content-Type': 'application/json'}, + note='Retrieving oauth token', + errnote='Unable to retrieve oauth token') + + if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): + self.raise_geo_restricted( + '%s said: This page is currently unavailable in your region' % self.IE_NAME) + + auth_info = self._parse_json(webpage, 'token') + + self._ACCESS_TOKEN = auth_info['legacy_token'] + self._REFRESH_TOKEN = auth_info['refresh_token'] + + def _call_api(self, path, *args, **kwargs): + try: + data = self._download_json( + 'https://apiv2.vevo.com/%s' % path, headers={ + 'Authorization': 'Bearer %s' % self._ACCESS_TOKEN, + }, *args, **kwargs) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + errors = self._parse_json(e.cause.read().decode(), None)['errors'] + error_messages = [error['message'] for error in errors] + if 'Token is expired' in error_messages: + self._initialize_api(refresh=True) + return self._call_api(path, *args, **kwargs) + else: + error_message = ', '.join(error_messages) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_message), + expected=True) + raise + return data + class VevoIE(VevoBaseIE): ''' @@ -153,53 +200,6 @@ class VevoIE(VevoBaseIE): 4: 'amazon', } - def _initialize_api(self, refresh=False): - data = {'client_id': 'SPupX1tvqFEopQ1YS6SS'} - if refresh: - data.update({ - 'grant_type': 'refresh_token', - 'refresh_token': self._REFRESH_TOKEN, - }) - else: - data['grant_type'] = 'urn:vevo:params:oauth:grant-type:anonymous' - post_data = json.dumps(data).encode('utf-8') - - webpage = self._download_webpage( - 'https://accounts.vevo.com/token', 'token', - data=post_data, headers={'Content-Type': 'application/json'}, - note='Retrieving oauth token', - errnote='Unable to retrieve oauth token') - - if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): - self.raise_geo_restricted( - '%s said: This page is currently unavailable in your region' % self.IE_NAME) - - auth_info = self._parse_json(webpage, 'token') - - self._ACCESS_TOKEN = auth_info['legacy_token'] - self._REFRESH_TOKEN = auth_info['refresh_token'] - - def _call_api(self, path, *args, **kwargs): - try: - data = self._download_json( - 'https://apiv2.vevo.com/%s' % path, headers={ - 'Authorization': 'Bearer %s' % self._ACCESS_TOKEN, - }, *args, **kwargs) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - errors = self._parse_json(e.cause.read().decode(), None)['errors'] - error_messages = [error['message'] for error in errors] - if 'Token is expired' in error_messages: - self._initialize_api(refresh=True) - return self._call_api(path, *args, **kwargs) - else: - error_message = ', '.join(error_messages) - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error_message), - expected=True) - raise - return data - def _real_extract(self, url): video_id = self._match_id(url) From 5287e6850665d1c7189837ff7c9e0703e51a2722 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sun, 30 Apr 2017 02:21:28 +0200 Subject: [PATCH 5/7] [vevo] fix playlist extractor --- youtube_dl/extractor/vevo.py | 55 +++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 61c7b16d0..28548298d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -13,6 +13,8 @@ from ..utils import ( ExtractorError, int_or_none, parse_iso8601, + smuggle_url, + unsmuggle_url, ) @@ -24,6 +26,10 @@ class VevoBaseIE(InfoExtractor): webpage, 'initial store'), video_id) + def _store_tokens(self, tokens): + self._ACCESS_TOKEN = tokens['legacy_token'] + self._REFRESH_TOKEN = tokens['refresh_token'] + def _initialize_api(self, refresh=False): data = {'client_id': 'SPupX1tvqFEopQ1YS6SS'} if refresh: @@ -47,8 +53,7 @@ class VevoBaseIE(InfoExtractor): auth_info = self._parse_json(webpage, 'token') - self._ACCESS_TOKEN = auth_info['legacy_token'] - self._REFRESH_TOKEN = auth_info['refresh_token'] + self._store_tokens(auth_info) def _call_api(self, path, *args, **kwargs): try: @@ -203,8 +208,16 @@ class VevoIE(VevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) + url, tokens = unsmuggle_url(url) + + # When downloading a playlist parameters are preserved + # so there is no need to get tokens again if not hasattr(self, '_ACCESS_TOKEN'): - self._initialize_api() + # Use tokens smuggled from playlist extractor if found + if tokens: + self._store_tokens(tokens) + else: + self._initialize_api() video_info = self._call_api( 'video/%s' % video_id, video_id, 'Downloading api video info', @@ -359,31 +372,41 @@ class VevoPlaylistIE(VevoBaseIE): 'only_matching': True, }] + def _url_result(self, isrc, index): + url = 'http://www.vevo.com/watch/%s' % isrc + if index == 0: + url = smuggle_url(url, { + 'legacy_token': self._ACCESS_TOKEN, + 'refresh_token': self._REFRESH_TOKEN, + }) + return self.url_result(url, VevoIE.ie_key(), isrc) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) playlist_id = mobj.group('id') playlist_kind = mobj.group('kind') - webpage = self._download_webpage(url, playlist_id) + self._initialize_api() + + if playlist_kind == 'playlist': + path = 'playlist/%s' + else: + path = 'videos?genre=%s&sort=MostViewedLastWeek' + + playlist = self._call_api( + path % playlist_id, playlist_id, 'Downloading api playlist info', + 'Failed to download playlist info') qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) index = qs.get('index', [None])[0] if index: - video_id = self._search_regex( - r']+content=(["\'])vevo://video/(?P.+?)\1[^>]*>', - webpage, 'video id', default=None, group='id') - if video_id: - return self.url_result('vevo:%s' % video_id, VevoIE.ie_key()) - - playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind] - - playlist = (list(playlists.values())[0] - if playlist_kind == 'playlist' else playlists[playlist_id]) + return self._url_result(playlist['videos'][int(index)]['isrc'], 0) entries = [ - self.url_result('vevo:%s' % src, VevoIE.ie_key()) - for src in playlist['isrcs']] + self._url_result(src['isrc'], i) + for i, src in enumerate(playlist['videos']) + ] return self.playlist_result( entries, playlist.get('playlistId') or playlist_id, From 6f3222f5a2d60878258c4cb123902e09897bf643 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sun, 30 Apr 2017 02:25:24 +0200 Subject: [PATCH 6/7] [vevo] update playlist tests --- youtube_dl/extractor/vevo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 28548298d..f4d07ede5 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -342,29 +342,29 @@ class VevoPlaylistIE(VevoBaseIE): 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', 'info_dict': { 'id': 'dadbf4e7-b99f-4184-9670-6f0e547b6a29', - 'title': 'Best-Of: Birdman', + 'title': 'Best Of: Birdman', + 'description': 'Ca$h Money Records\' ballin\' boss turns 48 today.', }, - 'playlist_count': 10, + 'playlist_count': 24, }, { 'url': 'http://www.vevo.com/watch/genre/rock', 'info_dict': { 'id': 'rock', - 'title': 'Rock', }, 'playlist_count': 20, }, { - 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=0', + 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29?index=1', 'md5': '32dcdfddddf9ec6917fc88ca26d36282', 'info_dict': { 'id': 'USCMV1100073', 'ext': 'mp4', - 'title': 'Birdman - Y.U. MAD', + 'title': 'Birdman ft. Lil Wayne & Nicki Minaj - Y.U. MAD', 'timestamp': 1323417600, 'upload_date': '20111209', 'uploader': 'Birdman', 'track': 'Y.U. MAD', 'artist': 'Birdman', - 'genre': 'Rap/Hip-Hop', + 'genre': 'Hip-Hop', }, 'expected_warnings': ['Unable to download SMIL file'], }, { From e1237665e53e9abb6bffe7657265820fd12fbf71 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sun, 30 Apr 2017 03:14:04 +0200 Subject: [PATCH 7/7] [vevo] make use of `--no-playlist` option --- youtube_dl/extractor/vevo.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index f4d07ede5..91ba0bd45 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -367,6 +367,9 @@ class VevoPlaylistIE(VevoBaseIE): 'genre': 'Hip-Hop', }, 'expected_warnings': ['Unable to download SMIL file'], + 'params': { + 'noplaylist': True, + }, }, { 'url': 'http://www.vevo.com/watch/genre/rock?index=0', 'only_matching': True, @@ -400,8 +403,15 @@ class VevoPlaylistIE(VevoBaseIE): qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) index = qs.get('index', [None])[0] - if index: - return self._url_result(playlist['videos'][int(index)]['isrc'], 0) + if self._downloader.params.get('noplaylist') and index: + isrc = playlist['videos'][int(index)]['isrc'] + self.to_screen( + 'Downloading just video %s because of --no-playlist' % isrc) + return self._url_result(isrc, 0) + + self.to_screen( + 'Downloading playlist %s - add --no-playlist' + ' to just download video' % playlist_id) entries = [ self._url_result(src['isrc'], i)