From 86d758c21c686c4006aedbf139b20bd59a070ea9 Mon Sep 17 00:00:00 2001 From: Gergely Imreh Date: Wed, 9 Feb 2011 13:22:17 +0800 Subject: [PATCH] Extract additional playlist page type and video link in that playlist Some user pages have additional playlist formats, e.g.: http://www.youtube.com/user/stanforduniversity#g/c/9D558D49CA734A02 and http://www.youtube.com/user/stanforduniversity#p/c/9D558D49CA734A02 There is also a related URL format which refers to a single video within those playlists, where both playlist and video ids are included in the URL: http://www.youtube.com/user/stanforduniversity#p/c/9D558D49CA734A02/0/Ps8jOj7diA0 Extract playlist and turn the URL into a format that is already understood. The third format, the single video, should actually belong to YoutubeIE and is handled here temporarily until a better fix is created. --- youtube-dl | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/youtube-dl b/youtube-dl index dd875a38e..efd18305b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2096,7 +2096,8 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/)([^&]+).*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(?P<Pre1>p|a)=|user/.*?/user/|p/|user/.*?#(?P<Pre2>g|p)/c/)(?P<ID>[^&]+).*' + _COMBO_ID = r'(?:[^&]+)/(?:[^&]+)/(?P<ID>[^&]+)' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' @@ -2125,17 +2126,28 @@ class YoutubePlaylistIE(InfoExtractor): return # Download playlist pages - # prefix is 'p' as default for playlists but there are other types that need extra care - playlist_prefix = mobj.group(1) - if playlist_prefix == 'a': - playlist_access = 'artist' - else: - 
playlist_access = 'view_play_list' - playlist_id = mobj.group(2) + playlist_prefix = mobj.group('Pre1') + playlist_altprefix = mobj.group('Pre2') + playlist_id = mobj.group('ID') + is_playlist = True video_ids = [] pagenum = 1 - while True: + # prefix is 'p' as default for playlists but there are other types that need extra care + if playlist_prefix == 'a': + playlist_access = 'artist' + else: + if playlist_altprefix == 'p': + # Not really a playlist but single video within the list: + ids = re.match(self._COMBO_ID, playlist_id) + if ids is not None: + is_playlist = False + video_ids = [ids.group('ID')] + if is_playlist: + playlist_prefix = 'p' + playlist_access = 'view_play_list' + + while is_playlist: self.report_download_page(playlist_id, pagenum) request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) try: @@ -2155,9 +2167,10 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 - playliststart = self._downloader.params.get('playliststart', 1) - 1 - playlistend = self._downloader.params.get('playlistend', -1) - video_ids = video_ids[playliststart:playlistend] + if is_playlist: + playliststart = self._downloader.params.get('playliststart', 1) - 1 + playlistend = self._downloader.params.get('playlistend', -1) + video_ids = video_ids[playliststart:playlistend] for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)