From fc96af00fd1f9695ed03cc2807c5cf6fe0dab760 Mon Sep 17 00:00:00 2001 From: cryzed Date: Wed, 12 Sep 2012 17:23:47 +0200 Subject: [PATCH 1/3] Fixed YouTube playlist parsing --- youtube_dl/InfoExtractors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ddb4aa16b..fc7b34f7f 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1471,7 +1471,7 @@ class YoutubePlaylistIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' - _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=(PL)?%s&' + _VIDEO_INDICATOR = r'/watch\?v=(.+?)&list=(.+?)&' _MORE_PAGES_INDICATOR = r'yt-uix-pager-next' IE_NAME = u'youtube:playlist' @@ -1518,8 +1518,8 @@ class YoutubePlaylistIE(InfoExtractor): # Extract video identifiers ids_in_page = [] - for mobj in re.finditer(self._VIDEO_INDICATOR_TEMPLATE % playlist_id, page): - if mobj.group(1) not in ids_in_page: + for mobj in re.finditer(self._VIDEO_INDICATOR, page): + if mobj.group(1) not in ids_in_page and mobj.group(2)[2:] == playlist_id: ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) From 4294898628b0a677bf45a00249154879a00ed3ac Mon Sep 17 00:00:00 2001 From: cryzed Date: Wed, 12 Sep 2012 20:45:30 +0200 Subject: [PATCH 2/3] Improved playlist parsing --- youtube_dl/InfoExtractors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index fc7b34f7f..2fbdac1eb 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1471,7 +1471,7 @@ class YoutubePlaylistIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' - _VIDEO_INDICATOR = r'/watch\?v=(.+?)&list=(.+?)&' + _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=SP%s' _MORE_PAGES_INDICATOR = r'yt-uix-pager-next' IE_NAME = u'youtube:playlist' @@ -1518,8 +1518,8 @@ class YoutubePlaylistIE(InfoExtractor): # Extract video identifiers ids_in_page = [] - for mobj in re.finditer(self._VIDEO_INDICATOR, page): - if mobj.group(1) not in ids_in_page and mobj.group(2)[2:] == playlist_id: + for mobj in re.finditer(self._VIDEO_INDICATOR_TEMPLATE % playlist_id, page): + if mobj.group(1) not in ids_in_page: ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) From e8091b2f5075a712204f3334ef54e45eafb6ba9c Mon Sep 17 00:00:00 2001 From: cryzed Date: Thu, 20 Sep 2012 15:16:27 +0200 Subject: [PATCH 3/3] Fixed RegEx, playlist prefix seemingly changes, account for all possible cases. --- youtube_dl/InfoExtractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 2fbdac1eb..6976d5fc9 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1471,7 +1471,7 @@ class YoutubePlaylistIE(InfoExtractor): _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' - _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=SP%s' + _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&list=.*?%s' _MORE_PAGES_INDICATOR = r'yt-uix-pager-next' IE_NAME = u'youtube:playlist'