From c9b8f99019f4241c93221e56f4ae1096e15b408a Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Fri, 3 Feb 2017 11:48:24 +0100 Subject: [PATCH 1/6] Changed Regex for recognizing playlists --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 01c7b3042..28277fa3d 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -302,7 +302,7 @@ class IqiyiIE(InfoExtractor): PAGE_SIZE = 50 links = re.findall( - r']+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"', + r']+?class="site-piclist_pic_link"[^>]+?>', webpage) if not links: return From 88eeec36936cbb58fb573108b2092e7af2c6c091 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Fri, 3 Feb 2017 12:14:50 +0100 Subject: [PATCH 2/6] Fixed key error when trying to get next page of playlist and qiyi only gives code A00004 --- youtube_dl/extractor/iqiyi.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 28277fa3d..69fec6451 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -323,10 +323,13 @@ class IqiyiIE(InfoExtractor): errnote='Failed to download playlist page %d' % page_num) pagelist = self._parse_json( remove_start(pagelist_page, 'var tvInfoJs='), album_id) - vlist = pagelist['data']['vlist'] - for item in vlist: - entries.append(self.url_result(item['vurl'])) - if len(vlist) < PAGE_SIZE: + if 'data' in pagelist: + vlist = pagelist['data']['vlist'] + for item in vlist: + entries.append(self.url_result(item['vurl'])) + if len(vlist) < PAGE_SIZE: + break + else: break return self.playlist_result(entries, album_id, album_title) From 8d58ebe4dcd5a6c5b16f8470da41687c4f5cf406 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Fri, 3 Feb 2017 12:18:47 +0100 Subject: [PATCH 3/6] Slight improvement in Regex to check for playlist 
or page --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 69fec6451..5d583e08d 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -302,7 +302,7 @@ class IqiyiIE(InfoExtractor): PAGE_SIZE = 50 links = re.findall( - r']+?class="site-piclist_pic_link"[^>]+?>', + r']+?href="(http://www\.iqiyi\.com/.+\.html)"[^>]+?class="site-piclist_pic_link"[^>]+?>', webpage) if not links: return From ee32d5ae0fdd54f013f031a5e4022ec216e53c73 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Fri, 3 Feb 2017 16:30:02 +0100 Subject: [PATCH 4/6] Slight improvement on Regex for Playlist Detection --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 5d583e08d..6f103e8f4 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -302,7 +302,7 @@ class IqiyiIE(InfoExtractor): PAGE_SIZE = 50 links = re.findall( - r']+?href="(http://www\.iqiyi\.com/.+\.html)"[^>]+?class="site-piclist_pic_link"[^>]+?>', + r']+href="(http://www\.iqiyi\.com/.+\.html)"[^>]+class="site-piclist_pic_link".*>', webpage) if not links: return From 63cf1124fad58a2a200dc8caab7014875cc9bba9 Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Sun, 5 Feb 2017 21:13:51 +0100 Subject: [PATCH 5/6] Redesigned link extraction process --- youtube_dl/extractor/iqiyi.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 6f103e8f4..03028c19e 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -19,6 +19,7 @@ from ..utils import ( ExtractorError, ohdave_rsa_encrypt, remove_start, + extract_attributes, ) @@ -301,10 +302,14 @@ class IqiyiIE(InfoExtractor): def _extract_playlist(self, webpage): PAGE_SIZE = 50 - links = 
re.findall( - r']+href="(http://www\.iqiyi\.com/.+\.html)"[^>]+class="site-piclist_pic_link".*>', - webpage) - if not links: + links = [] + for link in re.findall(r']+class="[^"]*site-piclist_pic_link[^"]*"[^>]*>', webpage): + attribs = extract_attributes(link) + # It must be a valid url, and links on the playlist page have NO title-Attribute in them + # (links to other videos on the video page have, so beware of that!) + if attribs['href'].startswith('http') and 'title' not in attribs: + links.append(attribs['href']) + if len(links) == 0: return album_id = self._search_regex( @@ -331,7 +336,7 @@ class IqiyiIE(InfoExtractor): break else: break - + return self.playlist_result(entries, album_id, album_title) def _real_extract(self, url): From 280f00d0557849752a4e9884d351db3a38065f6e Mon Sep 17 00:00:00 2001 From: Thomas Christlieb Date: Mon, 6 Feb 2017 09:15:24 +0100 Subject: [PATCH 6/6] Fixed a minor Flake Error (Whitespace in empty line) --- youtube_dl/extractor/iqiyi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 03028c19e..ece6ba953 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -336,7 +336,7 @@ class IqiyiIE(InfoExtractor): break else: break - + return self.playlist_result(entries, album_id, album_title) def _real_extract(self, url):