From 4345f57a5791350c903c4973165fd25da89947f9 Mon Sep 17 00:00:00 2001 From: AnyISalIn Date: Wed, 26 Dec 2018 12:40:59 +0800 Subject: [PATCH 1/2] [iqiyi] Fix extraction video_id --- youtube_dl/extractor/iqiyi.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 4b081bd46..88ddb8ec0 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -344,7 +344,13 @@ class IqiyiIE(InfoExtractor): # Sometimes there are playlist links in individual videos, so treat it # as a single video first tvid = self._search_regex( - r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None) + r'param\[\'tvid\'\] = \"(.*)";', webpage, 'tvid', default=None) + + if tvid is None: # if tw iqiyi + tvid = self._search_regex( + r'"tvid":\"(\d+)",', webpage, 'tvid', default=None) + + if tvid is None: playlist_result = self._extract_playlist(webpage) if playlist_result: @@ -352,7 +358,13 @@ class IqiyiIE(InfoExtractor): raise ExtractorError('Can\'t find any video') video_id = self._search_regex( - r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') + r'param\[\'vid\'\] = \"(.*)";', webpage, 'video_id', default=None) + + if video_id is None: # if tw iqiyi + video_id = self._search_regex( + r'"vid":\"([0-9a-z]+)",', webpage, 'video_id') + + formats = [] for _ in range(5): @@ -383,9 +395,15 @@ class IqiyiIE(InfoExtractor): self._sleep(5, video_id) self._sort_formats(formats) - title = (get_element_by_id('widget-videotitle', webpage) or - clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or - self._html_search_regex(r']+data-videochanged-title="word"[^>]*>([^<]+)', webpage, 'title')) + + title_link = get_element_by_attribute('class', 'title-link', webpage) or \ + get_element_by_attribute('v-text', 'props.videoAlbumName', webpage) or '' + + title_txt = get_element_by_attribute('class', 'title-txt', webpage) or \ + get_element_by_attribute('v-text', 'props.videoSubTitle', webpage) or \ + get_element_by_attribute('class', 'fontColor-link', webpage) + + title = title_link + title_txt return { 'id': video_id, From a5a940edb6b9d1913898573832f7d65036fef829 Mon Sep 17 00:00:00 2001 From: AnyISalIn Date: Thu, 14 Feb 2019 15:07:02 +0800 Subject: [PATCH 2/2] [iqiyi] Fix extraction playlist --- youtube_dl/extractor/iqiyi.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 88ddb8ec0..f29bfc395 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -307,13 +307,15 @@ class IqiyiIE(InfoExtractor): PAGE_SIZE = 50 links = re.findall( - r']+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"', + r']+class="site-piclist_pic_link"[^>]+href="(//www\.iqiyi\.com/.+\.html)"', webpage) + # add https prefix + links = ['https:{}'.format(link) for link in links] if not links: return album_id = self._search_regex( - r'albumId\s*:\s*(\d+),', webpage, 'album ID') + r'albumId\s*:\s*"(\d+)",', webpage, 'album ID') album_title = self._search_regex( r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)