From 58734dfc2881e2fde33410bb0895da419a182682 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Thu, 22 Feb 2018 20:43:15 +0100 Subject: [PATCH 1/3] - added title to json output --- youtube_dl/extractor/youtube.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bc9baffeb..090b69dd4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -76,6 +76,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self.url_result(vid_id, 'Youtube', video_id=vid_id) for vid_id in ids] + def _ids_to_results2(self, id_objects): + return [ + self.url_result(curr_id_obj["vid_id"], 'Youtube', video_id=curr_id_obj["vid_id"], video_title=curr_id_obj["title"]) + for curr_id_obj in id_objects] + def _login(self): """ Attempt to log in to YouTube. @@ -2617,8 +2622,23 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): raise ExtractorError( '[youtube] No video results', expected=True) - new_videos = self._ids_to_results(orderedSet(re.findall( - r'href="/watch\?v=(.{11})', html_content))) + video_ids = orderedSet(re.findall( + r'href="/watch\?v=(.{11})', html_content)) + video_id_objects = [] + regex_pre = '(?s)class\s*=\s*"\s*yt-lockup-title\s*"[^<].+?(?=a\s*href="/watch\?v=' + regex_post = ').+?(?=title)title\s*=\s*"([^"]+)"[^>]+' + for curr_id in video_ids: + regex_combined = r''+regex_pre+curr_id+regex_post + extracted_title = self._html_search_regex( + regex_combined, + html_content,'title') + video_id_objects.append({ + 'vid_id': curr_id, + 'title': extracted_title.encode('utf-8') + }) + + new_videos = self._ids_to_results2(video_id_objects) + videos += new_videos if not new_videos or len(videos) > limit: break From 78333fae62e04d6bc4c92c0bede0b84b27b0de6d Mon Sep 17 00:00:00 2001 From: carsten demming Date: Thu, 22 Feb 2018 20:44:20 +0100 Subject: [PATCH 2/3] - renamed method --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 090b69dd4..4491d35bd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -76,7 +76,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self.url_result(vid_id, 'Youtube', video_id=vid_id) for vid_id in ids] - def _ids_to_results2(self, id_objects): + def _ids_to_results_with_title(self, id_objects): return [ self.url_result(curr_id_obj["vid_id"], 'Youtube', video_id=curr_id_obj["vid_id"], video_title=curr_id_obj["title"]) for curr_id_obj in id_objects] @@ -2637,7 +2637,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): 'title': extracted_title.encode('utf-8') }) - new_videos = self._ids_to_results2(video_id_objects) + new_videos = self._ids_to_results_with_title(video_id_objects) videos += new_videos if not new_videos or len(videos) > limit: From 31c3d1ed378151a8585f992defecd403e5094185 Mon Sep 17 00:00:00 2001 From: carsten demming Date: Thu, 22 Feb 2018 20:47:08 +0100 Subject: [PATCH 3/3] - flake8 --- youtube_dl/extractor/youtube.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4491d35bd..790f133a2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2628,13 +2628,11 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): regex_pre = '(?s)class\s*=\s*"\s*yt-lockup-title\s*"[^<].+?(?=a\s*href="/watch\?v=' regex_post = ').+?(?=title)title\s*=\s*"([^"]+)"[^>]+' for curr_id in video_ids: - regex_combined = r''+regex_pre+curr_id+regex_post - extracted_title = self._html_search_regex( - regex_combined, - html_content,'title') + regex_combined = r'' + regex_pre + curr_id + regex_post + extracted_title = self._html_search_regex(regex_combined, html_content, 'title') video_id_objects.append({ - 'vid_id': curr_id, - 'title': extracted_title.encode('utf-8') + 'vid_id': curr_id, + 'title': extracted_title.encode('utf-8') }) new_videos = self._ids_to_results_with_title(video_id_objects)