From cddba2646342ba8402ce32e6a4ad71eb55db6aed Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 24 Sep 2015 22:38:00 +0100 Subject: [PATCH 1/5] [keek] fix info extraction --- youtube_dl/extractor/keek.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index c0956ba09..4493a929d 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -4,43 +4,27 @@ from .common import InfoExtractor class KeekIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?keek\.com/(?:!|\w+/keeks/)(?P<id>\w+)' + _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)' IE_NAME = 'keek' _TEST = { - 'url': 'https://www.keek.com/ytdl/keeks/NODfbab', - 'md5': '09c5c109067536c1cec8bac8c21fea05', + 'url': 'https://www.keek.com/keek/NODfbab', + 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83', 'info_dict': { 'id': 'NODfbab', 'ext': 'mp4', - 'uploader': 'youtube-dl project', - 'uploader_id': 'ytdl', - 'title': 'test chars: "\'/\\\u00e4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', + 'title': 'test chars: "\'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de . 
- Video - Videos on Keek', }, } def _real_extract(self, url): video_id = self._match_id(url) - video_url = 'http://cdn.keek.com/keek/video/%s' % video_id - thumbnail = 'http://cdn.keek.com/keek/thumbnail/%s/w100/h75' % video_id webpage = self._download_webpage(url, video_id) - raw_desc = self._html_search_meta('description', webpage) - if raw_desc: - uploader = self._html_search_regex( - r'Watch (.*?)\s+\(', raw_desc, 'uploader', fatal=False) - uploader_id = self._html_search_regex( - r'Watch .*?\(@(.+?)\)', raw_desc, 'uploader_id', fatal=False) - else: - uploader = None - uploader_id = None - return { 'id': video_id, - 'url': video_url, + 'url': self._og_search_video_url(webpage), 'ext': 'mp4', 'title': self._og_search_title(webpage), - 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, + 'thumbnail': self._og_search_thumbnail(webpage), } From 354450509298e6678c7a857db19b72c6fab8319d Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 18:04:25 +0100 Subject: [PATCH 2/5] [keek] extract more info --- youtube_dl/extractor/keek.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index 4493a929d..24f5c7cb7 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor @@ -13,6 +15,9 @@ class KeekIE(InfoExtractor): 'id': 'NODfbab', 'ext': 'mp4', 'title': 'test chars: "\'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de . 
- Video - Videos on Keek', + 'description': 'test chars: "\'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', + 'uploader': 'ytdl', + 'uploader_id': 'eGT5bab', }, } @@ -20,11 +25,18 @@ class KeekIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + uploader = uploader_id = None + matches = re.search(r'data-username="(?P<uploader>[^"]+)"[^>]*data-user-id="(?P<uploader_id>[^"]+)"', webpage) + if matches: + uploader, uploader_id = matches.groups() return { 'id': video_id, 'url': self._og_search_video_url(webpage), 'ext': 'mp4', 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader': uploader, + 'uploader_id': uploader_id, } From ca387f6c5863367b8d70895d88d8823d1aa921aa Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 18:25:24 +0100 Subject: [PATCH 3/5] [keek] fix test title --- youtube_dl/extractor/keek.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index 24f5c7cb7..280134a2a 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -14,8 +14,8 @@ class KeekIE(InfoExtractor): 'info_dict': { 'id': 'NODfbab', 'ext': 'mp4', - 'title': 'test chars: "\'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de . - Video - Videos on Keek', - 'description': 'test chars: "\'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de .', + 'title': 'test chars: "\'/\\\xe4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de . 
- Video - Videos on Keek', + 'description': 'md5:35d42050a3ece241d5ddd7fdcc6fd896', 'uploader': 'ytdl', 'uploader_id': 'eGT5bab', }, From 679f2e83a984676cba0a4aa1769d69dd07740c61 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 19:07:18 +0100 Subject: [PATCH 4/5] [keek] add utf-8 coding cookie --- youtube_dl/extractor/keek.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index 280134a2a..d84774dca 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import re @@ -14,7 +15,7 @@ class KeekIE(InfoExtractor): 'info_dict': { 'id': 'NODfbab', 'ext': 'mp4', - 'title': 'test chars: "\'/\\\xe4<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de . - Video - Videos on Keek', + 'title': 'test chars: "\'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de . 
- Video - Videos on Keek', 'description': 'md5:35d42050a3ece241d5ddd7fdcc6fd896', 'uploader': 'ytdl', 'uploader_id': 'eGT5bab', From 4cde1bbad72f7dc891e303dc36a353c6e1d629c0 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sun, 27 Sep 2015 19:09:48 +0100 Subject: [PATCH 5/5] [keek] extract uploader and uploader id with _search_regex --- youtube_dl/extractor/keek.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py index d84774dca..99154112b 100644 --- a/youtube_dl/extractor/keek.py +++ b/youtube_dl/extractor/keek.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor @@ -26,10 +24,6 @@ class KeekIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - uploader = uploader_id = None - matches = re.search(r'data-username="(?P<uploader>[^"]+)"[^>]*data-user-id="(?P<uploader_id>[^"]+)"', webpage) - if matches: - uploader, uploader_id = matches.groups() return { 'id': video_id, @@ -38,6 +32,6 @@ class KeekIE(InfoExtractor): 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), - 'uploader': uploader, - 'uploader_id': uploader_id, + 'uploader': self._search_regex(r'data-username="([^"]+)"', webpage, 'uploader', None), + 'uploader_id': self._search_regex(r'data-user-id="([^"]+)"', webpage, 'uploader id', None), }