From 740d7c49c251464b25bea5146e054390ada6b997 Mon Sep 17 00:00:00 2001 From: skacurt Date: Sat, 23 Apr 2016 04:20:04 +0300 Subject: [PATCH 1/3] [odatv] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/odatv.py | 51 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 youtube_dl/extractor/odatv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7314be747..6de50296c 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -587,6 +587,7 @@ from .nytimes import ( NYTimesArticleIE, ) from .nuvid import NuvidIE +from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .onet import ( diff --git a/youtube_dl/extractor/odatv.py b/youtube_dl/extractor/odatv.py new file mode 100644 index 000000000..dbf96aefe --- /dev/null +++ b/youtube_dl/extractor/odatv.py @@ -0,0 +1,51 @@ +# coding: utf-8 + +from __future__ import unicode_literals +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + remove_start +) +import re + + +class OdaTVIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?id=(?P<id>[^&]*)' + _TESTS = [{ + 'url': 'http://odatv.com/vid_video.php?id=8E388', + 'md5': 'dc61d052f205c9bf2da3545691485154', + 'info_dict': { + 'id': '8E388', + 'ext': 'mp4', + 'title': 'md5:69654805a16a16cf9ec9d055e079831c' + } + }, { + 'url': 'http://odatv.com/mob_video.php?id=8E388', + 'md5': 'dc61d052f205c9bf2da3545691485154', + 'info_dict': { + 'id': '8E388', + 'ext': 'mp4', + 'title': 'md5:69654805a16a16cf9ec9d055e079831c' + } + }, { + 'url': 'http://odatv.com/mob_video.php?id=8E900', + 'md5': '', + 'info_dict': { + 'id': '8E900', + 'ext': 'mp4', + 'title': 'not found check' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + if 'NO VIDEO!' 
in webpage: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + return { + 'id': video_id, + 'title': remove_start(self._og_search_title(webpage), 'Video: '), + 'thumbnail': self._og_search_thumbnail(webpage), + 'url': self._html_search_regex(r"(http.+?video_%s\.mp4)" % re.escape(video_id), webpage, 'url', flags=re.IGNORECASE) + } From 790b06b7d4490e1e54659fafb71167bc459c701c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 20 Jul 2016 21:43:22 +0700 Subject: [PATCH 2/3] [odatv] Improve (Closes #9285) --- youtube_dl/extractor/odatv.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/odatv.py b/youtube_dl/extractor/odatv.py index dbf96aefe..314527f98 100644 --- a/youtube_dl/extractor/odatv.py +++ b/youtube_dl/extractor/odatv.py @@ -1,51 +1,50 @@ # coding: utf-8 - from __future__ import unicode_literals + from .common import InfoExtractor from ..utils import ( ExtractorError, + NO_DEFAULT, remove_start ) -import re class OdaTVIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?id=(?P<id>[^&]*)' + _VALID_URL = r'https?://(?:www\.)?odatv\.com/(?:mob|vid)_video\.php\?.*\bid=(?P<id>[^&]+)' _TESTS = [{ 'url': 'http://odatv.com/vid_video.php?id=8E388', 'md5': 'dc61d052f205c9bf2da3545691485154', 'info_dict': { 'id': '8E388', 'ext': 'mp4', - 'title': 'md5:69654805a16a16cf9ec9d055e079831c' + 'title': 'Artık Davutoğlu ile devam edemeyiz' } }, { + # mobile URL 'url': 'http://odatv.com/mob_video.php?id=8E388', - 'md5': 'dc61d052f205c9bf2da3545691485154', - 'info_dict': { - 'id': '8E388', - 'ext': 'mp4', - 'title': 'md5:69654805a16a16cf9ec9d055e079831c' - } + 'only_matching': True, }, { + # no video 'url': 'http://odatv.com/mob_video.php?id=8E900', - 'md5': '', - 'info_dict': { - 'id': '8E900', - 'ext': 'mp4', - 'title': 'not found check' - } + 'only_matching': True, }] def _real_extract(self, url): video_id = 
self._match_id(url) webpage = self._download_webpage(url, video_id) - if 'NO VIDEO!' in webpage: + + no_video = 'NO VIDEO!' in webpage + + video_url = self._search_regex( + r'mp4\s*:\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url', + default=None if no_video else NO_DEFAULT, group='url') + + if no_video: raise ExtractorError('Video %s does not exist' % video_id, expected=True) return { 'id': video_id, + 'url': video_url, 'title': remove_start(self._og_search_title(webpage), 'Video: '), 'thumbnail': self._og_search_thumbnail(webpage), - 'url': self._html_search_regex(r"(http.+?video_%s\.mp4)" % re.escape(video_id), webpage, 'url', flags=re.IGNORECASE) } From 84e8cca48bdb2cda290f458e31b34a677eb260ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 20 Jul 2016 22:41:13 +0700 Subject: [PATCH 3/3] [youjizz] Relax _VALID_URL (Closes #10131) --- youtube_dl/extractor/youjizz.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py index 4150b28da..31e2f9263 100644 --- a/youtube_dl/extractor/youjizz.py +++ b/youtube_dl/extractor/youjizz.py @@ -9,8 +9,8 @@ from ..utils import ( class YouJizzIE(InfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/[^/#?]+-(?P<id>[0-9]+)\.html(?:$|[?#])' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])' + _TESTS = [{ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html', 'md5': '07e15fa469ba384c7693fd246905547c', 'info_dict': { @@ -19,7 +19,10 @@ class YouJizzIE(InfoExtractor): 'title': 'Zeichentrick 1', 'age_limit': 18, } - } + }, { + 'url': 'http://www.youjizz.com/videos/-2189178.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url)