From 7182d9264079ec47024b450f72982f6e175d36e1 Mon Sep 17 00:00:00 2001 From: Marco Ferragina Date: Wed, 14 Oct 2015 11:11:52 +0200 Subject: [PATCH 1/3] [vidto] Add extractor --- docs/supportedsites.md | 2 + youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vidto.py | 82 ++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 youtube_dl/extractor/vidto.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index dc0354095..a9b6cfa0a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -159,6 +159,7 @@ - **facebook** - **faz.net** - **fc2** + - **Fczenit** - **fernsehkritik.tv** - **Firstpost** - **FiveTV** @@ -619,6 +620,7 @@ - **VideoTt**: video.tt - Your True Tube - **videoweed**: VideoWeed - **Vidme** + - **vidto**: VidTo.me - **Vidzi** - **vier** - **vier:videos** diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f6d185818..5c8fe6ed7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -725,6 +725,7 @@ from .videopremium import VideoPremiumIE from .videott import VideoTtIE from .videoweed import VideoWeedIE from .vidme import VidmeIE +from .vidto import VidtoIE from .vidzi import VidziIE from .vier import VierIE, VierVideosIE from .viewster import ViewsterIE diff --git a/youtube_dl/extractor/vidto.py b/youtube_dl/extractor/vidto.py new file mode 100644 index 000000000..3cc585471 --- /dev/null +++ b/youtube_dl/extractor/vidto.py @@ -0,0 +1,82 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import sys +from .common import InfoExtractor +import time + +from ..utils import ( + encode_dict, +) +from ..compat import ( + compat_chr, + compat_parse_qs, + compat_urllib_parse, + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlparse, + compat_urllib_request, + compat_urlparse, + compat_str, +) + + +class VidtoIE(InfoExtractor): + IE_NAME = 'vidto' + IE_DESC = 'VidTo.me' + _VALID_URL = r'https?://(?:www\.)?vidto\.me/(?P[0-9a-zA-Z]+)\.html' + _HOST = 'vidto.me' + _TEST = { + 'url': 'http://vidto.me/ku5glz52nqe1.html', + 'info_dict': { + 'id': 'ku5glz52nqe1', + 'ext': 'mp4', + 'title': 'test.mp4' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage( + 'http://%s/%s.html' % (self._HOST, video_id), video_id, 'Downloading video page') + hash_regex = r'' + hash_value = self._search_regex(hash_regex, page, 'hash', fatal=True) + title_regex = r'' + title = self._search_regex(title_regex, page, 'title', fatal=False) + id_regex = r'' + id_value = self._search_regex(id_regex, page, 'id', fatal=True) + cookies = self._get_cookies('http://%s/%s.html' % (self._HOST, video_id)) + + + form_str = { + 'op': 'download1', + 'imhuman': 'Proceed to video', + 'usr_login': '', + 'id': id_value, + 'fname': title, + 'referer': '', + 'hash': hash_value, + } + post_data = compat_urllib_parse.urlencode(encode_dict(form_str)).encode('ascii') + req = compat_urllib_request.Request(url, post_data) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + for key, morsel in cookies.iteritems(): + req.add_header('Cookie', '%s=%s' % (morsel.key, morsel.value)) + + print("Waiting for countdown...") + time.sleep(7) + post_result = self._download_webpage( + req, None, + note='Proceed to video...', errnote='unable to proceed', fatal=True) + + file_link_regex = r'file_link ?= ?\'(https?:\/\/[0-9a-zA-z.\/\-_]+)' + file_link = self._search_regex(file_link_regex, post_result, 'file_link', fatal=True) + + return { + 'id': video_id, + 'url': file_link, + 'title': title, + } From 9c6017323603ac72556239d5343dce6de5bbb6fd Mon Sep 17 00:00:00 2001 From: Marco Ferragina Date: Sat, 17 Oct 2015 19:35:41 +0200 Subject: [PATCH 2/3] vidto extractor: code cleanup --- docs/supportedsites.md | 3 --- youtube_dl/extractor/vidto.py | 32 ++++++++++++-------------------- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a9b6cfa0a..cad54f34a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -159,7 +159,6 @@ - **facebook** - **faz.net** - **fc2** - - **Fczenit** - **fernsehkritik.tv** - **Firstpost** - **FiveTV** @@ -282,7 +281,6 @@ - **Malemotion** - **MDR** - **media.ccc.de** - - **MegaVideoz** - **metacafe** - **Metacritic** - **Mgoon** @@ -620,7 +618,6 @@ - **VideoTt**: video.tt - Your True Tube - **videoweed**: VideoWeed - **Vidme** - - **vidto**: VidTo.me - **Vidzi** - **vier** - **vier:videos** diff --git a/youtube_dl/extractor/vidto.py b/youtube_dl/extractor/vidto.py index 3cc585471..391f400fc 100644 --- a/youtube_dl/extractor/vidto.py +++ b/youtube_dl/extractor/vidto.py @@ -1,24 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import sys from .common import InfoExtractor +import re import time -from ..utils import ( - encode_dict, -) +from ..utils import encode_dict from ..compat import ( - compat_chr, - compat_parse_qs, - compat_urllib_parse, - compat_urllib_parse_unquote, - compat_urllib_parse_unquote_plus, - compat_urllib_parse_urlparse, compat_urllib_request, - compat_urlparse, - compat_str, + compat_urllib_parse ) @@ -37,8 +27,7 @@ class VidtoIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) page = self._download_webpage( 'http://%s/%s.html' % (self._HOST, video_id), video_id, 'Downloading video page') @@ -63,16 +52,19 @@ class VidtoIE(InfoExtractor): post_data = compat_urllib_parse.urlencode(encode_dict(form_str)).encode('ascii') req = compat_urllib_request.Request(url, post_data) req.add_header('Content-type', 'application/x-www-form-urlencoded') - for key, morsel in cookies.iteritems(): - req.add_header('Cookie', '%s=%s' % (morsel.key, morsel.value)) + cookie_string = "" + for key in cookies.keys(): + cookie_string += "%s=%s;" % (key, cookies[key].value) - print("Waiting for countdown...") + req.add_header('Cookie', '%s' % cookie_string) + + self.to_screen("Waiting for countdown...") time.sleep(7) post_result = self._download_webpage( - req, None, + req, video_id, note='Proceed to video...', errnote='unable to proceed', fatal=True) - file_link_regex = r'file_link ?= ?\'(https?:\/\/[0-9a-zA-z.\/\-_]+)' + file_link_regex = r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)' file_link = self._search_regex(file_link_regex, post_result, 'file_link', fatal=True) return { From 0e04cdfa3c3ca4f31bf96e6975d641345c7131c8 Mon Sep 17 00:00:00 2001 From: Marco Ferragina Date: Mon, 9 Nov 2015 12:03:34 +0100 Subject: [PATCH 3/3] Fixed nowvideo extractor --- youtube_dl/extractor/nowvideo.py | 73 ++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py index 17baa9679..c3dbb48c1 100644 --- a/youtube_dl/extractor/nowvideo.py +++ b/youtube_dl/extractor/nowvideo.py @@ -1,28 +1,75 @@ from __future__ import unicode_literals -from .novamov import NovaMovIE +from .common import InfoExtractor +from ..utils import encode_dict +from ..compat import ( + compat_urllib_request, + compat_urllib_parse, + compat_urlparse +) +from ..utils import ( + ExtractorError, +) +import re - -class NowVideoIE(NovaMovIE): +class NowVideoIE(InfoExtractor): IE_NAME = 'nowvideo' IE_DESC = 'NowVideo' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'} + _VALID_URL = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|ec|sx|eu|at|ag|co|li)'} - _HOST = 'www.nowvideo.ch' + _HOST = 'www.nowvideo.li' _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' _FILEKEY_REGEX = r'var fkzd="([^"]+)";' - _TITLE_REGEX = r'

([^<]+)

' - _DESCRIPTION_REGEX = r'\s*

([^<]+)

' + _STEPKEY_REGEX = r'' _TEST = { - 'url': 'http://www.nowvideo.ch/video/0mw0yow7b6dxa', - 'md5': 'f8fbbc8add72bd95b7850c6a02fc8817', + 'url': 'http://www.nowvideo.li/video/edb2ded3aa118', 'info_dict': { - 'id': '0mw0yow7b6dxa', - 'ext': 'flv', - 'title': 'youtubedl test video _BaW_jenozKc.mp4', - 'description': 'Description', + 'id': 'edb2ded3aa118', + 'ext': 'mp4', + 'title': 'test' } } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + page = self._download_webpage(url, video_id, 'Downloading video page') + + if re.search(self._FILE_DELETED_REGEX, page) is not None: + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + stepkey_value = self._search_regex(self._STEPKEY_REGEX, page, 'stepkey', fatal=True) + + form_str = { + 'stepkey': stepkey_value, + } + post_data = compat_urllib_parse.urlencode(encode_dict(form_str)).encode('ascii') + req = compat_urllib_request.Request(url, post_data) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + + post_result = self._download_webpage( + req, video_id, + note='Proceed to video...', errnote='unable to proceed', fatal=True) + + filekey = self._search_regex(self._FILEKEY_REGEX, post_result, 'token', fatal=True) + + api_response = self._download_webpage( + 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id, + 'Downloading video api response') + + response = compat_urlparse.parse_qs(api_response) + + if 'error_msg' in response: + raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True) + + video_url = response['url'][0] + title = response['title'][0] + + return { + 'id': video_id, + 'url': video_url, + 'title': title + }