From 79725ab95e1077ee6ae14d016c3f5ce27794628f Mon Sep 17 00:00:00 2001 From: FDMX2 Date: Mon, 10 Apr 2017 15:07:24 +0200 Subject: [PATCH] [novamov] fix for all included provider (WholeCloud, NowVideo, BitVid, CloudTime, AuroraVid) --- youtube_dl/extractor/extractors.py | 9 +- youtube_dl/extractor/novamov.py | 169 +++++++++++++++-------------- 2 files changed, 92 insertions(+), 86 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 68e1a5cfc..7fbefbd8b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -165,10 +165,7 @@ from .ccc import CCCIE from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE -from .ceskatelevize import ( - CeskaTelevizeIE, - CeskaTelevizePoradyIE, -) +from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE @@ -541,7 +538,6 @@ from .mangomolo import ( ) from .matchtv import MatchTVIE from .mdr import MDRIE -from .medici import MediciIE from .meipai import MeipaiIE from .melonvod import MelonVODIE from .meta import METAIE @@ -669,7 +665,7 @@ from .novamov import ( AuroraVidIE, CloudTimeIE, NowVideoIE, - VideoWeedIE, + BitVidIE, WholeCloudIE, ) from .nowness import ( @@ -984,7 +980,6 @@ from .theplatform import ( from .thescene import TheSceneIE from .thesixtyone import TheSixtyOneIE from .thestar import TheStarIE -from .thesun import TheSunIE from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index 829c71960..9661fe407 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -1,9 +1,10 @@ +# coding: utf-8 + from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, NO_DEFAULT, @@ -29,9 +30,9 @@ class NovaMovIE(InfoExtractor): _HOST = 'www.novamov.com' _FILE_DELETED_REGEX = r'This file no longer exists on our servers!' - _FILEKEY_REGEX = r'flashvars\.filekey=(?P"?[^"]+"?);' - _TITLE_REGEX = r'(?s)
\s*

([^<]+)

' - _DESCRIPTION_REGEX = r'(?s)
\s*

[^<]+

([^<]+)

' + _STEPKEY_REGEX = r'' + _URL_REGEX = r'' + _TITLE_REGEX = r'' _URL_TEMPLATE = 'http://%s/video/%s' _TEST = None @@ -45,52 +46,45 @@ class NovaMovIE(InfoExtractor): url = self._URL_TEMPLATE % (self._HOST, video_id) + # 1. get the website webpage = self._download_webpage( url, video_id, 'Downloading video page') self._check_existence(webpage, video_id) - def extract_filekey(default=NO_DEFAULT): - filekey = self._search_regex( - self._FILEKEY_REGEX, webpage, 'filekey', default=default) - if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'): - return self._search_regex( - r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default) - else: - return filekey + # 2. extract the 'stepkey' value from form + def extract_stepkey(default=NO_DEFAULT): + stepkey = self._search_regex( + self._STEPKEY_REGEX, webpage, 'stepkey', default=default) + return stepkey - filekey = extract_filekey(default=None) + stepkey = extract_stepkey(default=None) - if not filekey: - fields = self._hidden_inputs(webpage) - post_url = self._search_regex( - r']+action=(["\'])(?P.+?)\1', webpage, - 'post url', default=url, group='url') - if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(url, post_url) - request = sanitized_Request( - post_url, urlencode_postdata(fields)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') - request.add_header('Referer', post_url) - webpage = self._download_webpage( - request, video_id, 'Downloading continue to the video page') - self._check_existence(webpage, video_id) + if not stepkey: + raise ExtractorError('stepkey could not be read of %s, please report this error' % video_id, expected=True) - filekey = extract_filekey() + # 3. send the post request + data = urlencode_postdata({ + 'stepkey': stepkey, + 'submit': 'submit', + }) + request = sanitized_Request(url, data) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') - title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title') - description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False) + webpage = self._download_webpage(request, url) - api_response = self._download_webpage( - 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id, - 'Downloading video api response') + # 4. extract the real video url from response + video_url = self._search_regex(self._URL_REGEX, webpage, 'stepkey') - response = compat_urlparse.parse_qs(api_response) + if hasattr(self, '_TITLE_REGEX'): + title = self._search_regex(self._TITLE_REGEX, webpage, 'title') + else: + title = str(id) - if 'error_msg' in response: - raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True) - - video_url = response['url'][0] + if hasattr(self, '_DESCRIPTION_REGEX'): + description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False) + else: + description = None return { 'id': video_id, @@ -109,19 +103,22 @@ class WholeCloudIE(NovaMovIE): _HOST = 'www.wholecloud.net' _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r'Title: ([^<]+)

' + _TITLE_REGEX = r'' _DESCRIPTION_REGEX = r'Description: ([^<]+)

' - _TEST = { - 'url': 'http://www.wholecloud.net/video/559e28be54d96', - 'md5': 'abd31a2132947262c50429e1d16c1bfd', + _TESTS = [{ + 'url': u'http://www.wholecloud.net/video/e1de95371c94a', 'info_dict': { - 'id': '559e28be54d96', - 'ext': 'flv', - 'title': 'dissapeared image', - 'description': 'optical illusion dissapeared image magic illusion', - } - } + 'id': u'e1de95371c94a', + 'ext': 'mp4', + 'title': u'Big Buck Bunny UHD 4K 60fps', + 'description': u'No description', + }, + 'md5': '909304eb0b75ef231ceb72d84fade33d', + }, { + 'url': 'http://www.wholecloud.net/video/e1de95371c94a', + 'only_matching': True, + }] class NowVideoIE(NovaMovIE): @@ -136,40 +133,46 @@ class NowVideoIE(NovaMovIE): _TITLE_REGEX = r'

([^<]+)

' _DESCRIPTION_REGEX = r'\s*

([^<]+)

' - _TEST = { - 'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b', - 'md5': '12c82cad4f2084881d8bc60ee29df092', + _TESTS = [{ + 'url': u'http://www.nowvideo.sx/video/461ebb17e1a83', 'info_dict': { - 'id': 'f1d6fce9a968b', - 'ext': 'flv', - 'title': 'youtubedl test video BaWjenozKc', - 'description': 'Description', + 'id': u'461ebb17e1a83', + 'ext': 'mp4', + 'title': u'Big Buck Bunny UHD 4K 60fps', + 'description': u'No description', }, - } + 'md5': '909304eb0b75ef231ceb72d84fade33d', + }, { + 'url': 'http://www.nowvideo.sx/video/461ebb17e1a83', + 'only_matching': True, + }] -class VideoWeedIE(NovaMovIE): - IE_NAME = 'videoweed' - IE_DESC = 'VideoWeed' +# VideoWeed is now BitVid +class BitVidIE(NovaMovIE): + IE_NAME = 'bitvid' + IE_DESC = 'Bitvid' - _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'bitvid\.(?:sx)'} - _HOST = 'www.videoweed.es' + _HOST = 'www.bitvid.sx' _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' _TITLE_REGEX = r'

([^<]+)

' _URL_TEMPLATE = 'http://%s/file/%s' - _TEST = { - 'url': 'http://www.videoweed.es/file/b42178afbea14', - 'md5': 'abd31a2132947262c50429e1d16c1bfd', + _TESTS = [{ + 'url': u'http://www.bitvid.sx/file/bceedaa7b969c', 'info_dict': { - 'id': 'b42178afbea14', - 'ext': 'flv', - 'title': 'optical illusion dissapeared image magic illusion', - 'description': '' + 'id': u'bceedaa7b969c', + 'ext': 'mp4', + 'title': u'Big Buck Bunny UHD 4K 60fps' }, - } + 'md5': '909304eb0b75ef231ceb72d84fade33d', + }, { + 'url': 'http://www.bitvid.sx/file/bceedaa7b969c', + 'only_matching': True, + }] class CloudTimeIE(NovaMovIE): @@ -181,9 +184,19 @@ class CloudTimeIE(NovaMovIE): _HOST = 'www.cloudtime.to' _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' - _TITLE_REGEX = r']+class=["\']video_det["\'][^>]*>\s*([^<]+)' - _TEST = None + _TESTS = [{ + 'url': u'http://www.cloudtime.to/video/ef47760a7793d', + 'info_dict': { + 'id': u'ef47760a7793d', + 'ext': 'mp4', + 'title': u'Big Buck Bunny UHD 4K 60fps' + }, + 'md5': '909304eb0b75ef231ceb72d84fade33d', + }, { + 'url': 'http://www.cloudtime.to/video/ef47760a7793d', + 'only_matching': True, + }] class AuroraVidIE(NovaMovIE): @@ -197,16 +210,14 @@ class AuroraVidIE(NovaMovIE): _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<' _TESTS = [{ - 'url': 'http://www.auroravid.to/video/4rurhn9x446jj', - 'md5': '7205f346a52bbeba427603ba10d4b935', + 'url': u'http://www.auroravid.to/video/27851f1e57c95', 'info_dict': { - 'id': '4rurhn9x446jj', - 'ext': 'flv', - 'title': 'search engine optimization', - 'description': 'search engine optimization is used to rank the web page in the google search engine' + 'id': u'27851f1e57c95', + 'ext': 'mp4', + 'title': u'Big Buck Bunny UHD 4K 60fps', }, - 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)' + 'md5': '909304eb0b75ef231ceb72d84fade33d', }, { - 'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj', + 'url': 'http://www.auroravid.to/video/27851f1e57c95', 'only_matching': True, }]