From c445991679b40bcb6f956bb30903b823a90a5c7f Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Mon, 17 Mar 2014 02:02:54 +1030 Subject: [PATCH 1/2] More generic support for Ooyala Player This commit adds more generic support for Ooyala Player. It should remove the need for slashdot.py and vice.py (but I will leave that decision up to others), as well as add support for sites that don't follow the usual pattern by searching for "var embedCode" which as best as I can tell is a variable unique to Ooyala Player and shouldn't cause any false positives or conflicts with other extractors. --- youtube_dl/extractor/generic.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 66e189da1..afb08f76a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -392,10 +392,18 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url')) # Look for Ooyala videos + + # First, try and see if the full player URL is provided mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage) if mobj is not None: return OoyalaIE._build_url_result(mobj.group(1)) - + + # If it isn't, you have to dig deeper. "OO.Player.create" can be given different values. + # Best bet is to simply look for the embed code variable inside the Javascript. 
+ mobj = re.search(r"var embedCode = '(.*?)';", webpage) + if mobj is not None: + return OoyalaIE._build_url_result(mobj.group(1)) + # Look for Aparat videos mobj = re.search(r'', - page) - - if not mobj: - raise ExtractorError('No media found', expected=True) - - video_type = mobj.group('type') - video_id = mobj.group('id') json_data = self._download_json( 'http://player.rutv.ru/iframe/%splay/id/%s' % ('live-' if video_type == 'live' else '', video_id), video_id, 'Downloading JSON') if json_data['errors']: - raise ExtractorError('vesti returned error: %s' % json_data['errors'], expected=True) + raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) playlist = json_data['data']['playlist'] medialist = playlist['medialist'] media = medialist[0] if media['errors']: - raise ExtractorError('vesti returned error: %s' % media['errors'], expected=True) + raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) view_count = playlist.get('count_views') priority_transport = playlist['priority_transport'] diff --git a/youtube_dl/extractor/vesti.py b/youtube_dl/extractor/vesti.py new file mode 100644 index 000000000..27f9acb67 --- /dev/null +++ b/youtube_dl/extractor/vesti.py @@ -0,0 +1,121 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError +from .rutv import RUTVIE + + +class VestiIE(InfoExtractor): + IE_DESC = 'Вести.Ru' + _VALID_URL = r'http://(?:.+?\.)?vesti\.ru/(?P<id>.+)' + + _TESTS = [ + { + 'url': 'http://www.vesti.ru/videos?vid=575582&cid=1', + 'info_dict': { + 'id': '765035', + 'ext': 'mp4', + 'title': 'Вести.net: биткоины в России не являются законными', + 'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b', + 'duration': 302, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.vesti.ru/doc.html?id=1349233', + 'info_dict': { + 'id': '773865', + 'ext': 'mp4', 
'title': 'Участники митинга штурмуют Донецкую областную администрацию', + 'description': 'md5:1a160e98b3195379b4c849f2f4958009', + 'duration': 210, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.vesti.ru/only_video.html?vid=576180', + 'info_dict': { + 'id': '766048', + 'ext': 'mp4', + 'title': 'США заморозило, Британию затопило', + 'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1', + 'duration': 87, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://hitech.vesti.ru/news/view/id/4000', + 'info_dict': { + 'id': '766888', + 'ext': 'mp4', + 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', + 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', + 'duration': 279, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403', + 'info_dict': { + 'id': '766403', + 'ext': 'mp4', + 'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы', + 'description': 'md5:55805dfd35763a890ff50fa9e35e31b3', + 'duration': 271, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Blocked outside Russia', + }, + { + 'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', + 'info_dict': { + 'id': '51499', + 'ext': 'flv', + 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. 
Мужчины ', + 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Translation has finished' + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id, 'Downloading page') + + mobj = re.search( + r'<meta[^>]+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P<id>\d+)', + page) + if mobj: + video_id = mobj.group('id') + page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id, + 'Downloading video page') + + rutv_url = RUTVIE._extract_url(page) + if rutv_url: + return self.url_result(rutv_url, 'RUTV') + + raise ExtractorError('No video found', expected=True) \ No newline at end of file diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 3cf29e63a..f60141bd1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -6,6 +6,7 @@ import ctypes import datetime import email.utils import errno +import getpass import gzip import itertools import io @@ -1279,3 +1280,12 @@ def parse_xml(s): parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder()) kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {} return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) + + +if sys.version_info < (3, 0) and sys.platform == 'win32': + def compat_getpass(prompt, *args, **kwargs): + if isinstance(prompt, compat_str): + prompt = prompt.encode(preferredencoding()) + return getpass.getpass(prompt, *args, **kwargs) +else: + compat_getpass = getpass.getpass diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c038225f7..cafe4b75c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.03.12' +__version__ = '2014.03.18.1'