([^<]+)<', webpage, 'description', fatal=False)
video_urls = list(map(compat_urllib_parse.unquote , re.findall(r'flashvars\.quality_[0-9]{3}p = "([^"]+)', webpage)))
if webpage.find('flashvars\.encrypted = "true"') != -1:
- password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, u'password').replace('+', ' ')
+ password = self._html_search_regex(r'flashvars\.video_title = "([^"]+)', webpage, 'password').replace('+', ' ')
video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls))
formats = []
@@ -52,14 +55,21 @@ class SpankwireIE(InfoExtractor):
path = compat_urllib_parse_urlparse(video_url).path
extension = os.path.splitext(path)[1][1:]
format = path.split('/')[4].split('_')[:2]
+ resolution, bitrate_str = format
format = "-".join(format)
+ height = int(resolution.rstrip('P'))
+ tbr = int(bitrate_str.rstrip('K'))
+
formats.append({
'url': video_url,
'ext': extension,
+ 'resolution': resolution,
'format': format,
+ 'tbr': tbr,
+ 'height': height,
'format_id': format,
})
- formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
+ self._sort_formats(formats)
age_limit = self._rta_search(webpage)
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 2bf26d056..9dcffead0 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
from .common import InfoExtractor
@@ -9,61 +11,66 @@ from ..utils import (
class TeamcocoIE(InfoExtractor):
_VALID_URL = r'http://teamcoco\.com/video/(?P<url_title>.*)'
_TEST = {
- u'url': u'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
- u'file': u'19705.mp4',
- u'md5': u'cde9ba0fa3506f5f017ce11ead928f9a',
- u'info_dict': {
- u"description": u"Louis C.K. got starstruck by George W. Bush, so what? Part one.",
- u"title": u"Louis C.K. Interview Pt. 1 11/3/11"
+ 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+ 'file': '19705.mp4',
+ 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+ 'info_dict': {
+ "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
+ "title": "Louis C.K. Interview Pt. 1 11/3/11"
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
+ raise ExtractorError('Invalid URL: %s' % url)
url_title = mobj.group('url_title')
webpage = self._download_webpage(url, url_title)
- video_id = self._html_search_regex(r'\w+) # Here goes the name and then ".html"
'''
_TEST = {
- u'url': u'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
- u'file': u'102.mp4',
- u'md5': u'2d76ee1576672e0bd8f187513267adf6',
- u'info_dict': {
- u"description": u"md5:c6fa72e6eedbd938c9caf6b2702f5922",
- u"title": u"Dan Dennett: The illusion of consciousness"
+ 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
+ 'file': '102.mp4',
+ 'md5': '4ea1dada91e4174b53dac2bb8ace429d',
+ 'info_dict': {
+ "description": "md5:c6fa72e6eedbd938c9caf6b2702f5922",
+ "title": "Dan Dennett: The illusion of consciousness"
}
}
@@ -47,7 +50,7 @@ class TEDIE(SubtitlesInfoExtractor):
'''Returns the videos of the playlist'''
webpage = self._download_webpage(
- url, playlist_id, u'Downloading playlist webpage')
+ url, playlist_id, 'Downloading playlist webpage')
matches = re.finditer(
r'/talks/[^"]+\.html)">[^<]*
',
webpage)
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index cec65261b..23172143e 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -55,15 +55,21 @@ class ThePlatformIE(InfoExtractor):
formats = []
for f in switch.findall(_x('smil:video')):
attr = f.attrib
+ width = int(attr['width'])
+ height = int(attr['height'])
+ vbr = int(attr['system-bitrate']) // 1000
+ format_id = '%dx%d_%dk' % (width, height, vbr)
formats.append({
+ 'format_id': format_id,
'url': base_url,
'play_path': 'mp4:' + attr['src'],
'ext': 'flv',
- 'width': int(attr['width']),
- 'height': int(attr['height']),
- 'vbr': int(attr['system-bitrate']),
+ 'width': width,
+ 'height': height,
+ 'vbr': vbr,
})
- formats.sort(key=lambda f: (f['height'], f['width'], f['vbr']))
+
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py
index 3cf8c853d..b1c854a64 100644
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
import re
import json
@@ -8,16 +10,17 @@ from ..utils import (
clean_html,
)
+
class VeeHDIE(InfoExtractor):
_VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'
_TEST = {
- u'url': u'http://veehd.com/video/4686958',
- u'file': u'4686958.mp4',
- u'info_dict': {
- u'title': u'Time Lapse View from Space ( ISS)',
- u'uploader_id': u'spotted',
- u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
+ 'url': 'http://veehd.com/video/4686958',
+ 'file': '4686958.mp4',
+ 'info_dict': {
+ 'title': 'Time Lapse View from Space ( ISS)',
+ 'uploader_id': 'spotted',
+ 'description': 'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
},
}
@@ -25,24 +28,30 @@ class VeeHDIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ # VeeHD seems to send garbage on the first request.
+ # See https://github.com/rg3/youtube-dl/issues/2102
+ self._download_webpage(url, video_id, 'Requesting webpage')
webpage = self._download_webpage(url, video_id)
- player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
- webpage, u'player path')
+ player_path = self._search_regex(
+ r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
+ webpage, 'player path')
player_url = compat_urlparse.urljoin(url, player_path)
- player_page = self._download_webpage(player_url, video_id,
- u'Downloading player page')
- config_json = self._search_regex(r'value=\'config=({.+?})\'',
- player_page, u'config json')
+
+ self._download_webpage(player_url, video_id, 'Requesting player page')
+ player_page = self._download_webpage(
+ player_url, video_id, 'Downloading player page')
+ config_json = self._search_regex(
+ r'value=\'config=({.+?})\'', player_page, 'config json')
config = json.loads(config_json)
video_url = compat_urlparse.unquote(config['clip']['url'])
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
uploader_id = self._html_search_regex(r'(.+?)',
- webpage, u'uploader')
+ webpage, 'uploader')
thumbnail = self._search_regex(r'
(.*?)\d*)'
+ _VALID_URL = r'http://(?:www\.)?veoh\.com/(?:watch|iphone/#_Watch)/v(?P<id>\d*)'
_TEST = {
- u'url': u'http://www.veoh.com/watch/v56314296nk7Zdmz3',
- u'file': u'56314296.mp4',
- u'md5': u'620e68e6a3cff80086df3348426c9ca3',
- u'info_dict': {
- u'title': u'Straight Backs Are Stronger',
- u'uploader': u'LUMOback',
- u'description': u'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
+ 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
+ 'file': '56314296.mp4',
+ 'md5': '620e68e6a3cff80086df3348426c9ca3',
+ 'info_dict': {
+ 'title': 'Straight Backs Are Stronger',
+ 'uploader': 'LUMOback',
+ 'description': 'At LUMOback, we believe straight backs are stronger. The LUMOback Posture & Movement Sensor: It gently vibrates when you slouch, inspiring improved posture and mobility. Use the app to track your data and improve your posture over time. ',
}
}
@@ -28,20 +28,20 @@ class VeohIE(InfoExtractor):
m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|")', webpage)
if m_youtube is not None:
youtube_id = m_youtube.group(1)
- self.to_screen(u'%s: detected Youtube video.' % video_id)
+ self.to_screen('%s: detected Youtube video.' % video_id)
return self.url_result(youtube_id, 'Youtube')
self.report_extraction(video_id)
info = self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info')
info = json.loads(info)
- video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
+ video_url = info.get('fullPreviewHashHighPath') or info.get('fullPreviewHashLowPath')
- return {'id': info['videoId'],
- 'title': info['title'],
- 'ext': determine_ext(video_url),
- 'url': video_url,
- 'uploader': info['username'],
- 'thumbnail': info.get('highResImage') or info.get('medResImage'),
- 'description': info['description'],
- 'view_count': info['views'],
- }
+ return {
+ 'id': info['videoId'],
+ 'title': info['title'],
+ 'url': video_url,
+ 'uploader': info['username'],
+ 'thumbnail': info.get('highResImage') or info.get('medResImage'),
+ 'description': info['description'],
+ 'view_count': info['views'],
+ }
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index c3623fcbe..193675549 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -1,4 +1,6 @@
# encoding: utf-8
+from __future__ import unicode_literals
+
import json
import re
import itertools
@@ -22,7 +24,7 @@ class VimeoIE(InfoExtractor):
# _VALID_URL matches Vimeo URLs
_VALID_URL = r'''(?x)
- (?P<proto>https?://)?
+ (?P<proto>(?:https?:)?//)?
(?:(?:www|(?P<player>player))\.)?
vimeo(?P<pro>pro)?\.com/
(?:.*?/)?
@@ -31,54 +33,55 @@ class VimeoIE(InfoExtractor):
(?P<id>[0-9]+)
/?(?:[?&].*)?(?:[#].*)?$'''
_NETRC_MACHINE = 'vimeo'
- IE_NAME = u'vimeo'
+ IE_NAME = 'vimeo'
_TESTS = [
{
- u'url': u'http://vimeo.com/56015672#at=0',
- u'file': u'56015672.mp4',
- u'md5': u'8879b6cc097e987f02484baf890129e5',
- u'info_dict': {
- u"upload_date": u"20121220",
- u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
- u"uploader_id": u"user7108434",
- u"uploader": u"Filippo Valsorda",
- u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+ 'url': 'http://vimeo.com/56015672#at=0',
+ 'file': '56015672.mp4',
+ 'md5': '8879b6cc097e987f02484baf890129e5',
+ 'info_dict': {
+ "upload_date": "20121220",
+ "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+ "uploader_id": "user7108434",
+ "uploader": "Filippo Valsorda",
+ "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
},
},
{
- u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
- u'file': u'68093876.mp4',
- u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82',
- u'note': u'Vimeo Pro video (#1197)',
- u'info_dict': {
- u'uploader_id': u'openstreetmapus',
- u'uploader': u'OpenStreetMap US',
- u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+ 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+ 'file': '68093876.mp4',
+ 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
+ 'note': 'Vimeo Pro video (#1197)',
+ 'info_dict': {
+ 'uploader_id': 'openstreetmapus',
+ 'uploader': 'OpenStreetMap US',
+ 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
},
},
{
- u'url': u'http://player.vimeo.com/video/54469442',
- u'file': u'54469442.mp4',
- u'md5': u'619b811a4417aa4abe78dc653becf511',
- u'note': u'Videos that embed the url in the player page',
- u'info_dict': {
- u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
- u'uploader': u'The BLN & Business of Software',
+ 'url': 'http://player.vimeo.com/video/54469442',
+ 'file': '54469442.mp4',
+ 'md5': '619b811a4417aa4abe78dc653becf511',
+ 'note': 'Videos that embed the url in the player page',
+ 'info_dict': {
+ 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software',
+ 'uploader': 'The BLN & Business of Software',
+ 'uploader_id': 'theblnbusinessofsoftware',
},
},
{
- u'url': u'http://vimeo.com/68375962',
- u'file': u'68375962.mp4',
- u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7',
- u'note': u'Video protected with password',
- u'info_dict': {
- u'title': u'youtube-dl password protected test video',
- u'upload_date': u'20130614',
- u'uploader_id': u'user18948128',
- u'uploader': u'Jaime Marquínez Ferrándiz',
+ 'url': 'http://vimeo.com/68375962',
+ 'file': '68375962.mp4',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'note': 'Video protected with password',
+ 'info_dict': {
+ 'title': 'youtube-dl password protected test video',
+ 'upload_date': '20130614',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
},
- u'params': {
- u'videopassword': u'youtube-dl',
+ 'params': {
+ 'videopassword': 'youtube-dl',
},
},
]
@@ -90,7 +93,7 @@ class VimeoIE(InfoExtractor):
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
- token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1)
+ token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({'email': username,
'password': password,
'action': 'login',
@@ -100,13 +103,13 @@ class VimeoIE(InfoExtractor):
login_request = compat_urllib_request.Request(login_url, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
login_request.add_header('Cookie', 'xsrft=%s' % token)
- self._download_webpage(login_request, None, False, u'Wrong login info')
+ self._download_webpage(login_request, None, False, 'Wrong login info')
def _verify_video_password(self, url, video_id, webpage):
password = self._downloader.params.get('videopassword', None)
if password is None:
- raise ExtractorError(u'This video is protected by a password, use the --video-password option')
- token = re.search(r'xsrft: \'(.*?)\'', webpage).group(1)
+ raise ExtractorError('This video is protected by a password, use the --video-password option')
+ token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
data = compat_urllib_parse.urlencode({'password': password,
'token': token})
# I didn't manage to use the password with https
@@ -118,8 +121,21 @@ class VimeoIE(InfoExtractor):
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
self._download_webpage(password_request, video_id,
- u'Verifying the password',
- u'Wrong password')
+ 'Verifying the password',
+ 'Wrong password')
+
+ def _verify_player_video_password(self, url, video_id):
+ password = self._downloader.params.get('videopassword', None)
+ if password is None:
+ raise ExtractorError('This video is protected by a password, use the --video-password option')
+ data = compat_urllib_parse.urlencode({'password': password})
+ pass_url = url + '/check-password'
+ password_request = compat_urllib_request.Request(pass_url, data)
+ password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ return self._download_json(
+ password_request, video_id,
+ 'Verifying the password',
+ 'Wrong password')
def _real_initialize(self):
self._login()
@@ -133,9 +149,6 @@ class VimeoIE(InfoExtractor):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
-
video_id = mobj.group('id')
if mobj.group('pro') or mobj.group('player'):
url = 'http://player.vimeo.com/video/' + video_id
@@ -155,7 +168,7 @@ class VimeoIE(InfoExtractor):
try:
try:
config_url = self._html_search_regex(
- r' data-config-url="(.+?)"', webpage, u'config URL')
+ r' data-config-url="(.+?)"', webpage, 'config URL')
config_json = self._download_webpage(config_url, video_id)
config = json.loads(config_json)
except RegexNotFoundError:
@@ -166,19 +179,22 @@ class VimeoIE(InfoExtractor):
config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
- config = self._search_regex(config_re, webpage, u'info section',
+ config = self._search_regex(config_re, webpage, 'info section',
flags=re.DOTALL)
config = json.loads(config)
except Exception as e:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
- raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option')
+ raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
if re.search('