1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-09 11:57:17 +08:00

Merge remote branch 'rg3/master' into ytuser-infoextractor

Conflicts:
	youtube-dl
This commit is contained in:
Paweł Paprota 2011-01-26 19:27:03 +01:00
commit 6513cc1739

View File

@ -4,9 +4,12 @@
# Author: Danny Colligan
# Author: Benjamin Johnson
# Author: Vasyl' Vavrychuk
# Author: Witold Baryluk
# License: Public domain code
import cookielib
import ctypes
import datetime
import gzip
import htmlentitydefs
import httplib
import locale
@ -17,11 +20,13 @@ import os.path
import re
import socket
import string
import StringIO
import subprocess
import sys
import time
import urllib
import urllib2
import zlib
# parse_qs was moved from the cgi module to the urlparse module recently.
try:
@ -160,6 +165,64 @@ class ContentTooShortError(Exception):
self.downloaded = downloaded
self.expected = expected
class YoutubeDLHandler(urllib2.HTTPHandler):
    """Handler for HTTP(S) requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every request and handles gzipped and deflated
    responses from web servers. If compression is to be avoided in a
    particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    The request/response hooks are also exposed under their https_* names
    so that an OpenerDirector applies them to HTTPS traffic as well;
    otherwise HTTPS requests would be sent without the standard headers.

    Part of this code was copied from:

      http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Return the decompressed form of a "deflate"-encoded body.

        A raw deflate stream (no zlib header) is tried first; if that
        fails the data is decoded as a regular zlib stream instead.
        """
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the given status code.

        When addinfourl has no getcode() the constructor is called without
        the code argument and the attribute is set by hand afterwards.
        """
        if hasattr(urllib2.addinfourl, 'getcode'):
            return urllib2.addinfourl(stream, headers, url, code)
        ret = urllib2.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Install the standard headers on req and honour the opt-out.

        Returns the same request object after modifying its headers.
        """
        # The standard headers always win over whatever the caller set.
        for h in std_headers:
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, std_headers[h])
        # The marker header is internal only: strip it, together with
        # Accept-encoding, so the server answers uncompressed.
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        return req

    def http_response(self, req, resp):
        """Return resp with a gzip/deflate body transparently decoded.

        The whole body is read into memory in order to decompress it.
        Responses with any other Content-encoding are returned untouched.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = StringIO.StringIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # OpenerDirector picks processors by "<protocol>_request" and
    # "<protocol>_response" method names, so alias the hooks for HTTPS.
    https_request = http_request
    https_response = http_response
class FileDownloader(object):
"""File Downloader class.
@ -208,6 +271,8 @@ class FileDownloader(object):
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
"""
params = None
@ -236,13 +301,6 @@ class FileDownloader(object):
if not os.path.exists(dir):
os.mkdir(dir)
@staticmethod
def temp_name(filename):
"""Returns a temporary filename for the given filename."""
if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)):
return filename
return filename + u'.part'
@staticmethod
def format_bytes(bytes):
if bytes is None:
@ -332,6 +390,17 @@ class FileDownloader(object):
"""Print message to stderr."""
print >>sys.stderr, message.encode(preferredencoding())
def to_cons_title(self, message):
"""Set console/terminal window title to message."""
if not self.params.get('consoletitle', False):
return
if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
def fixed_template(self):
"""Checks if the output template is fixed."""
return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
@ -362,6 +431,18 @@ class FileDownloader(object):
if speed > rate_limit:
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == u'-' or \
(os.path.exists(filename) and not os.path.isfile(filename)):
return filename
return filename + u'.part'
def undo_temp_name(self, filename):
    """Strip a trailing u'.part' from filename, if there is one."""
    suffix = u'.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
def try_rename(self, old_filename, new_filename):
try:
if old_filename == new_filename:
@ -380,6 +461,8 @@ class FileDownloader(object):
return
self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@ -532,7 +615,7 @@ class FileDownloader(object):
def _do_download(self, filename, url, player_url):
# Check file already present
if self.params.get('continuedl', False) and os.path.isfile(filename):
if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
self.report_file_already_downloaded(filename)
return True
@ -543,8 +626,11 @@ class FileDownloader(object):
tmpfilename = self.temp_name(filename)
stream = None
open_mode = 'wb'
basic_request = urllib2.Request(url, None, std_headers)
request = urllib2.Request(url, None, std_headers)
# Do not include the Accept-Encoding header
headers = {'Youtubedl-no-compression': 'True'}
basic_request = urllib2.Request(url, None, headers)
request = urllib2.Request(url, None, headers)
# Establish possible resume length
if os.path.isfile(tmpfilename):
@ -626,6 +712,7 @@ class FileDownloader(object):
if stream is None:
try:
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
filename = self.undo_temp_name(tmpfilename)
self.report_destination(filename)
except (OSError, IOError), err:
self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
@ -727,7 +814,7 @@ class InfoExtractor(object):
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
@ -806,7 +893,7 @@ class YoutubeIE(InfoExtractor):
return
# Set language
request = urllib2.Request(self._LANG_URL, None, std_headers)
request = urllib2.Request(self._LANG_URL)
try:
self.report_lang()
urllib2.urlopen(request).read()
@ -826,7 +913,7 @@ class YoutubeIE(InfoExtractor):
'username': username,
'password': password,
}
request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
try:
self.report_login()
login_results = urllib2.urlopen(request).read()
@ -842,7 +929,7 @@ class YoutubeIE(InfoExtractor):
'next_url': '/',
'action_confirm': 'Confirm',
}
request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
try:
self.report_age_confirmation()
age_results = urllib2.urlopen(request).read()
@ -860,7 +947,7 @@ class YoutubeIE(InfoExtractor):
# Get video webpage
self.report_video_webpage_download(video_id)
request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers)
request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
try:
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -879,7 +966,7 @@ class YoutubeIE(InfoExtractor):
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
request = urllib2.Request(video_info_url, None, std_headers)
request = urllib2.Request(video_info_url)
try:
video_info_webpage = urllib2.urlopen(request).read()
video_info = parse_qs(video_info_webpage)
@ -1002,7 +1089,7 @@ class YoutubeIE(InfoExtractor):
'player_url': player_url,
})
except UnavailableVideoError, err:
self._downloader.trouble(u'ERROR: unable to download video')
self._downloader.trouble(u'\nERROR: unable to download video')
class MetacafeIE(InfoExtractor):
@ -1039,7 +1126,7 @@ class MetacafeIE(InfoExtractor):
def _real_initialize(self):
# Retrieve disclaimer
request = urllib2.Request(self._DISCLAIMER, None, std_headers)
request = urllib2.Request(self._DISCLAIMER)
try:
self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read()
@ -1052,7 +1139,7 @@ class MetacafeIE(InfoExtractor):
'filters': '0',
'submit': "Continue - I'm over 18",
}
request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
try:
self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read()
@ -1147,7 +1234,7 @@ class MetacafeIE(InfoExtractor):
'player_url': None,
})
except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video')
self._downloader.trouble(u'\nERROR: unable to download video')
class DailymotionIE(InfoExtractor):
@ -1216,7 +1303,7 @@ class DailymotionIE(InfoExtractor):
video_title = mobj.group(1).decode('utf-8')
video_title = sanitize_title(video_title)
mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
@ -1236,7 +1323,7 @@ class DailymotionIE(InfoExtractor):
'player_url': None,
})
except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video')
self._downloader.trouble(u'\nERROR: unable to download video')
class GoogleIE(InfoExtractor):
"""Information extractor for video.google.com."""
@ -1346,7 +1433,7 @@ class GoogleIE(InfoExtractor):
'player_url': None,
})
except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video')
self._downloader.trouble(u'\nERROR: unable to download video')
class PhotobucketIE(InfoExtractor):
@ -1428,7 +1515,7 @@ class PhotobucketIE(InfoExtractor):
'player_url': None,
})
except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video')
self._downloader.trouble(u'\nERROR: unable to download video')
class YahooIE(InfoExtractor):
@ -1586,7 +1673,7 @@ class YahooIE(InfoExtractor):
'player_url': None,
})
except UnavailableVideoError:
self._downloader.trouble(u'ERROR: unable to download video')
self._downloader.trouble(u'\nERROR: unable to download video')
class GenericIE(InfoExtractor):
@ -1687,7 +1774,7 @@ class GenericIE(InfoExtractor):
'player_url': None,
})
except UnavailableVideoError, err:
self._downloader.trouble(u'ERROR: unable to download video')
self._downloader.trouble(u'\nERROR: unable to download video')
class YoutubeSearchIE(InfoExtractor):
@ -1755,7 +1842,7 @@ class YoutubeSearchIE(InfoExtractor):
while True:
self.report_download_page(query, pagenum)
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
request = urllib2.Request(result_url, None, std_headers)
request = urllib2.Request(result_url)
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -1846,7 +1933,7 @@ class GoogleSearchIE(InfoExtractor):
while True:
self.report_download_page(query, pagenum)
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
request = urllib2.Request(result_url, None, std_headers)
request = urllib2.Request(result_url)
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -1937,7 +2024,7 @@ class YahooSearchIE(InfoExtractor):
while True:
self.report_download_page(query, pagenum)
result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
request = urllib2.Request(result_url, None, std_headers)
request = urllib2.Request(result_url)
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -1966,7 +2053,7 @@ class YahooSearchIE(InfoExtractor):
class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists."""
_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
@ -2001,7 +2088,7 @@ class YoutubePlaylistIE(InfoExtractor):
while True:
self.report_download_page(playlist_id, pagenum)
request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@ -2137,7 +2224,7 @@ class DepositFilesIE(InfoExtractor):
# Retrieve file webpage with 'Free download' button pressed
free_download_indication = { 'gateway_result' : '1' }
request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
request = urllib2.Request(url, urllib.urlencode(free_download_indication))
try:
self.report_download_webpage(file_id)
webpage = urllib2.urlopen(request).read()
@ -2236,20 +2323,26 @@ if __name__ == '__main__':
import getpass
import optparse
# Function to update the program file with the latest version from bitbucket.org
# Function to update the program file with the latest version from the repository.
def update_self(downloader, filename):
    """Overwrite the program file with the latest stable version.

    downloader is only used to print status messages; filename is the
    path of the program file to replace. The process exits through
    sys.exit() with an error message when the file is not writable,
    when the download fails or yields nothing, or when the file cannot
    be written.
    """
    # Note: downloader only used for options
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen('Updating to latest stable version...')
    try:
        latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
        latest_version = urllib.urlopen(latest_url).read().strip()
        if not latest_version:
            raise IOError('empty version string')
        prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
        newcontent = urllib.urlopen(prog_url).read()
        # Never replace a working program with an empty file.
        if not newcontent:
            raise IOError('empty program body')
    except (IOError, OSError):
        sys.exit('ERROR: unable to download latest version')

    try:
        # Binary mode keeps the downloaded bytes untouched on every
        # platform (no newline translation).
        stream = open(filename, 'wb')
        try:
            stream.write(newcontent)
        finally:
            # Release the handle even if the write fails.
            stream.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to overwrite current version')
    downloader.to_screen('Updated to version %s' % latest_version)
# Parse command line
@ -2275,6 +2368,8 @@ if __name__ == '__main__':
dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
parser.add_option('--playlist-end',
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
parser.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
authentication = optparse.OptionGroup(parser, 'Authentication Options')
authentication.add_option('-u', '--username',
@ -2309,6 +2404,8 @@ if __name__ == '__main__':
action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
verbosity.add_option('--no-progress',
action='store_true', dest='noprogress', help='do not print progress bar', default=False)
verbosity.add_option('--console-title',
action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
parser.add_option_group(verbosity)
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@ -2328,6 +2425,8 @@ if __name__ == '__main__':
action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
filesystem.add_option('--cookies',
dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
filesystem.add_option('--no-part',
action='store_true', dest='nopart', help='do not use .part files', default=False)
parser.add_option_group(filesystem)
(opts, args) = parser.parse_args()
@ -2343,10 +2442,14 @@ if __name__ == '__main__':
except (IOError, OSError), err:
sys.exit(u'ERROR: unable to open cookie file')
# Dump user agent
if opts.dump_user_agent:
print std_headers['User-Agent']
sys.exit(0)
# General configuration
cookie_processor = urllib2.HTTPCookieProcessor(jar)
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
urllib2.install_opener(urllib2.build_opener(cookie_processor))
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
# Batch file verification
@ -2445,6 +2548,8 @@ if __name__ == '__main__':
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
})
fd.add_info_extractor(youtube_search_ie)
fd.add_info_extractor(youtube_pl_ie)