From 03a0a2b0386b02a0c0326c2a52e63d68f99c662f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Apr 2016 15:44:34 +0800 Subject: [PATCH 01/12] [socks] Add socks.py from @bluec0re's public domain implementation https://gist.github.com/bluec0re/cafd3764412967417fd3 --- youtube_dl/socks.py | 336 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 336 insertions(+) create mode 100644 youtube_dl/socks.py diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py new file mode 100644 index 000000000..b0c36a189 --- /dev/null +++ b/youtube_dl/socks.py @@ -0,0 +1,336 @@ +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# For more information, please refer to +# +# Example: +# import socks +# import ftplib +# import socket +# +# socks.patch_socket() +# +# f = ftplib.FTP('ftp.kernel.org') +# f.login() +# print f.retrlines('LIST') +# f.quit() +# +# s = socket.create_connection(('www.google.com', 80)) +# s.sendall('HEAD / HTTP/1.0\r\n\r\n') +# print s.recv(1024) +# s.close() +from __future__ import unicode_literals +import os +import struct +import socket +import time + +__author__ = 'Timo Schmid ' + +_orig_socket = socket.socket + +try: + from collections import namedtuple +except ImportError: + from Collections import namedtuple + +try: + from urllib.parse import urlparse +except: + from urlparse import urlparse + +try: + from enum import Enum +except ImportError: + Enum = object + + +class ProxyError(IOError): pass +class Socks4Error(ProxyError): + CODES = { + 0x5B: 'request rejected or failed', + 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 0x5D: 'request rejected because the client program and identd report different user-ids' + } + def __init__(self, code=None, msg=None): + if code is not None and msg is None: + msg = self.CODES.get(code) + if msg is None: + msg = 'unknown error' + super(Socks4Error, self).__init__(code, msg) + +class Socks5Error(Socks4Error): + CODES = { + 0x01: 'general SOCKS server failure', + 0x02: 'connection not allowed by ruleset', + 0x03: 'Network unreachable', + 0x04: 'Host unreachable', + 0x05: 'Connection refused', + 0x06: 'TTL expired', + 0x07: 'Command not supported', + 0x08: 'Address type not supported', + 0xFE: 'unknown username or invalid password', + 0xFF: 'all offered authentication methods were rejected' + } + +class ProxyType(Enum): + SOCKS4 = 0 + SOCKS4A = 1 + SOCKS5 = 2 + +Proxy = namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) + +_default_proxy = None + +def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None, allow_env_override=True): + global _default_proxy + if allow_env_override: + all_proxy = os.environ.get('ALL_PROXY', os.environ.get('all_proxy')) + if all_proxy: + all_proxy = urlparse(all_proxy) + if all_proxy.scheme.startswith('socks'): + if all_proxy.scheme == 'socks' or all_proxy.scheme == 'socks4': + proxytype = ProxyType.SOCKS4 + elif all_proxy.scheme == 'socks4a': + proxytype = ProxyType.SOCKS4A + elif all_proxy.scheme == 'socks5': + proxytype = ProxyType.SOCKS5 + addr = all_proxy.hostname + port = all_proxy.port + username = all_proxy.username + password = all_proxy.password + + if proxytype is not None: + _default_proxy = Proxy(proxytype, addr, port, username, password, rdns) + + +def wrap_socket(sock): + return socksocket(_sock=sock._sock) + +def wrap_module(module): + if hasattr(module, 'socket'): + sock = module.socket + if isinstance(sock, socket.socket): + module.socket = sockssocket + elif hasattr(socket, 'socket'): + socket.socket = sockssocket + +def patch_socket(): + import sys + if 'socket' not in sys.modules: + import socket + sys.modules['socket'].socket = sockssocket + + +class sockssocket(socket.socket): + def __init__(self, *args, **kwargs): + self.__proxy = None + if 'proxy' in kwargs: + self.__proxy = kwargs['proxy'] + del kwargs['proxy'] + super(sockssocket, self).__init__(*args, **kwargs) + + @property + def _proxy(self): + if self.__proxy: + return self.__proxy + return _default_proxy + + @property + def _proxy_port(self): + if self._proxy: + if self._proxy.port: + return self._proxy.port + return 1080 + return None + + def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): + if proxytype is None: + self.__proxy = None + else: + self.__proxy = Proxy(proxytype, addr, port, username, password, rdns) + + def recvall(self, cnt): + data = b'' + while len(data) < cnt: + cur = self.recv(cnt - len(data)) + if not cur: + raise IOError("{0} bytes missing".format(cnt-len(data))) + data += cur + return data + + def _setup_socks4(self, address, is_4a=False): + destaddr, port = address + + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + if is_4a and self._proxy.remote_dns: + ipaddr = struct.pack('!BBBB', 0, 0, 0, 0xFF) + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + + packet = struct.pack('!BBH', 0x4, 0x1, port) + ipaddr + if self._proxy.username: + username = self._proxy.username + if hasattr(username, 'encode'): + username = username.encode() + packet += struct.pack('!{0}s'.format(len(username)+1), username) + else: + packet += b'\x00' + + if is_4a and self._proxy.remote_dns: + if hasattr(destaddr, 'encode'): + destaddr = destaddr.encode() + packet += struct.pack('!{0}s'.format(len(destaddr)+1), destaddr) + + self.sendall(packet) + + packet = self.recvall(8) + nbyte, resp_code, dstport, dsthost = struct.unpack('!BBHI', packet) + + # check valid response + if nbyte != 0x00: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(0, nbyte)) + + # access granted + if resp_code != 0x5a: + self.close() + raise Socks4Error(resp_code) + + def _setup_socks5(self, address): + destaddr, port = address + + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + if self._proxy.remote_dns: + ipaddr = None + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + + auth_methods = 1 + if self._proxy.username and self._proxy.password: + # two auth methods available + auth_methods = 2 + packet = struct.pack('!BBB', 0x5, auth_methods, 0x00) # no auth + if self._proxy.username and self._proxy.password: + packet += struct.pack('!B', 0x02) # user/pass auth + + self.sendall(packet) + + packet = self.recvall(2) + version, method = struct.unpack('!BB', packet) + + # check valid response + if version != 0x05: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + + # no auth methods + if method == 0xFF: + self.close() + raise Socks5Error(method) + + # user/pass auth + if method == 0x01: + username = self._proxy.username + if hasattr(username, 'encode'): + username = username.encode() + password = self._proxy.password + if hasattr(password, 'encode'): + password = password.encode() + packet = struct.pack('!BB', 1, len(username)) + username + packet += struct.pack('!B', len(password)) + password + self.sendall(packet) + + packet = self.recvall(2) + version, status = struct.unpack('!BB', packet) + + if version != 0x01: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(1, version)) + + if status != 0x00: + self.close() + raise Socks5Error(1) + elif method == 0x00: # no auth + pass + + + packet = struct.pack('!BBB', 5, 1, 0) + if ipaddr is None: + if hasattr(destaddr, 'encode'): + destaddr = destaddr.encode() + packet += struct.pack('!BB', 3, len(destaddr)) + destaddr + else: + packet += struct.pack('!B', 1) + ipaddr + packet += struct.pack('!H', port) + + self.sendall(packet) + + packet = self.recvall(4) + version, status, _, atype = struct.unpack('!BBBB', packet) + + if version != 0x05: + self.close() + raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + + if status != 0x00: + self.close() + raise Socks5Error(status) + + if atype == 0x01: + destaddr = self.recvall(4) + elif atype == 0x03: + alen = struct.unpack('!B', self.recv(1))[0] + destaddr = self.recvall(alen) + elif atype == 0x04: + destaddr = self.recvall(16) + destport = struct.unpack('!H', self.recvall(2))[0] + + def _make_proxy(self, connect_func, address): + if self._proxy.type == ProxyType.SOCKS4: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks4(address) + elif self._proxy.type == ProxyType.SOCKS4A: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks4(address, is_4a=True) + elif self._proxy.type == ProxyType.SOCKS5: + result = connect_func(self, (self._proxy.host, self._proxy_port)) + if result != 0 and result is not None: + return result + self._setup_socks5(address) + else: + return connect_func(self, address) + + def connect(self, address): + self._make_proxy(_orig_socket.connect, address) + + def connect_ex(self, address): + return self._make_proxy(_orig_socket.connect_ex, address) From adc1dc91745d2b610d5583eef12a33d12f2dbc3c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Apr 2016 18:28:49 +0800 Subject: [PATCH 02/12] [utils,compat] Move struct_pack and struct_unpack to compat.py --- test/test_compat.py | 5 +++++ test/test_utils.py | 4 ---- youtube_dl/compat.py | 23 +++++++++++++++++++++++ youtube_dl/downloader/f4m.py | 4 ++-- youtube_dl/extractor/rtve.py | 4 +++- youtube_dl/swfinterp.py | 6 ++++-- youtube_dl/utils.py | 20 +------------------- 7 files changed, 38 insertions(+), 28 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 618668210..a5b663e61 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -20,6 +20,7 @@ from youtube_dl.compat import ( compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, + struct_unpack, ) @@ -99,5 +100,9 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_struct_unpack(self): + self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index e16a6761b..9092a5ec2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -55,7 +55,6 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, - struct_unpack, timeconvert, unescapeHTML, unified_strdate, @@ -453,9 +452,6 @@ class TestUtil(unittest.TestCase): testPL(5, 2, (2, 99), [2, 3, 4]) testPL(5, 2, (20, 99), []) - def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) - def test_read_batch_urls(self): f = io.StringIO('''\xef\xbb\xbf foo bar\r diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0b6c5ca7a..e56cf5c61 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -11,6 +11,7 @@ import re import shlex import shutil import socket +import struct import subprocess import sys import itertools @@ -583,6 +584,26 @@ if sys.version_info >= (3, 0): else: from tokenize import generate_tokens as compat_tokenize_tokenize + +try: + struct.pack('!I', 0) +except TypeError: + # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument + # See https://bugs.python.org/issue19099 + def struct_pack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.pack(spec, *args) + + def struct_unpack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.unpack(spec, *args) +else: + struct_pack = struct.pack + struct_unpack = struct.unpack + + __all__ = [ 'compat_HTMLParser', 'compat_HTTPError', @@ -624,6 +645,8 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'shlex_quote', + 'struct_pack', + 'struct_unpack', 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 664d87543..b282fe3d6 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,13 +12,13 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, + struct_pack, + struct_unpack, ) from ..utils import ( encodeFilename, fix_xml_ampersands, sanitize_open, - struct_pack, - struct_unpack, xpath_text, ) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 79af47715..f59040877 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,6 +6,9 @@ import re import time from .common import InfoExtractor +from ..compat import ( + struct_unpack, +) from ..utils import ( ExtractorError, float_or_none, @@ -13,7 +16,6 @@ from ..utils import ( remove_start, sanitized_Request, std_headers, - struct_unpack, ) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 06c1d6cc1..86b28716c 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,10 +4,12 @@ import collections import io import zlib -from .compat import compat_str +from .compat import ( + compat_str, + struct_unpack, +) from .utils import ( ExtractorError, - struct_unpack, ) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index f333e4712..01f5216a2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,7 +26,6 @@ import platform import re import ssl import socket -import struct import subprocess import sys import tempfile @@ -53,6 +52,7 @@ from .compat import ( compat_urlparse, compat_xpath, shlex_quote, + struct_pack, ) @@ -1754,24 +1754,6 @@ def escape_url(url): fragment=escape_rfc3986(url_parsed.fragment) ).geturl() -try: - struct.pack('!I', 0) -except TypeError: - # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument - # See https://bugs.python.org/issue19099 - def struct_pack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.pack(spec, *args) - - def struct_unpack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.unpack(spec, *args) -else: - struct_pack = struct.pack - struct_unpack = struct.unpack - def read_batch_urls(batch_fd): def fixup(url): From 1409ad22cae35d35defa31489119115a10e9fc35 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Apr 2016 21:30:06 +0800 Subject: [PATCH 03/12] [socks] Support SOCKS proxies --- youtube_dl/socks.py | 196 ++++++++++++-------------------------------- youtube_dl/utils.py | 63 +++++++++++++- 2 files changed, 115 insertions(+), 144 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index b0c36a189..95795b5a9 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -1,77 +1,30 @@ -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# For more information, please refer to -# -# Example: -# import socks -# import ftplib -# import socket -# -# socks.patch_socket() -# -# f = ftplib.FTP('ftp.kernel.org') -# f.login() -# print f.retrlines('LIST') -# f.quit() -# -# s = socket.create_connection(('www.google.com', 80)) -# s.sendall('HEAD / HTTP/1.0\r\n\r\n') -# print s.recv(1024) -# s.close() +# Public Domain SOCKS proxy protocol implementation +# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 + from __future__ import unicode_literals -import os -import struct + +import collections import socket -import time + +from .compat import ( + struct_pack, + struct_unpack, +) __author__ = 'Timo Schmid ' -_orig_socket = socket.socket -try: - from collections import namedtuple -except ImportError: - from Collections import namedtuple - -try: - from urllib.parse import urlparse -except: - from urlparse import urlparse - -try: - from enum import Enum -except ImportError: - Enum = object +class ProxyError(IOError): + pass -class ProxyError(IOError): pass class Socks4Error(ProxyError): CODES = { 0x5B: 'request rejected or failed', 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', 0x5D: 'request rejected because the client program and identd report different user-ids' } + def __init__(self, code=None, msg=None): if code is not None and msg is None: msg = self.CODES.get(code) @@ -79,6 +32,7 @@ class Socks4Error(ProxyError): msg = 'unknown error' super(Socks4Error, self).__init__(code, msg) + class Socks5Error(Socks4Error): CODES = { 0x01: 'general SOCKS server failure', @@ -93,68 +47,19 @@ class Socks5Error(Socks4Error): 0xFF: 'all offered authentication methods were rejected' } -class ProxyType(Enum): - SOCKS4 = 0 + +class ProxyType(object): + SOCKS4 = 0 SOCKS4A = 1 - SOCKS5 = 2 + SOCKS5 = 2 -Proxy = namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) - -_default_proxy = None - -def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None, allow_env_override=True): - global _default_proxy - if allow_env_override: - all_proxy = os.environ.get('ALL_PROXY', os.environ.get('all_proxy')) - if all_proxy: - all_proxy = urlparse(all_proxy) - if all_proxy.scheme.startswith('socks'): - if all_proxy.scheme == 'socks' or all_proxy.scheme == 'socks4': - proxytype = ProxyType.SOCKS4 - elif all_proxy.scheme == 'socks4a': - proxytype = ProxyType.SOCKS4A - elif all_proxy.scheme == 'socks5': - proxytype = ProxyType.SOCKS5 - addr = all_proxy.hostname - port = all_proxy.port - username = all_proxy.username - password = all_proxy.password - - if proxytype is not None: - _default_proxy = Proxy(proxytype, addr, port, username, password, rdns) - - -def wrap_socket(sock): - return socksocket(_sock=sock._sock) - -def wrap_module(module): - if hasattr(module, 'socket'): - sock = module.socket - if isinstance(sock, socket.socket): - module.socket = sockssocket - elif hasattr(socket, 'socket'): - socket.socket = sockssocket - -def patch_socket(): - import sys - if 'socket' not in sys.modules: - import socket - sys.modules['socket'].socket = sockssocket +Proxy = collections.namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) class sockssocket(socket.socket): - def __init__(self, *args, **kwargs): - self.__proxy = None - if 'proxy' in kwargs: - self.__proxy = kwargs['proxy'] - del kwargs['proxy'] - super(sockssocket, self).__init__(*args, **kwargs) - @property def _proxy(self): - if self.__proxy: - return self.__proxy - return _default_proxy + return self.__proxy @property def _proxy_port(self): @@ -175,7 +80,7 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise IOError("{0} bytes missing".format(cnt-len(data))) + raise IOError('{0} bytes missing'.format(cnt - len(data))) data += cur return data @@ -186,39 +91,42 @@ class sockssocket(socket.socket): ipaddr = socket.inet_aton(destaddr) except socket.error: if is_4a and self._proxy.remote_dns: - ipaddr = struct.pack('!BBBB', 0, 0, 0, 0xFF) + ipaddr = struct_pack('!BBBB', 0, 0, 0, 0xFF) else: ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - packet = struct.pack('!BBH', 0x4, 0x1, port) + ipaddr + packet = struct_pack('!BBH', 0x4, 0x1, port) + ipaddr if self._proxy.username: username = self._proxy.username if hasattr(username, 'encode'): username = username.encode() - packet += struct.pack('!{0}s'.format(len(username)+1), username) + packet += struct_pack('!{0}s'.format(len(username) + 1), username) else: packet += b'\x00' if is_4a and self._proxy.remote_dns: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!{0}s'.format(len(destaddr)+1), destaddr) + packet += struct_pack('!{0}s'.format(len(destaddr) + 1), destaddr) self.sendall(packet) packet = self.recvall(8) - nbyte, resp_code, dstport, dsthost = struct.unpack('!BBHI', packet) + nbyte, resp_code, dstport, dsthost = struct_unpack('!BBHI', packet) # check valid response if nbyte != 0x00: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(0, nbyte)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(0, nbyte)) # access granted if resp_code != 0x5a: self.close() raise Socks4Error(resp_code) + return (dsthost, dstport) + def _setup_socks5(self, address): destaddr, port = address @@ -234,19 +142,20 @@ class sockssocket(socket.socket): if self._proxy.username and self._proxy.password: # two auth methods available auth_methods = 2 - packet = struct.pack('!BBB', 0x5, auth_methods, 0x00) # no auth + packet = struct_pack('!BBB', 0x5, auth_methods, 0x00) # no auth if self._proxy.username and self._proxy.password: - packet += struct.pack('!B', 0x02) # user/pass auth + packet += struct_pack('!B', 0x02) # user/pass auth self.sendall(packet) packet = self.recvall(2) - version, method = struct.unpack('!BB', packet) + version, method = struct_unpack('!BB', packet) # check valid response if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) # no auth methods if method == 0xFF: @@ -261,41 +170,42 @@ class sockssocket(socket.socket): password = self._proxy.password if hasattr(password, 'encode'): password = password.encode() - packet = struct.pack('!BB', 1, len(username)) + username - packet += struct.pack('!B', len(password)) + password + packet = struct_pack('!BB', 1, len(username)) + username + packet += struct_pack('!B', len(password)) + password self.sendall(packet) packet = self.recvall(2) - version, status = struct.unpack('!BB', packet) + version, status = struct_unpack('!BB', packet) if version != 0x01: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(1, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(1, version)) if status != 0x00: self.close() raise Socks5Error(1) - elif method == 0x00: # no auth + elif method == 0x00: # no auth pass - - packet = struct.pack('!BBB', 5, 1, 0) + packet = struct_pack('!BBB', 5, 1, 0) if ipaddr is None: if hasattr(destaddr, 'encode'): destaddr = destaddr.encode() - packet += struct.pack('!BB', 3, len(destaddr)) + destaddr + packet += struct_pack('!BB', 3, len(destaddr)) + destaddr else: - packet += struct.pack('!B', 1) + ipaddr - packet += struct.pack('!H', port) + packet += struct_pack('!B', 1) + ipaddr + packet += struct_pack('!H', port) self.sendall(packet) packet = self.recvall(4) - version, status, _, atype = struct.unpack('!BBBB', packet) + version, status, _, atype = struct_unpack('!BBBB', packet) if version != 0x05: self.close() - raise ProxyError(0, "Invalid response from server. Expected {0:02x} got {1:02x}".format(5, version)) + raise ProxyError( + 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) if status != 0x00: self.close() @@ -304,11 +214,13 @@ class sockssocket(socket.socket): if atype == 0x01: destaddr = self.recvall(4) elif atype == 0x03: - alen = struct.unpack('!B', self.recv(1))[0] + alen = struct_unpack('!B', self.recv(1))[0] destaddr = self.recvall(alen) elif atype == 0x04: destaddr = self.recvall(16) - destport = struct.unpack('!H', self.recvall(2))[0] + destport = struct_unpack('!H', self.recvall(2))[0] + + return (destaddr, destport) def _make_proxy(self, connect_func, address): if self._proxy.type == ProxyType.SOCKS4: @@ -330,7 +242,7 @@ class sockssocket(socket.socket): return connect_func(self, address) def connect(self, address): - self._make_proxy(_orig_socket.connect, address) + self._make_proxy(socket.socket.connect, address) def connect_ex(self, address): - return self._make_proxy(_orig_socket.connect_ex, address) + return self._make_proxy(socket.socket.connect_ex, address) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 01f5216a2..181184879 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -55,6 +55,11 @@ from .compat import ( struct_pack, ) +from .socks import ( + ProxyType, + sockssocket, +) + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -745,8 +750,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): self._params = params def http_open(self, req): + conn_class = compat_http_client.HTTPConnection + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, compat_http_client.HTTPConnection, False), + _create_http_connection, self, conn_class, False), req) @staticmethod @@ -842,6 +854,41 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response +def make_socks_conn_class(base_class, socks_proxy): + assert issubclass(base_class, ( + compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection)) + + url_components = compat_urlparse.urlparse(socks_proxy) + if url_components.scheme.lower() == 'socks5': + socks_type = ProxyType.SOCKS5 + elif url_components.scheme.lower() in ('socks', 'socks4'): + socks_type = ProxyType.SOCKS4 + + proxy_args = ( + socks_type, + url_components.hostname, url_components.port or 1080, + True, # Remote DNS + url_components.username, url_components.password + ) + + class SocksConnection(base_class): + def connect(self): + self.sock = sockssocket() + self.sock.setproxy(*proxy_args) + if type(self.timeout) in (int, float): + self.sock.settimeout(self.timeout) + self.sock.connect((self.host, self.port)) + + if isinstance(self, compat_http_client.HTTPSConnection): + if hasattr(self, '_context'): # Python > 2.6 + self.sock = self._context.wrap_socket( + self.sock, server_hostname=self.host) + else: + self.sock = ssl.wrap_socket(self.sock) + + return SocksConnection + + class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def __init__(self, params, https_conn_class=None, *args, **kwargs): compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs) @@ -850,12 +897,20 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def https_open(self, req): kwargs = {} + conn_class = self._https_conn_class + if hasattr(self, '_context'): # python > 2.6 kwargs['context'] = self._context if hasattr(self, '_check_hostname'): # python 3.x kwargs['check_hostname'] = self._check_hostname + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, self._https_conn_class, True), + _create_http_connection, self, conn_class, True), req, **kwargs) @@ -2673,6 +2728,10 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'): + req.add_header('Ytdl-socks-proxy', proxy) + # youtube-dl's http/https handlers do wrapping the socket with socks + return None return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type) From 4953d304967c817c74be20e11782bff1456801bd Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 23 Apr 2016 21:30:44 +0800 Subject: [PATCH 04/12] [test/test_socks] Add tests for SOCKS proxies --- .gitignore | 1 + Makefile | 2 +- test/helper.py | 5 ++++ test/test_socks.py | 71 ++++++++++++++++++++++++++++++++++++++++++++++ tox.ini | 1 + 5 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 test/test_socks.py diff --git a/.gitignore b/.gitignore index 72c10425d..0e7128551 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ updates_key.pem *.part *.swp test/testdata +test/local_parameters.json .tox youtube-dl.zsh .idea diff --git a/Makefile b/Makefile index 06cffcb71..173c70d0d 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test: ot: offlinetest offlinetest: codetest - $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py tar: youtube-dl.tar.gz diff --git a/test/helper.py b/test/helper.py index b8e22c5cb..dfee217a9 100644 --- a/test/helper.py +++ b/test/helper.py @@ -24,8 +24,13 @@ from youtube_dl.utils import ( def get_params(override=None): PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") + LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "local_parameters.json") with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) + if os.path.exists(LOCAL_PARAMETERS_FILE): + with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + parameters.update(json.load(pf)) if override: parameters.update(override) return parameters diff --git a/test/test_socks.py b/test/test_socks.py new file mode 100644 index 000000000..92574c6fd --- /dev/null +++ b/test/test_socks.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import (FakeYDL, get_params) +from youtube_dl.compat import compat_urllib_request + + +class TestSocks(unittest.TestCase): + @staticmethod + def _check_params(attrs): + params = get_params() + for attr in attrs: + if attr not in params: + print('Missing %s. Skipping.' % attr) + return + return params + + def test_proxy_http(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_proxy_https(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_secondary_proxy_http(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('http://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + def test_secondary_proxy_https(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('https://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index 2d7134005..9c4e4a3d1 100644 --- a/tox.ini +++ b/tox.ini @@ -9,5 +9,6 @@ passenv = HOME defaultargs = test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + --exclude test_socks.py commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html # test.test_download:TestDownload.test_NowVideo From 0ab2d374812702f2bc6a5f8f9c7114d86fb21743 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 May 2016 15:11:05 +0800 Subject: [PATCH 05/12] [socks] Eliminate magic constants and improve --- youtube_dl/socks.py | 289 ++++++++++++++++++++++++-------------------- 1 file changed, 157 insertions(+), 132 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 95795b5a9..0e3dd7893 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -3,37 +3,87 @@ from __future__ import unicode_literals +# References: +# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol +# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol +# SOCKS5 protocol https://tools.ietf.org/html/rfc1928 +# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929 + import collections import socket from .compat import ( + compat_ord, struct_pack, struct_unpack, ) __author__ = 'Timo Schmid ' +SOCKS4_VERSION = 4 +SOCKS4_REPLY_VERSION = 0x00 +# Excerpt from SOCKS4A protocol: +# if the client cannot resolve the destination host's domain name to find its +# IP address, it should set the first three bytes of DSTIP to NULL and the last +# byte to a non-zero value. +SOCKS4_DEFAULT_DSTIP = struct_pack('!BBBB', 0, 0, 0, 0xFF) + +SOCKS5_VERSION = 5 +SOCKS5_USER_AUTH_VERSION = 0x01 +SOCKS5_USER_AUTH_SUCCESS = 0x00 + + +class Socks4Command(object): + CMD_CONNECT = 0x01 + CMD_BIND = 0x02 + + +class Socks5Command(Socks4Command): + CMD_UDP_ASSOCIATE = 0x03 + + +class Socks5Auth(object): + AUTH_NONE = 0x00 + AUTH_GSSAPI = 0x01 + AUTH_USER_PASS = 0x02 + AUTH_NO_ACCEPTABLE = 0xFF # For server response + + +class Socks5AddressType(object): + ATYP_IPV4 = 0x01 + ATYP_DOMAINNAME = 0x03 + ATYP_IPV6 = 0x04 + class ProxyError(IOError): - pass - - -class Socks4Error(ProxyError): - CODES = { - 0x5B: 'request rejected or failed', - 0x5C: 'request rejected becasue SOCKS server cannot connect to identd on the client', - 0x5D: 'request rejected because the client program and identd report different user-ids' - } + ERR_SUCCESS = 0x00 def __init__(self, code=None, msg=None): if code is not None and msg is None: - msg = self.CODES.get(code) - if msg is None: - msg = 'unknown error' - super(Socks4Error, self).__init__(code, msg) + msg = self.CODES.get(code) and 'unknown error' + super(ProxyError, self).__init__(code, msg) -class Socks5Error(Socks4Error): +class InvalidVersionError(ProxyError): + def __init__(self, expected_version, got_version): + msg = ('Invalid response version from server. Expected {0:02x} got ' + '{1:02x}'.format(expected_version, got_version)) + super(InvalidVersionError, self).__init__(0, msg) + + +class Socks4Error(ProxyError): + ERR_SUCCESS = 90 + + CODES = { + 91: 'request rejected or failed', + 92: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 93: 'request rejected because the client program and identd report different user-ids' + } + + +class Socks5Error(ProxyError): + ERR_GENERAL_FAILURE = 0x01 + CODES = { 0x01: 'general SOCKS server failure', 0x02: 'connection not allowed by ruleset', @@ -53,27 +103,19 @@ class ProxyType(object): SOCKS4A = 1 SOCKS5 = 2 -Proxy = collections.namedtuple('Proxy', ('type', 'host', 'port', 'username', 'password', 'remote_dns')) +Proxy = collections.namedtuple('Proxy', ( + 'type', 'host', 'port', 'username', 'password', 'remote_dns')) class sockssocket(socket.socket): - @property - def _proxy(self): - return self.__proxy + def __init__(self, *args, **kwargs): + self._proxy = None + super(sockssocket, self).__init__(*args, **kwargs) - @property - def _proxy_port(self): - if self._proxy: - if self._proxy.port: - return self._proxy.port - return 1080 - return None + def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None): + assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5) - def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): - if proxytype is None: - self.__proxy = None - else: - self.__proxy = Proxy(proxytype, addr, port, username, password, rdns) + self._proxy = Proxy(proxytype, addr, port, username, password, rdns) def recvall(self, cnt): data = b'' @@ -84,163 +126,146 @@ class sockssocket(socket.socket): data += cur return data + def _recv_bytes(self, cnt): + data = self.recvall(cnt) + return struct_unpack('!{0}B'.format(cnt), data) + + @staticmethod + def _len_and_data(data): + return struct_pack('!B', len(data)) + data + + def _check_response_version(self, expected_version, got_version): + if got_version != expected_version: + self.close() + raise InvalidVersionError(expected_version, got_version) + + def _resolve_address(self, destaddr, default, use_remote_dns): + try: + return socket.inet_aton(destaddr) + except socket.error: + if use_remote_dns and self._proxy.remote_dns: + return default + else: + return socket.inet_aton(socket.gethostbyname(destaddr)) + def _setup_socks4(self, address, is_4a=False): destaddr, port = address - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - if is_4a and self._proxy.remote_dns: - ipaddr = struct_pack('!BBBB', 0, 0, 0, 0xFF) - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) - packet = struct_pack('!BBH', 0x4, 0x1, port) + ipaddr - if self._proxy.username: - username = self._proxy.username - if hasattr(username, 'encode'): - username = username.encode() - packet += struct_pack('!{0}s'.format(len(username) + 1), username) - else: - packet += b'\x00' + packet = struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + + username = (self._proxy.username or '').encode('utf-8') + packet += username + b'\x00' if is_4a and self._proxy.remote_dns: - if hasattr(destaddr, 'encode'): - destaddr = destaddr.encode() - packet += struct_pack('!{0}s'.format(len(destaddr) + 1), destaddr) + packet += destaddr.encode('utf-8') + b'\x00' self.sendall(packet) - packet = self.recvall(8) - nbyte, resp_code, dstport, dsthost = struct_unpack('!BBHI', packet) + version, resp_code, dstport, dsthost = struct_unpack('!BBHI', self.recvall(8)) - # check valid response - if nbyte != 0x00: - self.close() - raise ProxyError( - 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(0, nbyte)) + self._check_response_version(SOCKS4_REPLY_VERSION, version) - # access granted - if resp_code != 0x5a: + if resp_code != Socks4Error.ERR_SUCCESS: self.close() raise Socks4Error(resp_code) return (dsthost, dstport) - def _setup_socks5(self, address): - destaddr, port = address + def _setup_socks4a(self, address): + self._setup_socks4(address, is_4a=True) - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - if self._proxy.remote_dns: - ipaddr = None - else: - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + def _socks5_auth(self): + packet = struct_pack('!B', SOCKS5_VERSION) - auth_methods = 1 + auth_methods = [Socks5Auth.AUTH_NONE] if self._proxy.username and self._proxy.password: - # two auth methods available - auth_methods = 2 - packet = struct_pack('!BBB', 0x5, auth_methods, 0x00) # no auth - if self._proxy.username and self._proxy.password: - packet += struct_pack('!B', 0x02) # user/pass auth + auth_methods.append(Socks5Auth.AUTH_USER_PASS) + + packet += struct_pack('!B', len(auth_methods)) + packet += struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) self.sendall(packet) - packet = self.recvall(2) - version, method = struct_unpack('!BB', packet) + version, method = self._recv_bytes(2) - # check valid response - if version != 0x05: - self.close() - raise ProxyError( - 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) + self._check_response_version(SOCKS5_VERSION, version) - # no auth methods - if method == 0xFF: + if method == Socks5Auth.AUTH_NO_ACCEPTABLE: self.close() raise Socks5Error(method) - # user/pass auth - if method == 0x01: - username = self._proxy.username - if hasattr(username, 'encode'): - username = username.encode() - password = self._proxy.password - if hasattr(password, 'encode'): - password = password.encode() - packet = struct_pack('!BB', 1, len(username)) + username - packet += struct_pack('!B', len(password)) + password + if method == Socks5Auth.AUTH_USER_PASS: + username = self._proxy.username.encode('utf-8') + password = self._proxy.password.encode('utf-8') + packet = struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) - packet = self.recvall(2) - version, status = struct_unpack('!BB', packet) + version, status = self._recv_bytes(2) - if version != 0x01: - self.close() - raise ProxyError( - 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(1, version)) + self._check_response_version(SOCKS5_USER_AUTH_VERSION, version) - if status != 0x00: + if status != SOCKS5_USER_AUTH_SUCCESS: self.close() - raise Socks5Error(1) - elif method == 0x00: # no auth + raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) + elif method == Socks5Auth.AUTH_NONE: pass - packet = struct_pack('!BBB', 5, 1, 0) + def _setup_socks5(self, address): + destaddr, port = address + + ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) + + self._socks5_auth() + + reserved = 0 + packet = struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: - if hasattr(destaddr, 'encode'): - destaddr = destaddr.encode() - packet += struct_pack('!BB', 3, len(destaddr)) + destaddr + destaddr = destaddr.encode('utf-8') + packet += struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += self._len_and_data(destaddr) else: - packet += struct_pack('!B', 1) + ipaddr + packet += struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr packet += struct_pack('!H', port) self.sendall(packet) - packet = self.recvall(4) - version, status, _, atype = struct_unpack('!BBBB', packet) + version, status, reserved, atype = self._recv_bytes(4) - if version != 0x05: - self.close() - raise ProxyError( - 0, 'Invalid response from server. Expected {0:02x} got {1:02x}'.format(5, version)) + self._check_response_version(SOCKS5_VERSION, version) - if status != 0x00: + if status != Socks5Error.ERR_SUCCESS: self.close() raise Socks5Error(status) - if atype == 0x01: + if atype == Socks5AddressType.ATYP_IPV4: destaddr = self.recvall(4) - elif atype == 0x03: - alen = struct_unpack('!B', self.recv(1))[0] + elif atype == Socks5AddressType.ATYP_DOMAINNAME: + alen = compat_ord(self.recv(1)) destaddr = self.recvall(alen) - elif atype == 0x04: + elif atype == Socks5AddressType.ATYP_IPV6: destaddr = self.recvall(16) destport = struct_unpack('!H', self.recvall(2))[0] return (destaddr, destport) def _make_proxy(self, connect_func, address): - if self._proxy.type == ProxyType.SOCKS4: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks4(address) - elif self._proxy.type == ProxyType.SOCKS4A: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks4(address, is_4a=True) - elif self._proxy.type == ProxyType.SOCKS5: - result = connect_func(self, (self._proxy.host, self._proxy_port)) - if result != 0 and result is not None: - return result - self._setup_socks5(address) - else: + if not self._proxy: return connect_func(self, address) + result = connect_func(self, (self._proxy.host, self._proxy.port)) + if result != 0 and result is not None: + return result + setup_funcs = { + ProxyType.SOCKS4: self._setup_socks4, + ProxyType.SOCKS4A: self._setup_socks4a, + ProxyType.SOCKS5: self._setup_socks5, + } + setup_funcs[self._proxy.type](address) + return result + def connect(self, address): self._make_proxy(socket.socket.connect, address) From a526eff0597af13e9637bfa81dbc7c57977862cf Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 May 2016 15:15:32 +0800 Subject: [PATCH 06/12] [utils] Register SOCKS protocols in urllib and support SOCKS4A --- youtube_dl/YoutubeDL.py | 3 +++ youtube_dl/utils.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 055433362..0f81de386 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -64,6 +64,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + register_socks_protocols, render_table, replace_extension, SameFileError, @@ -361,6 +362,8 @@ class YoutubeDL(object): for ph in self.params.get('progress_hooks', []): self.add_progress_hook(ph) + register_socks_protocols() + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 181184879..41b16d614 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -61,6 +61,13 @@ from .socks import ( ) +def register_socks_protocols(): + # "Register" SOCKS protocols + for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): + if scheme not in compat_urlparse.uses_netloc: + compat_urlparse.uses_netloc.append(scheme) + + # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -863,6 +870,8 @@ def make_socks_conn_class(base_class, socks_proxy): socks_type = ProxyType.SOCKS5 elif url_components.scheme.lower() in ('socks', 'socks4'): socks_type = ProxyType.SOCKS4 + elif url_components.scheme.lower() == 'socks4a': + socks_type = ProxyType.SOCKS4A proxy_args = ( socks_type, @@ -2728,7 +2737,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy - if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'): + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): req.add_header('Ytdl-socks-proxy', proxy) # youtube-dl's http/https handlers do wrapping the socket with socks return None From ed8f6b736bb97048d85337e716d2c733ecd9783f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 May 2016 15:37:30 +0800 Subject: [PATCH 07/12] [utils] Add rationale for register_socks_protocols --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 41b16d614..21421770f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -63,6 +63,8 @@ from .socks import ( def register_socks_protocols(): # "Register" SOCKS protocols + # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 + # URLs with protocols not in urlparse.uses_netloc are not handled correctly for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): if scheme not in compat_urlparse.uses_netloc: compat_urlparse.uses_netloc.append(scheme) From 56a1748cfbfc1be65e47377edcca405c39096043 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Tue, 3 May 2016 16:50:16 +0800 Subject: [PATCH 08/12] [compat] Rename struct_(un)pack to compat_struct_(un)pack --- test/test_compat.py | 4 ++-- youtube_dl/compat.py | 12 ++++++------ youtube_dl/downloader/f4m.py | 14 +++++++------- youtube_dl/extractor/rtve.py | 4 ++-- youtube_dl/socks.py | 32 ++++++++++++++++---------------- youtube_dl/swfinterp.py | 14 +++++++------- youtube_dl/utils.py | 4 ++-- 7 files changed, 42 insertions(+), 42 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index a5b663e61..835ee2c91 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -17,10 +17,10 @@ from youtube_dl.compat import ( compat_expanduser, compat_shlex_split, compat_str, + compat_struct_unpack, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, - struct_unpack, ) @@ -101,7 +101,7 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) + self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) if __name__ == '__main__': diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e56cf5c61..c02abaf56 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -590,18 +590,18 @@ try: except TypeError: # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument # See https://bugs.python.org/issue19099 - def struct_pack(spec, *args): + def compat_struct_pack(spec, *args): if isinstance(spec, compat_str): spec = spec.encode('ascii') return struct.pack(spec, *args) - def struct_unpack(spec, *args): + def compat_struct_unpack(spec, *args): if isinstance(spec, compat_str): spec = spec.encode('ascii') return struct.unpack(spec, *args) else: - struct_pack = struct.pack - struct_unpack = struct.unpack + compat_struct_pack = struct.pack + compat_struct_unpack = struct.unpack __all__ = [ @@ -628,6 +628,8 @@ __all__ = [ 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', + 'compat_struct_pack', + 'compat_struct_unpack', 'compat_subprocess_get_DEVNULL', 'compat_tokenize_tokenize', 'compat_urllib_error', @@ -645,8 +647,6 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'shlex_quote', - 'struct_pack', - 'struct_unpack', 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index b282fe3d6..3d9337afa 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,8 +12,8 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, - struct_pack, - struct_unpack, + compat_struct_pack, + compat_struct_unpack, ) from ..utils import ( encodeFilename, @@ -31,13 +31,13 @@ class FlvReader(io.BytesIO): # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read(8))[0] def read_unsigned_int(self): - return struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', self.read(4))[0] def read_unsigned_char(self): - return struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read(1))[0] def read_string(self): res = b'' @@ -194,11 +194,11 @@ def build_fragments_list(boot_info): def write_unsigned_int(stream, val): - stream.write(struct_pack('!I', val)) + stream.write(compat_struct_pack('!I', val)) def write_unsigned_int_24(stream, val): - stream.write(struct_pack('!I', val)[1:]) + stream.write(compat_struct_pack('!I', val)[1:]) def write_flv_header(stream): diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index f59040877..edd0d108e 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -7,7 +7,7 @@ import time from .common import InfoExtractor from ..compat import ( - struct_unpack, + compat_struct_unpack, ) from ..utils import ( ExtractorError, @@ -23,7 +23,7 @@ def _decrypt_url(png): encrypted_data = base64.b64decode(png.encode('utf-8')) text_index = encrypted_data.find(b'tEXt') text_chunk = encrypted_data[text_index - 4:] - length = struct_unpack('!I', text_chunk[:4])[0] + length = compat_struct_unpack('!I', text_chunk[:4])[0] # Use bytearray to get integers when iterating in both python 2.x and 3.x data = bytearray(text_chunk[8:8 + length]) data = [chr(b) for b in data if b != 0] diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 0e3dd7893..a5b27fea7 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -14,8 +14,8 @@ import socket from .compat import ( compat_ord, - struct_pack, - struct_unpack, + compat_struct_pack, + compat_struct_unpack, ) __author__ = 'Timo Schmid ' @@ -26,7 +26,7 @@ SOCKS4_REPLY_VERSION = 0x00 # if the client cannot resolve the destination host's domain name to find its # IP address, it should set the first three bytes of DSTIP to NULL and the last # byte to a non-zero value. -SOCKS4_DEFAULT_DSTIP = struct_pack('!BBBB', 0, 0, 0, 0xFF) +SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF) SOCKS5_VERSION = 5 SOCKS5_USER_AUTH_VERSION = 0x01 @@ -128,11 +128,11 @@ class sockssocket(socket.socket): def _recv_bytes(self, cnt): data = self.recvall(cnt) - return struct_unpack('!{0}B'.format(cnt), data) + return compat_struct_unpack('!{0}B'.format(cnt), data) @staticmethod def _len_and_data(data): - return struct_pack('!B', len(data)) + data + return compat_struct_pack('!B', len(data)) + data def _check_response_version(self, expected_version, got_version): if got_version != expected_version: @@ -153,7 +153,7 @@ class sockssocket(socket.socket): ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) - packet = struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr username = (self._proxy.username or '').encode('utf-8') packet += username + b'\x00' @@ -163,7 +163,7 @@ class sockssocket(socket.socket): self.sendall(packet) - version, resp_code, dstport, dsthost = struct_unpack('!BBHI', self.recvall(8)) + version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8)) self._check_response_version(SOCKS4_REPLY_VERSION, version) @@ -177,14 +177,14 @@ class sockssocket(socket.socket): self._setup_socks4(address, is_4a=True) def _socks5_auth(self): - packet = struct_pack('!B', SOCKS5_VERSION) + packet = compat_struct_pack('!B', SOCKS5_VERSION) auth_methods = [Socks5Auth.AUTH_NONE] if self._proxy.username and self._proxy.password: auth_methods.append(Socks5Auth.AUTH_USER_PASS) - packet += struct_pack('!B', len(auth_methods)) - packet += struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) + packet += compat_struct_pack('!B', len(auth_methods)) + packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) self.sendall(packet) @@ -199,7 +199,7 @@ class sockssocket(socket.socket): if method == Socks5Auth.AUTH_USER_PASS: username = self._proxy.username.encode('utf-8') password = self._proxy.password.encode('utf-8') - packet = struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) @@ -221,14 +221,14 @@ class sockssocket(socket.socket): self._socks5_auth() reserved = 0 - packet = struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) + packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: destaddr = destaddr.encode('utf-8') - packet += struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) packet += self._len_and_data(destaddr) else: - packet += struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr - packet += struct_pack('!H', port) + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr + packet += compat_struct_pack('!H', port) self.sendall(packet) @@ -247,7 +247,7 @@ class sockssocket(socket.socket): destaddr = self.recvall(alen) elif atype == Socks5AddressType.ATYP_IPV6: destaddr = self.recvall(16) - destport = struct_unpack('!H', self.recvall(2))[0] + destport = compat_struct_unpack('!H', self.recvall(2))[0] return (destaddr, destport) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 86b28716c..7cf490aa4 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -6,7 +6,7 @@ import zlib from .compat import ( compat_str, - struct_unpack, + compat_struct_unpack, ) from .utils import ( ExtractorError, @@ -25,17 +25,17 @@ def _extract_tags(file_contents): file_contents[:1]) # Determine number of bits in framesize rectangle - framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3 + framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3 framesize_len = (5 + 4 * framesize_nbits + 7) // 8 pos = framesize_len + 2 + 2 while pos < len(content): - header16 = struct_unpack('> 6 tag_len = header16 & 0x3f if tag_len == 0x3f: - tag_len = struct_unpack('= 0x80) else b'\x00' - return struct_unpack(' Date: Thu, 5 May 2016 17:09:13 +0800 Subject: [PATCH 09/12] [test/test_socks] Test with local SOCKS servers --- .gitignore | 1 + .travis.yml | 3 +++ devscripts/install_srelay.sh | 8 +++++++ test/test_socks.py | 42 +++++++++++++++++++++++++++++++++--- 4 files changed, 51 insertions(+), 3 deletions(-) create mode 100755 devscripts/install_srelay.sh diff --git a/.gitignore b/.gitignore index 0e7128551..d5f216b5f 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ test/local_parameters.json youtube-dl.zsh .idea .idea/* +tmp/ diff --git a/.travis.yml b/.travis.yml index cc21fae8f..998995845 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,9 @@ python: - "3.4" - "3.5" sudo: false +install: + - bash ./devscripts/install_srelay.sh + - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6 script: nosetests test --verbose notifications: email: diff --git a/devscripts/install_srelay.sh b/devscripts/install_srelay.sh new file mode 100755 index 000000000..33ce8a3f7 --- /dev/null +++ b/devscripts/install_srelay.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +mkdir -p tmp && cd tmp +wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz +tar zxvf srelay-0.4.8b6.tar.gz +cd srelay-0.4.8b6 +./configure +make diff --git a/test/test_socks.py b/test/test_socks.py index 92574c6fd..dc9b8d276 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -8,11 +8,20 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import (FakeYDL, get_params) -from youtube_dl.compat import compat_urllib_request +import random +import subprocess + +from test.helper import ( + FakeYDL, + get_params, +) +from youtube_dl.compat import ( + compat_str, + compat_urllib_request, +) -class TestSocks(unittest.TestCase): +class TestMultipleSocks(unittest.TestCase): @staticmethod def _check_params(attrs): params = get_params() @@ -67,5 +76,32 @@ class TestSocks(unittest.TestCase): params['secondary_server_ip']) +class TestSocks(unittest.TestCase): + def setUp(self): + self.port = random.randint(49152, 65535) + self.server_process = subprocess.Popen([ + 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def tearDown(self): + self.server_process.terminate() + self.server_process.communicate() + + def _get_ip(self, protocol): + ydl = FakeYDL({ + 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), + }) + return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8') + + def test_socks4(self): + self.assertTrue(isinstance(self._get_ip('socks4'), compat_str)) + + def test_socks4a(self): + self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str)) + + def test_socks5(self): + self.assertTrue(isinstance(self._get_ip('socks5'), compat_str)) + + if __name__ == '__main__': unittest.main() From 47a6539a066520be16123f27f89cdb481744fe6c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 8 May 2016 15:14:56 +0800 Subject: [PATCH 10/12] [socks] Remove a superfluous clause --- youtube_dl/socks.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index a5b27fea7..fd49d7435 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -210,8 +210,6 @@ class sockssocket(socket.socket): if status != SOCKS5_USER_AUTH_SUCCESS: self.close() raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) - elif method == Socks5Auth.AUTH_NONE: - pass def _setup_socks5(self, address): destaddr, port = address From 1b53d85d5a9a7fba5c009b7323b3bc16d858853e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 8 May 2016 15:15:58 +0800 Subject: [PATCH 11/12] [options] Update --proxy description for SOCKS proxies --- youtube_dl/options.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d1f8d1331..38efd292d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -188,7 +188,10 @@ def parseOpts(overrideArguments=None): network.add_option( '--proxy', dest='proxy', default=None, metavar='URL', - help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') + help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental ' + 'SOCKS proxy, specify a proper scheme. For example ' + 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") ' + 'for direct connection') network.add_option( '--socket-timeout', dest='socket_timeout', type=float, default=None, metavar='SECONDS', From eb87b65b97185be25a3a98f2977d42f2126e345f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 8 May 2016 15:16:32 +0800 Subject: [PATCH 12/12] [test/test_socks] Use a different port range Seems on Travis CI, ports in the original range are often used. --- test/test_socks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_socks.py b/test/test_socks.py index dc9b8d276..d07003ceb 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -78,7 +78,7 @@ class TestMultipleSocks(unittest.TestCase): class TestSocks(unittest.TestCase): def setUp(self): - self.port = random.randint(49152, 65535) + self.port = random.randint(20000, 30000) self.server_process = subprocess.Popen([ 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)