From 76d7235f08046dfb82624f3d30d73678eef2103e Mon Sep 17 00:00:00 2001 From: Sepero Date: Tue, 30 Oct 2012 21:55:17 -0300 Subject: [PATCH 1/7] retries cannot be None, because a default was set retries cannot be None, because a default of 10 was set in add_option --- youtube_dl/__init__.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3aa7bde12..0b223c0cb 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -441,11 +441,10 @@ def _real_main(): if numeric_limit is None: parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit - if opts.retries is not None: - try: - opts.retries = long(opts.retries) - except (TypeError, ValueError), err: - parser.error(u'invalid retry count specified') + try: + opts.retries = long(opts.retries) + except (TypeError, ValueError), err: + parser.error(u'invalid retry count specified') try: opts.playliststart = int(opts.playliststart) if opts.playliststart <= 0: From c4cb885a449531eceacccbdb3aa6de68320af4dc Mon Sep 17 00:00:00 2001 From: Sepero Date: Tue, 30 Oct 2012 21:58:57 -0300 Subject: [PATCH 2/7] retries cannot be None, because a default was set --- youtube_dl/FileDownloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index ed5a79f13..2ef0d6855 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -579,7 +579,7 @@ class FileDownloader(object): resume_len = 0 count = 0 - retries = self.params.get('retries', 0) + retries = self.params['retries'] while count <= retries: # Establish connection try: From f01e218ae28f659d3796c1133070b774a79eb57e Mon Sep 17 00:00:00 2001 From: Sepero Date: Tue, 30 Oct 2012 22:09:11 -0300 Subject: [PATCH 3/7] Allow for infinite retries with retries option 0 Allow for infinite retries if the -R retries option was set as 0 --- youtube_dl/FileDownloader.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 2ef0d6855..b73ba027a 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -580,7 +580,7 @@ class FileDownloader(object): count = 0 retries = self.params['retries'] - while count <= retries: + while retries == 0 or count <= retries: # Establish connection try: if count == 0 and 'urlhandle' in info_dict: @@ -624,7 +624,7 @@ class FileDownloader(object): if count <= retries: self.report_retry(count, retries) - if count > retries: + if retries != 0 and count > retries: self.trouble(u'ERROR: giving up after %s retries' % retries) return False From 148749e919fd38077fd02f88ed1f3346f4025955 Mon Sep 17 00:00:00 2001 From: Sepero Date: Tue, 30 Oct 2012 22:13:17 -0300 Subject: [PATCH 4/7] Change print output for infinite retries --- youtube_dl/FileDownloader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index b73ba027a..09a147073 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -292,7 +292,7 @@ class FileDownloader(object): def report_retry(self, count, retries): """Report retry in case of HTTP error 5xx""" - self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)) + self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %s of %s)...' 
% (count, retries)) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" @@ -621,8 +621,7 @@ class FileDownloader(object): break # Retry count += 1 - if count <= retries: - self.report_retry(count, retries) + self.report_retry(count, retries if retries else "infinite") if retries != 0 and count > retries: self.trouble(u'ERROR: giving up after %s retries' % retries) From 147054c4d9dcbda4c89caf2220122cd5372c7fbc Mon Sep 17 00:00:00 2001 From: Sepero Date: Tue, 30 Oct 2012 22:27:42 -0300 Subject: [PATCH 5/7] Edited code and documentation for infinite retries Changed code to accept "0" or "inf" for infinite retries (like wget), and modified OptParse help documentation to reflect changes. --- youtube_dl/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 0b223c0cb..1231beef7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -188,7 +188,7 @@ def parseOpts(): general.add_option('-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') general.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) + dest='retries', metavar='RETRIES', help='number of retries (default is %default). 
specify 0 or inf for infinite retries', default=10) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -442,6 +442,7 @@ def _real_main(): parser.error(u'invalid rate limit specified') opts.ratelimit = numeric_limit try: + if opts.retries == "inf": opts.retries = 0 opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') From e6b2b3eccc9250be4e88f4722225e1ced21b8117 Mon Sep 17 00:00:00 2001 From: Sepero Date: Tue, 30 Oct 2012 22:32:38 -0300 Subject: [PATCH 6/7] Updated more documentation --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1a96bd98d..3ef050ce7 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like. -U, --update update this program to latest version -i, --ignore-errors continue on download errors -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) - -R, --retries RETRIES number of retries (default is 10) + -R, --retries RETRIES number of retries (default is 10). Specify 0 or inf + for infinite retries --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent --list-extractors List all supported extractors and the URLs they From 71bcf9ee2cbe00cada493dac25036728aff51d59 Mon Sep 17 00:00:00 2001 From: Sepero Date: Wed, 31 Oct 2012 09:02:27 -0300 Subject: [PATCH 7/7] Modularization of login code An attempt to modularize the method of logging in across different websites. 
--- youtube_dl/InfoExtractors.py | 171 ++++++++++++++++------------------- 1 file changed, 78 insertions(+), 93 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 9df521d02..9349f708c 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -93,6 +93,66 @@ class InfoExtractor(object): """Real extraction process. Redefine in subclasses.""" pass + def _login(self): + if self._downloader is None: + return False + + username = None + password = None + downloader_params = self._downloader.params + + # Attempt to use provided username and password or .netrc data + if downloader_params.get('username', None) and \ + downloader_params.get('password', None): + username = downloader_params['username'] + password = downloader_params['password'] + elif downloader_params.get('usenetrc', False): + try: + info = netrc.netrc().authenticators(self._NETRC_MACHINE) + if info is not None: + username = info[0] + password = info[2] + else: + raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) + except (IOError, netrc.NetrcParseError), err: + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + return False + + # Set language + if hasattr(self, "_LANG_URL"): + request = urllib2.Request(self._LANG_URL) + try: + self.report_lang() + urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + return False + + # No authentication to be performed + if username is None: + return False + + login_form = self._LOGIN_FORM + # Set login credentials + for k in login_form: + if login_form[k] == "username": + login_form[k] = username + elif login_form[k] == "password": + login_form[k] = password + + request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) + try: + self.report_login() + login_results = urllib2.urlopen(request).read() + if 
re.search(self._FAILED_LOGIN, login_results) is not None: + self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') + return + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + return False + + return request + class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" @@ -119,9 +179,17 @@ class YoutubeIE(InfoExtractor): $""" _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' + _FAILED_LOGIN = r'(?i)]* name="loginForm"' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NEXT_URL_RE = r'[\?&]next_url=([^&]+)' _NETRC_MACHINE = 'youtube' + _LOGIN_FORM = { + 'current_form': 'loginForm', + 'next': '/', + 'action_login': 'Log In', + 'username': "username", + 'password': "password", + } # Listed in order of quality _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] @@ -218,59 +286,10 @@ class YoutubeIE(InfoExtractor): print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')) def _real_initialize(self): - if self._downloader is None: - return - - username = None - password = None - downloader_params = self._downloader.params - - # Attempt to use provided username and password or .netrc data - if downloader_params.get('username', None) is not None: - username = downloader_params['username'] - password = downloader_params['password'] - elif downloader_params.get('usenetrc', False): - try: - info = netrc.netrc().authenticators(self._NETRC_MACHINE) - if info is not None: - username = info[0] - password = info[2] - else: - raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - 
except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) - return - - # Set language - request = urllib2.Request(self._LANG_URL) - try: - self.report_lang() - urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) - return - - # No authentication to be performed - if username is None: - return - # Log in - login_form = { - 'current_form': 'loginForm', - 'next': '/', - 'action_login': 'Log In', - 'username': username, - 'password': password, - } - request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) - try: - self.report_login() - login_results = urllib2.urlopen(request).read() - if re.search(r'(?i)]* name="loginForm"', login_results) is not None: - self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') - return - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + request = self._login() + + if not request: return # Confirm age @@ -1882,7 +1901,13 @@ class FacebookIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P\d+)(?:.*)' _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' + _FAILED_LOGIN = r'' _NETRC_MACHINE = 'facebook' + _LOGIN_FORM = { + 'email': "username", + 'pass': "password", + 'login': 'Log+In' + } _available_formats = ['video', 'highqual', 'lowqual'] _video_extensions = { 'video': 'mp4', @@ -1937,48 +1962,8 @@ class FacebookIE(InfoExtractor): return video_info def _real_initialize(self): - if self._downloader is None: - return - - useremail = None - password = None - downloader_params = self._downloader.params - - # Attempt to use provided username and password or .netrc data - if downloader_params.get('username', None) 
is not None: - useremail = downloader_params['username'] - password = downloader_params['password'] - elif downloader_params.get('usenetrc', False): - try: - info = netrc.netrc().authenticators(self._NETRC_MACHINE) - if info is not None: - useremail = info[0] - password = info[2] - else: - raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) - return - - if useremail is None: - return - # Log in - login_form = { - 'email': useremail, - 'pass': password, - 'login': 'Log+In' - } - request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) - try: - self.report_login() - login_results = urllib2.urlopen(request).read() - if re.search(r'', login_results) is not None: - self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') - return - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) - return + self._login() def _real_extract(self, url): mobj = re.match(self._VALID_URL, url)