mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-02 22:32:54 +08:00
Log in when a Cloudflare challenge is present
This commit is contained in:
parent
38592b0123
commit
1e77c43688
@ -2818,6 +2818,64 @@ class InfoExtractor(object):
|
|||||||
def _generic_title(self, url):
|
def _generic_title(self, url):
|
||||||
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||||
|
|
||||||
|
def _cf_solve_challenge(self, body, domain):
    '''
    Solve the Cloudflare IUAM JavaScript challenge.

    The arithmetic snippet embedded in the challenge page is extracted,
    reduced to a single expression and evaluated by an external Node.js
    process; the printed result is the answer to submit.

    Original code from: https://github.com/Anorov/cloudflare-scrape/blob/master/cfscrape/__init__.py#L112-L149

    @param <String> body    HTML of the challenge page
    @param <String> domain  result of `compat_urlparse.urlparse().netloc`
    @return <String>        the numeric answer, as text
    @raise ValueError       the challenge script is missing, cannot be
                            parsed, or the evaluated answer is not a number
    @raise EnvironmentError the `node` executable cannot be found
    '''
    import errno
    import subprocess

    try:
        challenge = re.search(
            r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n",
            body)
    except TypeError:
        # A body that is not text (e.g. None) cannot contain the challenge.
        challenge = None
    if challenge is None:
        raise ValueError("Unable to identify Cloudflare IUAM Javascript on website.")
    js = challenge.group(1)

    # Keep only the expression assigned to a.value, drop the statements that
    # touch the DOM, and substitute the one DOM-derived value (t.length)
    # with the length of the domain.
    js = re.sub(r"a\.value = (.+ \+ t\.length).+", r"\1", js)
    js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain)))

    # Strip characters that could be used to exit the string context
    # These characters are not currently used in Cloudflare's arithmetic snippet
    js = re.sub(r"[\n\\']", "", js)

    if "toFixed" not in js:
        raise ValueError("Error parsing Cloudflare IUAM Javascript challenge.")

    # The snippet is evaluated through vm.runInNewContext with an empty
    # context and a 5000 ms timeout.
    # NOTE(review): Node's documentation states that the vm module is not a
    # security mechanism; this executes script text taken from a remote page,
    # so the quoting stripped above is the only real barrier.
    script = "console.log(require('vm').runInNewContext('%s', Object.create(null), {timeout: 5000}));" % js

    try:
        result = subprocess.check_output(["node", "-e", script]).strip()
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise EnvironmentError("Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. (Please read the cfscrape README's Dependencies section: https://github.com/Anorov/cloudflare-scrape#dependencies.")
        raise
    except Exception:
        self.to_screen("Error executing Cloudflare IUAM Javascript.")
        raise

    try:
        # check_output returns bytes on Python 3; hand text back to the
        # caller so the return type does not depend on the Python version.
        if isinstance(result, bytes):
            result = result.decode('utf-8')
        float(result)
    except (TypeError, ValueError):
        raise ValueError("Cloudflare IUAM challenge returned unexpected answer.")

    return result
|
||||||
|
|
||||||
|
def cf_solve_and_download_webpage(self, html, download_url):
    """
    Answer a Cloudflare challenge found in html and fetch the resulting page.

    @param html          page content to inspect for a challenge
    @param download_url  URL html was requested from; its scheme and
                         netloc determine where the answer is submitted
    @return              False when html carries no challenge marker,
                         otherwise whatever `_download_webpage` returns for
                         the challenge submission
    """
    # A missing page (None/False/empty) cannot carry a challenge; treat it
    # like any other page without the marker instead of failing on `in`.
    if not html or '/cdn-cgi/l/chk_jschl' not in html:
        return False

    parsed_url = compat_urlparse.urlparse(download_url)
    domain = parsed_url.netloc
    submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)

    # The hidden inputs of the challenge form are sent back together with
    # the computed answer.
    form_data = self._form_hidden_inputs('challenge-form', html)
    form_data['jschl_answer'] = self._cf_solve_challenge(html, domain)

    # NOTE(review): presumably Cloudflare rejects answers submitted too
    # quickly, hence the fixed pause before submitting - confirm.
    self._sleep(5, None, 'Solving Cloudflare challenge (5s)')
    return self._download_webpage(
        submit_url,
        None, 'Sending Cloudflare challenge', 'Wrong Cloudflare challenge', query=form_data
    )
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@ -36,6 +36,7 @@ from ..aes import (
|
|||||||
class CrunchyrollBaseIE(InfoExtractor):
|
class CrunchyrollBaseIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
||||||
_LOGIN_FORM = 'login_form'
|
_LOGIN_FORM = 'login_form'
|
||||||
|
_PROFILE_URL = 'https://www.crunchyroll.com/acct/membership'
|
||||||
_NETRC_MACHINE = 'crunchyroll'
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
|
|
||||||
def _call_rpc_api(self, method, video_id, note=None, data=None):
|
def _call_rpc_api(self, method, video_id, note=None, data=None):
|
||||||
@ -52,25 +53,17 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
username, password = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
return
|
return
|
||||||
'''
|
|
||||||
import cfscrape
|
|
||||||
|
|
||||||
proxies = {"http": self._downloader.params.get('proxy'), "https": self._downloader.params.get('proxy')}
|
|
||||||
tokens, user_agent = cfscrape.get_tokens(self._LOGIN_URL, proxies=proxies, user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0")
|
|
||||||
|
|
||||||
self._set_cookie( '.crunchyroll.com', 'cf_clearance',tokens['cf_clearance'])
|
|
||||||
self._set_cookie( '.crunchyroll.com', '__cfduid',tokens['__cfduid'])
|
|
||||||
'''
|
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
'https://www.crunchyroll.com/?a=formhandler',
|
'https://www.crunchyroll.com/?a=formhandler',
|
||||||
None, 'Logging in', 'Wrong login info',
|
None, 'Logging in', 'Wrong login info',
|
||||||
data=urlencode_postdata({
|
data=urlencode_postdata({
|
||||||
'formname': 'RpcApiUser_Login',
|
'formname': 'RpcApiUser_Login',
|
||||||
'next_url': 'https://www.crunchyroll.com/acct/membership',
|
'next_url': self._PROFILE_URL,
|
||||||
'fail_url': self._LOGIN_URL,
|
'fail_url': self._PROFILE_URL, # On login fail redirect to login page
|
||||||
'name': username,
|
'name': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
}), expected_status=503)
|
}), expected_status=503) # 503 for CloudFlare
|
||||||
|
|
||||||
def is_logged(webpage):
|
def is_logged(webpage):
|
||||||
return '<title>Redirecting' in webpage or '/logout' in webpage
|
return '<title>Redirecting' in webpage or '/logout' in webpage
|
||||||
@ -79,23 +72,13 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
if is_logged(login_page):
|
if is_logged(login_page):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
cf_page = self.cf_solve_and_download_webpage(login_page, self._LOGIN_URL)
|
||||||
'''
|
if cf_page:
|
||||||
print [tokens, user_agent]
|
login_page = cf_page
|
||||||
|
if is_logged(cf_page):
|
||||||
|
login_page = self._download_webpage(self._PROFILE_URL, None, 'Get new CSRF Token')
|
||||||
form_data = self._form_hidden_inputs('challenge-form', login_page)
|
if is_logged(login_page):
|
||||||
form_data['jschl_answer'] = self.solve_challenge(login_page, 'www.crunchyroll.com')
|
return
|
||||||
print form_data
|
|
||||||
self._sleep(6, None, 'Solving CloudFlare Challenge')
|
|
||||||
login_page = self._download_webpage('https://www.crunchyroll.com/cdn-cgi/l/chk_jschl', None, 'Login Form', data=urlencode_postdata(form_data), headers={
|
|
||||||
'Referer': self._LOGIN_URL,
|
|
||||||
}, expected_status= 503)
|
|
||||||
|
|
||||||
import codecs
|
|
||||||
with codecs.open("yop", "w", encoding="utf-8") as f:
|
|
||||||
f.write(login_page)
|
|
||||||
'''
|
|
||||||
|
|
||||||
login_form_str = self._search_regex(
|
login_form_str = self._search_regex(
|
||||||
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
|
||||||
@ -130,7 +113,6 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||||
|
|
||||||
raise ExtractorError('Unable to log in')
|
raise ExtractorError('Unable to log in')
|
||||||
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
@ -146,7 +128,6 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||||||
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
# Crunchyroll to not work in georestriction cases in some browsers that don't place
|
||||||
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
# the locale lang first in header. However allowing any language seems to workaround the issue.
|
||||||
request.add_header('Accept-Language', '*')
|
request.add_header('Accept-Language', '*')
|
||||||
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0')
|
|
||||||
return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
|
return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -164,7 +164,7 @@ class PhantomJSwrapper(object):
|
|||||||
cookie['expire_time'] = cookie['expiry']
|
cookie['expire_time'] = cookie['expiry']
|
||||||
self.extractor._set_cookie(**compat_kwargs(cookie))
|
self.extractor._set_cookie(**compat_kwargs(cookie))
|
||||||
|
|
||||||
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
|
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();', expected_status=None):
|
||||||
"""
|
"""
|
||||||
Downloads webpage (if needed) and executes JS
|
Downloads webpage (if needed) and executes JS
|
||||||
|
|
||||||
@ -203,7 +203,7 @@ class PhantomJSwrapper(object):
|
|||||||
if 'saveAndExit();' not in jscode:
|
if 'saveAndExit();' not in jscode:
|
||||||
raise ExtractorError('`saveAndExit();` not found in `jscode`')
|
raise ExtractorError('`saveAndExit();` not found in `jscode`')
|
||||||
if not html:
|
if not html:
|
||||||
html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
|
html = self.extractor._download_webpage(url, video_id, note=note, headers=headers, expected_status=expected_status)
|
||||||
with open(self._TMP_FILES['html'].name, 'wb') as f:
|
with open(self._TMP_FILES['html'].name, 'wb') as f:
|
||||||
f.write(html.encode('utf-8'))
|
f.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user