1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-21 15:25:36 +08:00

[utils.py:js_to_json] add support for octal escape sequences

This commit is contained in:
uno20001 2020-01-24 22:57:45 +01:00
parent 2a5c26c980
commit 5c62bedd6b
2 changed files with 22 additions and 8 deletions

View File

@ -918,6 +918,9 @@ class TestUtil(unittest.TestCase):
inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}'''
self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''')
inp = '''{label: "Fran\347ais"}'''
self.assertEqual(js_to_json(inp), '''{"label": "Fran\u00e7ais"}''')
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})

View File

@ -3982,20 +3982,31 @@ def js_to_json(code):
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
)
def convert_escapes(m):
    """Return the JSON replacement for one escape matched in a JS string.

    Meant to be used as the replacement callback of re.sub(): group 1 of
    the match holds the digits of an octal escape (if any), and group 0 is
    the whole matched escape sequence or a bare double quote.
    """
    octal = m.group(1)
    if octal:
        # convert Javascript's octal escape sequences (and '\0')
        # into valid JSON escape sequences (e.g. '\347' => '\u00e7', '\0' => '\u0000')
        #
        # Javascript only accepts three octal digits when the first one is
        # 0-3 (max. '\377'); for '\400'..'\777' the escape ends after two
        # digits and the third digit is an ordinary character
        # (e.g. '\400' => '\u0020' followed by '0')
        if len(octal) == 3 and octal[0] in '4567':
            return "\\u%04x%s" % (int(octal[:2], 8), octal[2])
        return "\\u%04x" % int(octal, 8)
    # convert the remaining escape sequences
    # into valid JSON; anything not listed is passed through unchanged
    return {
        '"': '\\"',       # bare double quote must be escaped in JSON
        "\\'": "'",       # JSON has no escaped single quote
        '\\\n': '',       # line continuation is dropped
        '\\x': '\\u00',   # '\xNN' => '\u00NN' (the hex digits follow the match)
    }.get(m.group(0), m.group(0))
def fix_kv(m):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
elif v.startswith('/*') or v.startswith('//') or v == ',':
return ""
if v[0] in ("'", '"'):
v = re.sub(r'(?s)\\.|"', lambda m: {
'"': '\\"',
"\\'": "'",
'\\\n': '',
'\\x': '\\u00',
}.get(m.group(0), m.group(0)), v[1:-1])
v = re.sub(r'(?s)\\(?:([0-7]{1,3})|.)|"', convert_escapes, v[1:-1])
for regex, base in INTEGER_TABLE:
im = re.match(regex, v)
@ -4006,8 +4017,8 @@ def js_to_json(code):
return '"%s"' % v
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n01234567]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n01234567]))*[^'\\]*'|
{comment}|,(?={skip}[\]}}])|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|