From 300a9da662dca1441eeab0330f02a85c21f46f0c Mon Sep 17 00:00:00 2001 From: uno20001 <> Date: Sat, 2 Nov 2019 16:25:27 +0100 Subject: [PATCH] [youtube_dl/utils] add support for octal escape sequences to js_to_json() --- youtube_dl/utils.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index aed988b88..85fe1e34a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3979,18 +3979,27 @@ def js_to_json(code): def fix_kv(m): v = m.group(0) + if v in ('true', 'false', 'null'): return v elif v.startswith('/*') or v.startswith('//') or v == ',': return "" if v[0] in ("'", '"'): + + v = v[1:-1] + + # convert Javascript's octal escape sequences (and '\0') + # into valid JSON escape sequences ( e.g '\347' => '\u00e7', '\0' => '\u0000' ) + # regex based on https://mathiasbynens.be/notes/javascript-escapes + v = re.sub(r'\\([0-7]{1,3})', lambda x: "\\u%04x" % int(x.group(1), 8), v) + v = re.sub(r'(?s)\\.|"', lambda m: { '"': '\\"', "\\'": "'", '\\\n': '', '\\x': '\\u00', - }.get(m.group(0), m.group(0)), v[1:-1]) + }.get(m.group(0), m.group(0)), v) for regex, base in INTEGER_TABLE: im = re.match(regex, v) @@ -4001,8 +4010,8 @@ def js_to_json(code): return '"%s"' % v return re.sub(r'''(?sx) - "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| - '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| + "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]?))*[^"\\]*"| + '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]?))*[^'\\]*'| {comment}|,(?={skip}[\]}}])| (?:(?