From 286b416e96b88ba47f157458858fb35fe736401c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Tue, 31 Jan 2017 07:54:53 +0100 Subject: [PATCH 1/2] Fix extracting of JSON from javascript with comments --- test/test_utils.py | 3 +++ youtube_dl/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index a74d59f34..954bb7d8b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -791,6 +791,9 @@ class TestUtil(unittest.TestCase): on = js_to_json('{ 0: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) + on = js_to_json('{ 0: // comment\n1 }') + self.assertEqual(json.loads(on), {'0': 1}) + on = js_to_json(r'["

x<\/p>"]') self.assertEqual(json.loads(on), ['

x

']) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index cf46711b9..6c462625b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2107,7 +2107,7 @@ def js_to_json(code): v = m.group(0) if v in ('true', 'false', 'null'): return v - elif v.startswith('/*') or v == ',': + elif v.startswith('/*') or v.startswith('//') or v == ',': return "" if v[0] in ("'", '"'): @@ -2134,7 +2134,7 @@ def js_to_json(code): return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| - /\*.*?\*/|,(?=\s*[\]}])| + /\*.*?\*/|//[^\n]*|,(?=\s*[\]}])| [a-zA-Z_][.a-zA-Z_0-9]*| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| [0-9]+(?=\s*:) From 8ea62e969eff92c47828710c5da0c64a77d26b1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Tue, 31 Jan 2017 07:59:55 +0100 Subject: [PATCH 2/2] Fix extraction for iprima The variable name has been changed. --- youtube_dl/extractor/iprima.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py index da2cdc656..1a1802a3d 100644 --- a/youtube_dl/extractor/iprima.py +++ b/youtube_dl/extractor/iprima.py @@ -65,7 +65,7 @@ class IPrimaIE(InfoExtractor): options = self._parse_json( self._search_regex( - r'(?s)var\s+playerOptions\s*=\s*({.+?});', + r'(?s)var\s+TDIPlayerOptions\s*=\s*({.+?});\s*\]\]', playerpage, 'player options', default='{}'), video_id, transform_source=js_to_json, fatal=False) if options: