From d06b02cb21261503346bcedcb69a058f35d338ac Mon Sep 17 00:00:00 2001 From: alistairboyer <33817920+alistairboyer@users.noreply.github.com> Date: Sat, 24 Nov 2018 23:54:34 +0000 Subject: [PATCH 1/2] Regex error in youtube for finding function name I was getting the following regex error: youtube_dl.utils.RegexNotFoundError Looking at the raw JS in jscode it seems that the script is trying to find the "sL" bit: [this obviously changes each time] ...yt.akamaized.net/)||d.set("alr","yes");c&&d.set(b,(0,window.encodeURIComponent)(sL((0,window.decodeURIComponent)(c))));return d}; But it failed to match the original regex (line 1194) because of the additional '(0,window.encodeURIComponent)(' part This new regex (added on line 1195) fixes the problem and it works for me! I find it really hard to force youtube-dl to go via the _parse_sig_js function to test. But I have reproduced the error at least four times and confirmed the new regex solves the problem. --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3f49f3889..de7ee71e5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1192,6 +1192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]*\s*,\s*\([^)]*\)\((?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig') From e10bae10651959d7753657e116b5d29dc512bbb1 Mon Sep 17 00:00:00 2001 From: alistairboyer <33817920+alistairboyer@users.noreply.github.com> Date: Sun, 25 Nov 2018 00:49:42 +0000 Subject: [PATCH 2/2] Included whitespace matcher Previous
update didn't have a whitespace matcher here: \s*\((?P<sig>[a-zA-Z0-9$]+)\(', Added it in for robustness and to match the other regexes on the lines below. --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index de7ee71e5..d2c0726b6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1192,7 +1192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]*\s*,\s*\([^)]*\)\((?P<sig>[a-zA-Z0-9$]+)\(', + r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]*\s*,\s*\([^)]*\)\s*\((?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('), jscode, 'Initial JS player signature function name', group='sig')