From 1951bc12dc0ad061093f53283820d434ecec443d Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Thu, 5 Apr 2018 12:47:24 +0200 Subject: [PATCH 1/4] Make JSON-LD @context parsing more robust --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 890232586..3fc6e21da 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1025,7 +1025,7 @@ class InfoExtractor(object): }) for e in json_ld: - if e.get('@context') == 'http://schema.org': + if re.match(r'^https?://schema.org/?$', e.get('@context')): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: return info From 6c33309a3f2777ed735298e1a590ced60cc81a18 Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Fri, 6 Apr 2018 19:48:36 +0200 Subject: [PATCH 2/4] Check if '@context' is present in JSON-LD to avoid crash --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3fc6e21da..c3a4dccaf 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1025,7 +1025,7 @@ class InfoExtractor(object): }) for e in json_ld: - if re.match(r'^https?://schema.org/?$', e.get('@context')): + if '@context' in e and re.match(r'^https?://schema.org/?$', e.get('@context')): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: return info From f8ef225de4fcb3393642929d374d4d4928c5cec5 Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Sat, 7 Apr 2018 20:55:10 +0200 Subject: [PATCH 3/4] Check if '@context' value in JSON-LD is a String --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c3a4dccaf..38daef0a0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1025,7 +1025,7 @@ class InfoExtractor(object): }) for e in json_ld: - if '@context' in e and re.match(r'^https?://schema.org/?$', e.get('@context')): + if isinstance(e.get('@context'), str) and re.match(r'^https?://schema.org/?$', e.get('@context')): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: return info From 349daa9203a2e5fccacb14ea30970b34a4c71cdc Mon Sep 17 00:00:00 2001 From: aeph6Ee0 Date: Sat, 7 Apr 2018 21:41:09 +0200 Subject: [PATCH 4/4] Check if '@context' value in JSON-LD is a compat_str --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 38daef0a0..59b9d3739 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1025,7 +1025,7 @@ class InfoExtractor(object): }) for e in json_ld: - if isinstance(e.get('@context'), str) and re.match(r'^https?://schema.org/?$', e.get('@context')): + if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: return info