From 45abe2051dafed38f4c8319a7bfe9bdbf3e372b4 Mon Sep 17 00:00:00 2001
From: Nehal Patel <nehalvpatels+github@gmail.com>
Date: Wed, 6 Jul 2016 23:36:29 -0500
Subject: [PATCH 1/5] [BrainPOP] Add new extractor

---
 youtube_dl/extractor/brainpop.py   | 47 ++++++++++++++++++++++++++++++
 youtube_dl/extractor/extractors.py |  1 +
 2 files changed, 48 insertions(+)
 create mode 100644 youtube_dl/extractor/brainpop.py
diff --git a/youtube_dl/extractor/brainpop.py b/youtube_dl/extractor/brainpop.py
new file mode 100644
index 000000000..6b3dd6a92
--- /dev/null
+++ b/youtube_dl/extractor/brainpop.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class BrainPOPIE(InfoExtractor):
+    _VALID_URL = r'https?:\/\/(?:(.+)\.)?brainpop\.com\/(?P<id>[^\r\n]+)'
+    _TEST = {
+        'url': 'https://www.brainpop.com/english/freemovies/williamshakespeare/',
+        'md5': '676d936271b628dc05e4cec377751919',
+        'info_dict': {
+            'id': 'english/freemovies/williamshakespeare/',
+            'ext': 'mp4',
+            'title': 'William Shakespeare - BrainPOP',
+            'thumbnail': 're:^https?://.*\.png$',
+            'description': 'He could do comedies, tragedies, histories and poetry.  Learn about the greatest playwright in the history of the English language!',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        self.report_extraction(video_id)
+
+        ec_token = self._html_search_regex(r'ec_token : \'(.+)\'', webpage, "token")
+        movie_cdn_path = self._html_search_regex(r'movie_cdn_path : \'(.+)\'', webpage, "cdn path")
+        mp4 = self._html_search_regex(r'mp4":"([^"]*)', webpage, "mp4")
+
+        url = movie_cdn_path + mp4.replace("\\", "") + "?" + ec_token
+
+        title = self._html_search_regex(r'type":"Movie","name":"([^"]*)"', webpage, "title")
+
+        thumbnail_cdn = self._html_search_regex(r"'cdn_path' : '([^']*)'", webpage, "thumbnail cdn")
+        thumbnail_image = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"([^"]*)"', webpage, "thumbnail")
+        thumbnail = thumbnail_cdn + thumbnail_image.replace("\\", "")
+
+        description = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"[^"]*","description":"([^"]*)"', webpage, "description")
+
+        return {
+            'id': video_id,
+            'url': url,
+            'title': title,
+            'thumbnail': thumbnail,
+            'description': description,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4e2a2f2e9..cc45f5c23 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -93,6 +93,7 @@ from .bokecc import BokeCCIE
 from .bpb import BpbIE
 from .br import BRIE
 from .bravotv import BravoTVIE
+from .brainpop import BrainPOPIE
 from .breakcom import BreakIE
 from .brightcove import (
     BrightcoveLegacyIE,

From f56a9dbdbc20eebc7c93a5ea45ddcdf841236e9c Mon Sep 17 00:00:00 2001
From: Nehal Patel <nehalvpatels+github@gmail.com>
Date: Wed, 6 Jul 2016 23:53:10 -0500
Subject: [PATCH 2/5] [BrainPOP] Clean up code and account for non-mandatory
 fields

---
 youtube_dl/extractor/brainpop.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/brainpop.py b/youtube_dl/extractor/brainpop.py
index 6b3dd6a92..1dee770e8 100644
--- a/youtube_dl/extractor/brainpop.py
+++ b/youtube_dl/extractor/brainpop.py
@@ -24,19 +24,19 @@ class BrainPOPIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
-        ec_token = self._html_search_regex(r'ec_token : \'(.+)\'', webpage, "token")
-        movie_cdn_path = self._html_search_regex(r'movie_cdn_path : \'(.+)\'', webpage, "cdn path")
-        mp4 = self._html_search_regex(r'mp4":"([^"]*)', webpage, "mp4")
+        ec_token = self._html_search_regex(r'ec_token : \'(.+)\'', webpage, 'token')
+        movie_cdn_path = self._html_search_regex(r'movie_cdn_path : \'(.+)\'', webpage, 'cdn path')
+        mp4 = self._html_search_regex(r'mp4":"([^"]*)', webpage, 'mp4')
 
-        url = movie_cdn_path + mp4.replace("\\", "") + "?" + ec_token
+        url = movie_cdn_path + mp4.replace('\\', '') + '?' + ec_token
 
-        title = self._html_search_regex(r'type":"Movie","name":"([^"]*)"', webpage, "title")
+        title = self._html_search_regex(r'type":"Movie","name":"([^"]*)"', webpage, 'title') or self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
 
-        thumbnail_cdn = self._html_search_regex(r"'cdn_path' : '([^']*)'", webpage, "thumbnail cdn")
-        thumbnail_image = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"([^"]*)"', webpage, "thumbnail")
-        thumbnail = thumbnail_cdn + thumbnail_image.replace("\\", "")
+        thumbnail_cdn = self._html_search_regex(r"'cdn_path' : '([^']*)'", webpage, 'thumbnail cdn', fatal=False)
+        thumbnail_image = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"([^"]*)"', webpage, 'thumbnail', fatal=False)
+        thumbnail = thumbnail_cdn + thumbnail_image.replace('\\', '')
 
-        description = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"[^"]*","description":"([^"]*)"', webpage, "description")
+        description = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"[^"]*","description":"([^"]*)"', webpage, 'description', fatal=False)
 
         return {
             'id': video_id,

From b00d17edeaaa18715472061857bf539a6a2f2bdf Mon Sep 17 00:00:00 2001
From: Nehal Patel <nehalvpatels+github@gmail.com>
Date: Fri, 8 Jul 2016 20:20:57 -0500
Subject: [PATCH 3/5] [BrainPOP] Switch from regex to parsing JSON and include
 both resolutions

---
 youtube_dl/extractor/brainpop.py | 41 +++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/brainpop.py b/youtube_dl/extractor/brainpop.py
index 1dee770e8..a930942b2 100644
--- a/youtube_dl/extractor/brainpop.py
+++ b/youtube_dl/extractor/brainpop.py
@@ -24,24 +24,43 @@ class BrainPOPIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
-        ec_token = self._html_search_regex(r'ec_token : \'(.+)\'', webpage, 'token')
-        movie_cdn_path = self._html_search_regex(r'movie_cdn_path : \'(.+)\'', webpage, 'cdn path')
-        mp4 = self._html_search_regex(r'mp4":"([^"]*)', webpage, 'mp4')
+        ec_token = self._html_search_regex(r"ec_token : '([^']*)'", webpage, 'token')
 
-        url = movie_cdn_path + mp4.replace('\\', '') + '?' + ec_token
+        settings = self._parse_json(self._html_search_regex(r'var settings = ([^;]*)', webpage, 'settings'), video_id)
+        title = settings['title']
+        description = settings['description']
 
-        title = self._html_search_regex(r'type":"Movie","name":"([^"]*)"', webpage, 'title') or self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        global_content = self._parse_json(self._html_search_regex(r'var global_content = ([^;]*)', webpage, 'global content').replace("'", '"'), video_id)
+        cdn_path = global_content['cdn_path']
+        movie_cdn_path = global_content['movie_cdn_path']
 
-        thumbnail_cdn = self._html_search_regex(r"'cdn_path' : '([^']*)'", webpage, 'thumbnail cdn', fatal=False)
-        thumbnail_image = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"([^"]*)"', webpage, 'thumbnail', fatal=False)
-        thumbnail = thumbnail_cdn + thumbnail_image.replace('\\', '')
+        content = self._parse_json(self._html_search_regex(r'var content = ([^;]*)', webpage, 'content'), video_id)
+        movies = content['category']['unit']['topic']['movies']
+        screenshots = content['category']['unit']['topic']['screenshots']
 
-        description = self._html_search_regex(r'type":"Movie","name":"[^"]*","image":"[^"]*","description":"([^"]*)"', webpage, 'description', fatal=False)
+        formats = []
+        formats.append({
+            'url': movie_cdn_path + movies['mp4'] + '?' + ec_token,
+            'height': 768,
+            'width': 768,
+        })
+        formats.append({
+            'url': movie_cdn_path + movies['mp4_small'] + '?' + ec_token,
+            'height': 480,
+            'width': 480,
+        })
+        self._sort_formats(formats)
+
+        thumbnails = []
+        for (i, screenshot) in enumerate(screenshots):
+            thumbnails.append({
+                'url': cdn_path + screenshot,
+            })
 
         return {
             'id': video_id,
-            'url': url,
             'title': title,
-            'thumbnail': thumbnail,
+            'formats': formats,
+            'thumbnails': thumbnails,
             'description': description,
         }

From 7022e24b1dc8897cbbdd807b32fbf2691b7ecf44 Mon Sep 17 00:00:00 2001
From: Nehal Patel <nehalvpatels+github@gmail.com>
Date: Tue, 12 Jul 2016 19:08:03 -0500
Subject: [PATCH 4/5] [BrainPOP] Optimize regex and extractor, improve
 metadata, and add subscription video detection

---
 youtube_dl/extractor/brainpop.py | 51 ++++++++++++++++----------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/youtube_dl/extractor/brainpop.py b/youtube_dl/extractor/brainpop.py
index a930942b2..f3fc66ee1 100644
--- a/youtube_dl/extractor/brainpop.py
+++ b/youtube_dl/extractor/brainpop.py
@@ -2,42 +2,44 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import (
+    remove_end
+)
 
 
 class BrainPOPIE(InfoExtractor):
-    _VALID_URL = r'https?:\/\/(?:(.+)\.)?brainpop\.com\/(?P<id>[^\r\n]+)'
+    _VALID_URL = r'https?:\/\/(?:(.+)\.)?brainpop\.com\/[^/]+/[^/]+/(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'https://www.brainpop.com/english/freemovies/williamshakespeare/',
         'md5': '676d936271b628dc05e4cec377751919',
         'info_dict': {
-            'id': 'english/freemovies/williamshakespeare/',
+            'id': '3026',
+            'display_id': 'williamshakespeare',
             'ext': 'mp4',
-            'title': 'William Shakespeare - BrainPOP',
+            'title': 'William Shakespeare',
             'thumbnail': 're:^https?://.*\.png$',
             'description': 'He could do comedies, tragedies, histories and poetry.  Learn about the greatest playwright in the history of the English language!',
         }
     }
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
 
-        self.report_extraction(video_id)
+        content = self._parse_json(self._html_search_regex(r'var content = ([^;]*)', webpage, 'content'), display_id)
+        
+        if content['category']['unit']['topic']['free'] == 'no':
+            self.raise_login_required('%s is only available for users with Subscriptions' % display_id)
 
+        global_content = self._parse_json(self._html_search_regex(r'var global_content = ([^;]*)', webpage, 'global content').replace("'", '"'), display_id)
+        cdn_path = global_content.get('cdn_path', 'https://cdn.brainpop.com')
+        movie_cdn_path = global_content.get('movie_cdn_path', 'https://svideos.brainpop.com')
         ec_token = self._html_search_regex(r"ec_token : '([^']*)'", webpage, 'token')
 
-        settings = self._parse_json(self._html_search_regex(r'var settings = ([^;]*)', webpage, 'settings'), video_id)
-        title = settings['title']
-        description = settings['description']
+        screenshots = content['category']['unit']['topic'].get('screenshots', {})
+        thumbnails = [{'url': cdn_path + screenshot} for screenshot in screenshots]
 
-        global_content = self._parse_json(self._html_search_regex(r'var global_content = ([^;]*)', webpage, 'global content').replace("'", '"'), video_id)
-        cdn_path = global_content['cdn_path']
-        movie_cdn_path = global_content['movie_cdn_path']
-
-        content = self._parse_json(self._html_search_regex(r'var content = ([^;]*)', webpage, 'content'), video_id)
         movies = content['category']['unit']['topic']['movies']
-        screenshots = content['category']['unit']['topic']['screenshots']
-
         formats = []
         formats.append({
             'url': movie_cdn_path + movies['mp4'] + '?' + ec_token,
@@ -50,17 +52,14 @@ class BrainPOPIE(InfoExtractor):
             'width': 480,
         })
         self._sort_formats(formats)
-
-        thumbnails = []
-        for (i, screenshot) in enumerate(screenshots):
-            thumbnails.append({
-                'url': cdn_path + screenshot,
-            })
+        
+        settings = self._parse_json(self._html_search_regex(r'var settings = ([^;]*)', webpage, 'settings'), display_id)
 
         return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
+            'id': content['category']['unit']['topic']['EntryID'],
+            'display_id': display_id,
+            'title': remove_end(settings['title'], ' - BrainPOP'),
+            'description': settings['description'],
             'thumbnails': thumbnails,
-            'description': description,
+            'formats': formats,
         }

From f02b57d5a7d59cbc63a3c36d9172d57fe0f315b7 Mon Sep 17 00:00:00 2001
From: Nehal Patel <nehalvpatels+github@gmail.com>
Date: Tue, 12 Jul 2016 19:51:50 -0500
Subject: [PATCH 5/5] [BrainPOP] Trim code and make optional metadata less
 brittle

---
 youtube_dl/extractor/brainpop.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/brainpop.py b/youtube_dl/extractor/brainpop.py
index f3fc66ee1..7f825c114 100644
--- a/youtube_dl/extractor/brainpop.py
+++ b/youtube_dl/extractor/brainpop.py
@@ -26,20 +26,20 @@ class BrainPOPIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        content = self._parse_json(self._html_search_regex(r'var content = ([^;]*)', webpage, 'content'), display_id)
-        
-        if content['category']['unit']['topic']['free'] == 'no':
+        content = self._parse_json(self._search_regex(r'var content = ([^;]*)', webpage, 'content JSON'), display_id)
+        topic = content['category']['unit']['topic']
+
+        if topic.get('free') == 'no':
             self.raise_login_required('%s is only available for users with Subscriptions' % display_id)
 
-        global_content = self._parse_json(self._html_search_regex(r'var global_content = ([^;]*)', webpage, 'global content').replace("'", '"'), display_id)
+        global_content = self._parse_json(self._search_regex(r'var global_content = ([^;]*)', webpage, 'global content JSON', default='{}').replace("'", '"'), display_id)
         cdn_path = global_content.get('cdn_path', 'https://cdn.brainpop.com')
         movie_cdn_path = global_content.get('movie_cdn_path', 'https://svideos.brainpop.com')
         ec_token = self._html_search_regex(r"ec_token : '([^']*)'", webpage, 'token')
 
-        screenshots = content['category']['unit']['topic'].get('screenshots', {})
-        thumbnails = [{'url': cdn_path + screenshot} for screenshot in screenshots]
+        thumbnails = [{'url': cdn_path + screenshot} for screenshot in (topic.get('screenshots') or [])]
 
-        movies = content['category']['unit']['topic']['movies']
+        movies = topic['movies']
         formats = []
         formats.append({
             'url': movie_cdn_path + movies['mp4'] + '?' + ec_token,
@@ -52,14 +52,14 @@ class BrainPOPIE(InfoExtractor):
             'width': 480,
         })
         self._sort_formats(formats)
-        
-        settings = self._parse_json(self._html_search_regex(r'var settings = ([^;]*)', webpage, 'settings'), display_id)
+
+        settings = self._parse_json(self._search_regex(r'var settings = ([^;]*)', webpage, 'settings JSON', default='{}'), display_id)
 
         return {
-            'id': content['category']['unit']['topic']['EntryID'],
+            'id': topic['EntryID'],
             'display_id': display_id,
-            'title': remove_end(settings['title'], ' - BrainPOP'),
-            'description': settings['description'],
+            'title': remove_end(settings.get('title', display_id), ' - BrainPOP'),
+            'description': settings.get('description', ''),
             'thumbnails': thumbnails,
             'formats': formats,
         }