From ab72d143071cc253224b6e35962edb1dc9a34939 Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Fri, 7 Apr 2017 23:23:31 -0400
Subject: [PATCH 1/4] [kaltura] Support iframe embeds, with test

Note that these need to go back through the Generic extractor because
the iframe URLs may be redirects that cannot be parsed by KalturaIE
without being followed, and Generic checks for such redirects and
follows them. Hence dropping the IE from url_result().
---
 youtube_dl/extractor/generic.py | 17 ++++++++++++++++-
 youtube_dl/extractor/kaltura.py |  7 +++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 73911940c..4bfa3f8a1 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1080,6 +1080,21 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # Kaltura iframe embed
+            'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
+            'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
+            'info_dict': {
+                'id': '0_f2cfbpwy',
+                'ext': 'mp4',
+                'title': 'I. M. Pei: A Centennial Celebration',
+                'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
+                'upload_date': '20170403',
+                'uploader_id': 'batchUser',
+                'timestamp': 1491232186,
+            },
+            'add_ie': ['Kaltura'],
+        },
         # Eagle.Platform embed (generic URL)
         {
             'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -2290,7 +2305,7 @@ class GenericIE(InfoExtractor):
         # Look for Kaltura embeds
         kaltura_url = KalturaIE._extract_url(webpage)
         if kaltura_url:
-            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}))
 
         # Look for Eagle.Platform embeds
         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 54374ea76..f1e8b25cc 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -139,6 +139,13 @@ class KalturaIE(InfoExtractor):
                 url = smuggle_url(url, {'service_url': service_url.group(1)})
             return url
 
+        # Check for an iframe, which may require redirection.
+        mobj = re.search(
+            r"<iframe[^>]+src=['\"](?P<url>(https?:)?//www\.kaltura\.com/[^'\"]+)['\"]",
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
     def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
         params = actions[0]
         if len(actions) > 1:

From 07970f3ae299e0ccfc1f1601ce22ad0121666019 Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Sat, 8 Apr 2017 00:53:23 -0400
Subject: [PATCH 2/4] [Kaltura] Skip failing Kaltura_1 test

Fails like so:
> /Users/jhawk/src/youtube-dl/youtube_dl/extractor/kaltura.py(266)_real_extract()
-> data_url = info['dataUrl']
(Pdb) info
{u'message': u'Entry id "0_l5ye1133" not found', u'code': u'ENTRY_ID_NOT_FOUND', u'args': {u'ENTRY_ID': u'0_l5ye1133'}, u'objectType': u'KalturaAPIException'}

Also note another URL to the same video, which might be helpful in
figuring out the right Kaltura entry ID, at least maybe?
---
 youtube_dl/extractor/kaltura.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index f1e8b25cc..741dd8dc5 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
                     }],
                 },
             },
+            'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
             'params': {
                 'skip_download': True,
             },

From 4905e589d74e72fccb6e050d4e0db0d96c8e3930 Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Sat, 8 Apr 2017 04:21:52 -0400
Subject: [PATCH 3/4] [kaltura] Be rigorous on iframe

Per @dstftw, don't pull out just any kaltura.com iframes, make sure they
have /p/{PARTNER_ID} and &entry_id={ENTRY_ID} and return a kaltura: URL.

Go back to specifying the IE is Kaltura in url_result().
---
 youtube_dl/extractor/generic.py |  2 +-
 youtube_dl/extractor/kaltura.py | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 4bfa3f8a1..658533cf6 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2305,7 +2305,7 @@ class GenericIE(InfoExtractor):
         # Look for Kaltura embeds
         kaltura_url = KalturaIE._extract_url(webpage)
         if kaltura_url:
-            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}))
+            return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
 
         # Look for Eagle.Platform embeds
         eagleplatform_url = EaglePlatformIE._extract_url(webpage)
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 741dd8dc5..6e992ee4b 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -128,7 +128,17 @@ class KalturaIE(InfoExtractor):
                         (?P<q2>["\'])entry_?[Ii]d(?P=q2)
                     )\s*:\s*
                     (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
-                ''', webpage))
+                ''', webpage) or
+            re.search(
+                # <iframe src="http://www.kaltura.com/p/{PARTNER_ID}/sp/{PARTNER_ID}00/embedIframeJs/uiconf_id/{UICONF_ID}/partner_id/{PARTNER_ID}?iframeembed=true&playerId={UNIQUE_OBJ_ID}&entry_id={ENTRY_ID}" width="400" height="330" allowfullscreen webkitallowfullscreen mozAllowFullScreen frameborder="0"></iframe>
+                r'''(?xs)
+                    (?P<q1>["\'])
+                      (?:https?:)?//(?:www\.)?kaltura\.com/p/(?P<partner_id>\d+)/
+                      (?:(?!(?P=q1)).)*
+                      [\?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+                    (?P=q1)
+                ''', webpage)
+        )
         if mobj:
             embed_info = mobj.groupdict()
             url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
@@ -140,13 +150,6 @@ class KalturaIE(InfoExtractor):
                 url = smuggle_url(url, {'service_url': service_url.group(1)})
             return url
 
-        # Check for an iframe, which may require redirection.
-        mobj = re.search(
-            r"<iframe[^>]+src=['\"](?P<url>(https?:)?//www\.kaltura\.com/[^'\"]+)['\"]",
-            webpage)
-        if mobj:
-            return mobj.group('url')
-
     def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
         params = actions[0]
         if len(actions) > 1:

From a8df272cabe79552524759c7a1231c420603c9a9 Mon Sep 17 00:00:00 2001
From: John Hawkinson <jhawk@mit.edu>
Date: Sat, 8 Apr 2017 05:33:25 -0400
Subject: [PATCH 4/4] [Kaltura] comment; anchor RE to 'iframe'

Remove the example of an iframe spec from the spec page, and instead
link to it. Do so at the top of the method as it documents the
previous two re.search()s as well.

Limit the iframe search to URLs that actually appear in an
<iframe src="..."> attribute, rather than inside any quoted string.
---
 youtube_dl/extractor/kaltura.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 6e992ee4b..c6efdea93 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -109,6 +109,7 @@ class KalturaIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = (
+            # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
             re.search(
                 r"""(?xs)
                     kWidget\.(?:thumb)?[Ee]mbed\(
@@ -130,9 +131,8 @@ class KalturaIE(InfoExtractor):
                     (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
                 ''', webpage) or
             re.search(
-                # <iframe src="http://www.kaltura.com/p/{PARTNER_ID}/sp/{PARTNER_ID}00/embedIframeJs/uiconf_id/{UICONF_ID}/partner_id/{PARTNER_ID}?iframeembed=true&playerId={UNIQUE_OBJ_ID}&entry_id={ENTRY_ID}" width="400" height="330" allowfullscreen webkitallowfullscreen mozAllowFullScreen frameborder="0"></iframe>
                 r'''(?xs)
-                    (?P<q1>["\'])
+                    <iframe[^>]+src=(?P<q1>["\'])
                       (?:https?:)?//(?:www\.)?kaltura\.com/p/(?P<partner_id>\d+)/
                       (?:(?!(?P=q1)).)*
                       [\?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)