From dea102dd556bf842f7cfb26e7da60e048f68cf6e Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 26 Oct 2018 19:15:44 -0700 Subject: [PATCH 01/24] Enable smuggling metadata to Brightcove extractor --- youtube_dl/extractor/brightcove.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 youtube_dl/extractor/brightcove.py diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py old mode 100644 new mode 100755 index 465ae396e..8eee47283 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -596,8 +596,8 @@ class BrightcoveNewIE(AdobePassIE): return entries - def _parse_brightcove_metadata(self, json_data, video_id, headers={}): - title = json_data['name'].strip() + def _parse_brightcove_metadata(self, json_data, video_id, headers={}, smuggled_data={}): + title = smuggled_data.get('title') or json_data['name'].strip() formats = [] for source in json_data.get('sources', []): @@ -772,4 +772,4 @@ class BrightcoveNewIE(AdobePassIE): }) return self._parse_brightcove_metadata( - json_data, video_id, headers=headers) + json_data, video_id, headers=headers, smuggled_data=smuggled_data) From 8d948aec8956ee0385170f5324b1600f20718cbd Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 26 Oct 2018 19:16:14 -0700 Subject: [PATCH 02/24] [ciscolive] Add new extractor --- youtube_dl/extractor/ciscolive.py | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100755 youtube_dl/extractor/ciscolive.py diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py new file mode 100755 index 000000000..4d5d60a72 --- /dev/null +++ b/youtube_dl/extractor/ciscolive.py @@ -0,0 +1,52 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..compat import compat_urllib_parse_urlencode +from .common import InfoExtractor +from ..utils import smuggle_url + + +class CiscoLiveIE(InfoExtractor): + IE_NAME = 'ciscolive' + _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??#/session/(?P.+)' + _TEST = { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'timestamp': 1530305395, + 'uploader_id': '5647924234001', + 'upload_date': '20180629' + } + } + + # These appear to be constant across all Cisco Live presentations + # and are not tied to any user session or event + RAINFOCUS_SESSION_URL = 'https://events.rainfocus.com/api/session' + RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + + def _real_extract(self, url): + session_id = self._match_id(url) + session_info_headers = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, + 'rfWidgetId': self.RAINFOCUS_WIDGETID + } + session_info_args = { + 'url_or_request': self.RAINFOCUS_SESSION_URL, + 'video_id': session_id, + 'headers': session_info_headers, + 'data': compat_urllib_parse_urlencode({'id': session_id}) + } + session_info = self._download_json(**session_info_args) + brightcove_id = session_info['items'][0]['videos'][0]['url'] + video_title = session_info['items'][0]['title'] + + return self.url_result( + smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'title': video_title}), + 'BrightcoveNew', brightcove_id, video_title) From d441f94cbbeae981d5bab6d9fc1cb093d7fc0bf9 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 26 Oct 2018 19:31:19 -0700 Subject: [PATCH 03/24] Updated valid URL regex --- youtube_dl/extractor/ciscolive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 4d5d60a72..59d336d60 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -8,7 +8,7 @@ from ..utils import smuggle_url class CiscoLiveIE(InfoExtractor): IE_NAME = 'ciscolive' - _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??#/session/(?P.+)' + _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??.*?#/session/(?P.+)' _TEST = { 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', 'md5': 'c98acf395ed9c9f766941c70f5352e22', From c53410fe10a999b008664eb9d535d7a0de69345b Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 26 Oct 2018 19:53:56 -0700 Subject: [PATCH 04/24] Add [ciscolive] to extractor imports --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) mode change 100644 => 100755 youtube_dl/extractor/extractors.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py old mode 100644 new mode 100755 index 17b576df3..9ef3db93b --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -198,6 +198,7 @@ from .chirbit import ( ChirbitProfileIE, ) from .cinchcast import CinchcastIE +from .ciscolive import CiscoLiveIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE From 0b6e88fec39295969d992260fb9cf48535c39e5a Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sat, 27 Oct 2018 07:57:12 -0700 Subject: [PATCH 05/24] [ciscolive] Added support for downloading filters/searches (playlists) --- youtube_dl/extractor/ciscolive.py | 151 ++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 52 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 59d336d60..f233c4fec 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -1,52 +1,99 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from ..compat import compat_urllib_parse_urlencode -from .common import InfoExtractor -from ..utils import smuggle_url - - -class CiscoLiveIE(InfoExtractor): - IE_NAME = 'ciscolive' - _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??.*?#/session/(?P.+)' - _TEST = { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', - 'md5': 'c98acf395ed9c9f766941c70f5352e22', - 'info_dict': { - 'id': '5803694304001', - 'ext': 'mp4', - 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', - 'timestamp': 1530305395, - 'uploader_id': '5647924234001', - 'upload_date': '20180629' - } - } - - # These appear to be constant across all Cisco Live presentations - # and are not tied to any user session or event - RAINFOCUS_SESSION_URL = 'https://events.rainfocus.com/api/session' - RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' - RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' - - def _real_extract(self, url): - session_id = self._match_id(url) - session_info_headers = { - 'Origin': 'https://ciscolive.cisco.com', - 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, - 'rfWidgetId': self.RAINFOCUS_WIDGETID - } - session_info_args = { - 'url_or_request': self.RAINFOCUS_SESSION_URL, - 'video_id': session_id, - 'headers': session_info_headers, - 'data': compat_urllib_parse_urlencode({'id': session_id}) - } - session_info = self._download_json(**session_info_args) - brightcove_id = session_info['items'][0]['videos'][0]['url'] - video_title = session_info['items'][0]['title'] - - return self.url_result( - smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, - {'title': video_title}), - 'BrightcoveNew', brightcove_id, video_title) +# coding: utf-8 +from __future__ import unicode_literals + +import re +from ..compat import compat_urllib_parse_urlencode +from .common import InfoExtractor +from ..utils import smuggle_url + + +class CiscoLiveIE(InfoExtractor): + IE_NAME = 'ciscolive' + _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??(?P.*?)#/(?:session/(?P.+))?$' + _TESTS = [{ + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'timestamp': 1530305395, + 'uploader_id': '5647924234001', + 'upload_date': '20180629' + } + }, { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'md5': '993d4cf051f6174059328b1dce8e94bd', + 'info_dict': { + 'id': '5803751616001', + 'ext': 'mp4', + 'timestamp': 1530316421, + 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco', + 'uploader_id': '5647924234001', + 'upload_date': '20180629', + } + }, { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'md5': '80e0c3b87e373fe3a3316b934b8915bf', + 'info_dict': { + 'id': '5803735679001', + 'ext': 'mp4', + 'timestamp': 1530311842, + 'title': 'Beating the CCIE Routing & Switching', + 'uploader_id': '5647924234001', + 'upload_date': '20180629', + } + }] + + # These appear to be constant across all Cisco Live presentations + # and are not tied to any user session or event + RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' + RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + + def _get_brightcove_url(self, result): + """ Returns a Brightcove URL result from Rainfocus API result + + """ + bc_id = result['videos'][0]['url'] + video_title = result['title'] + return self.url_result( + smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % bc_id, + {'title': video_title}), + 'BrightcoveNew', bc_id, video_title) + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + rf_api_headers = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, + 'rfWidgetId': self.RAINFOCUS_WIDGETID, + 'Referer': url + } + rf_api_args = { + 'video_id': None, + 'headers': rf_api_headers + } + + # Single session URL (single video) + if m.group('id'): + rf_id = m.groups('id')[0] + rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'session' + rf_api_args['video_id'] = rf_id + rf_api_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) + rf_api_result = self._download_json(**rf_api_args) + rf_item = rf_api_result['items'][0] + return self._get_brightcove_url(rf_item) + else: + # Filter query URL (multiple videos) + if m.group('query'): + rf_query = m.groups('query')[0] + rf_query = str(rf_query + "&type=session&size=1000") + data = rf_query + rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' + rf_api_args['data'] = data + rf_api_args['video_id'] = None + rf_api_result = self._download_json(**rf_api_args) + entries = [self._get_brightcove_url(r) for r in rf_api_result['sectionList'][0]['items']] + return self.playlist_result(entries) From c9082fdedcefa0c706ed3ff8734445bf277e10c5 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sat, 27 Oct 2018 08:12:24 -0700 Subject: [PATCH 06/24] Fixed re match logic for all unit tests --- youtube_dl/extractor/ciscolive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index f233c4fec..5ce36c4bd 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -78,7 +78,7 @@ class CiscoLiveIE(InfoExtractor): # Single session URL (single video) if m.group('id'): - rf_id = m.groups('id')[0] + rf_id = m.group('id') rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'session' rf_api_args['video_id'] = rf_id rf_api_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) @@ -88,7 +88,7 @@ class CiscoLiveIE(InfoExtractor): else: # Filter query URL (multiple videos) if m.group('query'): - rf_query = m.groups('query')[0] + rf_query = m.group('query') rf_query = str(rf_query + "&type=session&size=1000") data = rf_query rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' From 143e35bceffb70a8602f3088062de017ea75360f Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sat, 27 Oct 2018 11:31:02 -0700 Subject: [PATCH 07/24] Improved _VALID_URL regex --- youtube_dl/extractor/ciscolive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 5ce36c4bd..e0acd9edb 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -9,7 +9,7 @@ from ..utils import smuggle_url class CiscoLiveIE(InfoExtractor): IE_NAME = 'ciscolive' - _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??(?P.*?)#/(?:session/(?P.+))?$' + _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' _TESTS = [{ 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', 'md5': 'c98acf395ed9c9f766941c70f5352e22', From fabaef4478ac55f72fd95b8647743a7dd22c83b2 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sat, 27 Oct 2018 11:45:01 -0700 Subject: [PATCH 08/24] Added more verbose output --- youtube_dl/extractor/ciscolive.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index e0acd9edb..9413e647d 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -58,6 +58,8 @@ class CiscoLiveIE(InfoExtractor): """ bc_id = result['videos'][0]['url'] video_title = result['title'] + self.to_screen('Resolved Brightcove ID: %s' % bc_id) + self.to_screen('Found video "%s"' % video_title) return self.url_result( smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % bc_id, {'title': video_title}), @@ -79,6 +81,8 @@ class CiscoLiveIE(InfoExtractor): # Single session URL (single video) if m.group('id'): rf_id = m.group('id') + self.to_screen('Downloading video for Cisco Live session ID %s' % + rf_id) rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'session' rf_api_args['video_id'] = rf_id rf_api_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) @@ -89,6 +93,8 @@ class CiscoLiveIE(InfoExtractor): # Filter query URL (multiple videos) if m.group('query'): rf_query = m.group('query') + self.to_screen('Downloading video collection for query %s' % + rf_query) rf_query = str(rf_query + "&type=session&size=1000") data = rf_query rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' From 70f767da43f967b4c30c66827cd30d9486ee1ae3 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sun, 28 Oct 2018 10:16:03 -0700 Subject: [PATCH 09/24] [ciscolive] Improved metadata extraction --- youtube_dl/extractor/ciscolive.py | 93 ++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 9413e647d..3ac53a6f4 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -47,26 +47,68 @@ class CiscoLiveIE(InfoExtractor): # These appear to be constant across all Cisco Live presentations # and are not tied to any user session or event - RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' - RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' - RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' - - def _get_brightcove_url(self, result): - """ Returns a Brightcove URL result from Rainfocus API result + RAINFOCUS_API_URL = "https://events.rainfocus.com/api/%s" + RAINFOCUS_APIPROFILEID = "Na3vqYdAlJFSxhYTYQGuMbpafMqftalz" + RAINFOCUS_WIDGETID = "n6l4Lo05R8fiy3RpUBm447dZN8uNWoye" + BRIGHTCOVE_ACCOUNT_ID = "5647924234001" + BRIGHTCOVE_URL_TEMPLATE = "http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s" + def _parse_rf_item(self, rf_item): + """ Parses metadata and passes to Brightcove extractor + """ - bc_id = result['videos'][0]['url'] - video_title = result['title'] - self.to_screen('Resolved Brightcove ID: %s' % bc_id) - self.to_screen('Found video "%s"' % video_title) + # Metadata parsed from Rainfocus API result + # Not all of which is appropriate to pass to Brightcove extractor + rf_result = { + "event_name": rf_item.get("eventName"), + # Full event name [Cisco Live EMEA 2016] + "event_label": rf_item.get("eventLabel"), + # Year/location [2016 Berlin] + "sess_rf_id": rf_item.get("eventId"), + # Rainfocus ID [14382715417240cleu16] + "sess_abbr": rf_item.get("abbreviation"), + # Shorthand session ID [BRKCRS-2501] + "sess_title": rf_item.get("title"), + # Full session title [Campus QoS Design-Simplified] + "sess_desc": rf_item.get("abstract"), + # Description [This session will apply Cisco's QoS strategy for rich media...] + "sess_pres_name": rf_item["participants"][0]["fullName"], # TODO: Needs safe get() method + # Presenter's full name [Tim Szigeti] + "sess_pres_title": rf_item["participants"][0]["jobTitle"], + # Presenter's job title [Principal Engineer - Technical Marketing] + "sess_pdf_url": rf_item["files"][0]["url"], + # Presentation PDF URL [https://clnv.s3.amazonaws.com/2016/eur/pdf/BRKCRS-2501.pdf] + "sess_bc_id": rf_item["videos"][0]["url"], + # Session Brightcove video ID [5803710412001] + "sess_bc_url": self.BRIGHTCOVE_URL_TEMPLATE % rf_item["videos"][0]["url"], + # Session Brightcove video URL [http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=5803710412001] + "sess_duration": rf_item["times"][0]["length"] * 60, + # Session duration in seconds [7200] + "sess_location": rf_item["times"][0]["room"] + # Session location [Hall 7.3 Breakout Room 732] + } + + # Metadata passed to final Brightcove extractor + # TODO: Only title is passed--need to work on how to best merge smuggled metadata + metadata = { + "id": rf_result.get("sess_abbr"), + "title": rf_result.get("sess_title"), + "creator": rf_result.get("sess_pres_name"), + "description": rf_result.get("sess_desc"), + "series": rf_result.get("event_name"), + "duration": rf_result["sess_duration"], + "location": rf_result["sess_location"] + } + self.to_screen("Session: %s [%s]" % (rf_result["sess_title"], rf_result["sess_abbr"])) + self.to_screen("Presenter: %s, %s" % (rf_result["sess_pres_name"], rf_result["sess_pres_title"])) + self.to_screen("Presentation PDF: %s" % rf_result["sess_pdf_url"]) return self.url_result( - smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % bc_id, - {'title': video_title}), - 'BrightcoveNew', bc_id, video_title) + smuggle_url(rf_result["sess_bc_url"], metadata), + 'BrightcoveNew', rf_result["sess_bc_id"], + rf_result["sess_title"]) def _real_extract(self, url): - m = re.match(self._VALID_URL, url) + mobj = re.match(self._VALID_URL, url) rf_api_headers = { 'Origin': 'https://ciscolive.cisco.com', 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, @@ -79,27 +121,26 @@ class CiscoLiveIE(InfoExtractor): } # Single session URL (single video) - if m.group('id'): - rf_id = m.group('id') - self.to_screen('Downloading video for Cisco Live session ID %s' % - rf_id) + if mobj.group('id'): + rf_id = mobj.group('id') rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'session' rf_api_args['video_id'] = rf_id rf_api_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) + self.to_screen('Video for session ID %s' % rf_id) rf_api_result = self._download_json(**rf_api_args) rf_item = rf_api_result['items'][0] - return self._get_brightcove_url(rf_item) + return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) - if m.group('query'): - rf_query = m.group('query') - self.to_screen('Downloading video collection for query %s' % - rf_query) + if mobj.group('query'): + rf_query = mobj.group('query') rf_query = str(rf_query + "&type=session&size=1000") data = rf_query rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' rf_api_args['data'] = data - rf_api_args['video_id'] = None + # Query JSON results offer no obvious way to ID the search + rf_api_args['video_id'] = "Filter query" + self.to_screen('Video collection for query %s' % rf_query) rf_api_result = self._download_json(**rf_api_args) - entries = [self._get_brightcove_url(r) for r in rf_api_result['sectionList'][0]['items']] + entries = [self._parse_rf_item(rf_item) for rf_item in rf_api_result['sectionList'][0]['items']] return self.playlist_result(entries) From fe9791254e5660ebac6617d505e41a89510950b8 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sun, 28 Oct 2018 14:03:36 -0700 Subject: [PATCH 10/24] [ciscolive] Exclude results without valid video URL --- youtube_dl/extractor/ciscolive.py | 51 ++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 3ac53a6f4..a5cb6772c 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -55,7 +55,7 @@ class CiscoLiveIE(InfoExtractor): def _parse_rf_item(self, rf_item): """ Parses metadata and passes to Brightcove extractor - + """ # Metadata parsed from Rainfocus API result # Not all of which is appropriate to pass to Brightcove extractor @@ -72,7 +72,7 @@ class CiscoLiveIE(InfoExtractor): # Full session title [Campus QoS Design-Simplified] "sess_desc": rf_item.get("abstract"), # Description [This session will apply Cisco's QoS strategy for rich media...] - "sess_pres_name": rf_item["participants"][0]["fullName"], # TODO: Needs safe get() method + "sess_pres_name": rf_item["participants"][0]["fullName"], # TODO: Needs safe get() method # Presenter's full name [Tim Szigeti] "sess_pres_title": rf_item["participants"][0]["jobTitle"], # Presenter's job title [Principal Engineer - Technical Marketing] @@ -87,7 +87,7 @@ class CiscoLiveIE(InfoExtractor): "sess_location": rf_item["times"][0]["room"] # Session location [Hall 7.3 Breakout Room 732] } - + # Metadata passed to final Brightcove extractor # TODO: Only title is passed--need to work on how to best merge smuggled metadata metadata = { @@ -96,16 +96,30 @@ class CiscoLiveIE(InfoExtractor): "creator": rf_result.get("sess_pres_name"), "description": rf_result.get("sess_desc"), "series": rf_result.get("event_name"), - "duration": rf_result["sess_duration"], - "location": rf_result["sess_location"] + "duration": rf_result.get("sess_duration"), + "location": rf_result.get("sess_location") } self.to_screen("Session: %s [%s]" % (rf_result["sess_title"], rf_result["sess_abbr"])) self.to_screen("Presenter: %s, %s" % (rf_result["sess_pres_name"], rf_result["sess_pres_title"])) self.to_screen("Presentation PDF: %s" % rf_result["sess_pdf_url"]) - return self.url_result( - smuggle_url(rf_result["sess_bc_url"], metadata), - 'BrightcoveNew', rf_result["sess_bc_id"], - rf_result["sess_title"]) + return self.url_result(smuggle_url(rf_result["sess_bc_url"], metadata), + 'BrightcoveNew', rf_result["sess_bc_id"], + rf_result["sess_title"]) + + def _check_bc_url_exists(self, rf_item): + """ Checks for the existence of a Brightcove URL + + """ + msg = "Skipping session that does not include a valid video URL: %s" % rf_item.get("title", "Unknown title") + try: + bc_id = rf_item["videos"][0]["url"] + mobj = re.match(r'\d+', bc_id) + if mobj: + return rf_item + else: + self.report_warning(msg) + except IndexError: + self.report_warning(msg) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -128,19 +142,20 @@ class CiscoLiveIE(InfoExtractor): rf_api_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) self.to_screen('Video for session ID %s' % rf_id) rf_api_result = self._download_json(**rf_api_args) - rf_item = rf_api_result['items'][0] + rf_item = self._check_bc_url_exists(rf_api_result['items'][0]) return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) if mobj.group('query'): - rf_query = mobj.group('query') - rf_query = str(rf_query + "&type=session&size=1000") - data = rf_query + rf_query = str(rf_query + '&type=session&size=1000') rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' - rf_api_args['data'] = data + rf_api_args['data'] = rf_query # Query JSON results offer no obvious way to ID the search - rf_api_args['video_id'] = "Filter query" - self.to_screen('Video collection for query %s' % rf_query) - rf_api_result = self._download_json(**rf_api_args) - entries = [self._parse_rf_item(rf_item) for rf_item in rf_api_result['sectionList'][0]['items']] + rf_api_args['video_id'] = 'Filter query' + self.to_screen('Video collection for filter query "%s"' % rf_query) + rf_api_results = self._download_json(**rf_api_args) + # Not all sessions have videos; filter them out before moving on + rf_video_results = [rf_item for rf_item in rf_api_results["sectionList"][0]["items"] + if self._check_bc_url_exists(rf_item)] + entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] return self.playlist_result(entries) From d9a07eabc7f53d84c796865f8aa40a06394fb41a Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Sun, 28 Oct 2018 14:05:21 -0700 Subject: [PATCH 11/24] [ciscolive] Fix regression --- youtube_dl/extractor/ciscolive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index a5cb6772c..1ec8f9e03 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -147,6 +147,7 @@ class CiscoLiveIE(InfoExtractor): else: # Filter query URL (multiple videos) if mobj.group('query'): + rf_query = mobj.group('query') rf_query = str(rf_query + '&type=session&size=1000') rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' rf_api_args['data'] = rf_query From 28743f5a8b37c00a85b09b9b44a306cc7a1e8bc9 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 09:49:31 -0700 Subject: [PATCH 12/24] [ciscolive] Refactored several items; print video URL --- youtube_dl/extractor/ciscolive.py | 131 ++++++++++++++++-------------- 1 file changed, 70 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 1ec8f9e03..b788b02b5 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -43,15 +43,25 @@ class CiscoLiveIE(InfoExtractor): 'uploader_id': '5647924234001', 'upload_date': '20180629', } + }, { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.solutions=scpsSolutions_cleanair#/', + 'md5': '80e0c3b87e373fe3a3316b934b8915bf', + 'info_dict': { + 'id': '5803735679001', + 'ext': 'mp4', + 'timestamp': 1530311842, + 'title': 'Beating the CCIE Routing & Switching', + 'uploader_id': '5647924234001', + 'upload_date': '20180629', + } }] # These appear to be constant across all Cisco Live presentations # and are not tied to any user session or event - RAINFOCUS_API_URL = "https://events.rainfocus.com/api/%s" - RAINFOCUS_APIPROFILEID = "Na3vqYdAlJFSxhYTYQGuMbpafMqftalz" - RAINFOCUS_WIDGETID = "n6l4Lo05R8fiy3RpUBm447dZN8uNWoye" - BRIGHTCOVE_ACCOUNT_ID = "5647924234001" - BRIGHTCOVE_URL_TEMPLATE = "http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s" + RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' + RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' def _parse_rf_item(self, rf_item): """ Parses metadata and passes to Brightcove extractor @@ -59,55 +69,55 @@ class CiscoLiveIE(InfoExtractor): """ # Metadata parsed from Rainfocus API result # Not all of which is appropriate to pass to Brightcove extractor - rf_result = { - "event_name": rf_item.get("eventName"), - # Full event name [Cisco Live EMEA 2016] - "event_label": rf_item.get("eventLabel"), - # Year/location [2016 Berlin] - "sess_rf_id": rf_item.get("eventId"), - # Rainfocus ID [14382715417240cleu16] - "sess_abbr": rf_item.get("abbreviation"), - # Shorthand session ID [BRKCRS-2501] - "sess_title": rf_item.get("title"), - # Full session title [Campus QoS Design-Simplified] - "sess_desc": rf_item.get("abstract"), - # Description [This session will apply Cisco's QoS strategy for rich media...] - "sess_pres_name": rf_item["participants"][0]["fullName"], # TODO: Needs safe get() method - # Presenter's full name [Tim Szigeti] - "sess_pres_title": rf_item["participants"][0]["jobTitle"], - # Presenter's job title [Principal Engineer - Technical Marketing] - "sess_pdf_url": rf_item["files"][0]["url"], - # Presentation PDF URL [https://clnv.s3.amazonaws.com/2016/eur/pdf/BRKCRS-2501.pdf] - "sess_bc_id": rf_item["videos"][0]["url"], - # Session Brightcove video ID [5803710412001] - "sess_bc_url": self.BRIGHTCOVE_URL_TEMPLATE % rf_item["videos"][0]["url"], - # Session Brightcove video URL [http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=5803710412001] - "sess_duration": rf_item["times"][0]["length"] * 60, - # Session duration in seconds [7200] - "sess_location": rf_item["times"][0]["room"] - # Session location [Hall 7.3 Breakout Room 732] - } + # but might be nice to print to output + + event_name = rf_item.get('eventName') + # Full event name [Cisco Live EMEA 2016] + # rf_id = rf_item.get('eventId') + # Rainfocus ID [14382715417240cleu16] + cl_id = rf_item.get('abbreviation') + # Cisco Live ID - Shorthand session ID [BRKCRS-2501] + title = rf_item.get('title') + # Full session title [Campus QoS Design-Simplified] + description = rf_item.get('abstract') + # Description [This session will apply Cisco's QoS strategy for rich media...] + presenter_name = rf_item['participants'][0]['fullName'] # TODO: Needs safe get() method + # Presenter's full name [Tim Szigeti] + presenter_title = rf_item['participants'][0]['jobTitle'] + # Presenter's job title [Principal Engineer - Technical Marketing] + pdf_url = rf_item['files'][0]['url'] + # Presentation PDF URL [https://clnv.s3.amazonaws.com/2016/eur/pdf/BRKCRS-2501.pdf] + bc_id = rf_item['videos'][0]['url'] + # Brightcove video ID [5803710412001] + bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id + # Brightcove video URL [http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=5803710412001] + duration = rf_item['times'][0]['length'] * 60 + # Duration. Provided in minutes * 60 = seconds [7200] + location = rf_item['times'][0]['room'] + # Location [Hall 7.3 Breakout Room 732] # Metadata passed to final Brightcove extractor # TODO: Only title is passed--need to work on how to best merge smuggled metadata metadata = { - "id": rf_result.get("sess_abbr"), - "title": rf_result.get("sess_title"), - "creator": rf_result.get("sess_pres_name"), - "description": rf_result.get("sess_desc"), - "series": rf_result.get("event_name"), - "duration": rf_result.get("sess_duration"), - "location": rf_result.get("sess_location") + 'id': cl_id, + 'title': title, + 'creator': presenter_name, + 'description': description, + 'series': event_name, + 'duration': duration, + 'location': location, } - self.to_screen("Session: %s [%s]" % (rf_result["sess_title"], rf_result["sess_abbr"])) - self.to_screen("Presenter: %s, %s" % (rf_result["sess_pres_name"], rf_result["sess_pres_title"])) - self.to_screen("Presentation PDF: %s" % rf_result["sess_pdf_url"]) - return self.url_result(smuggle_url(rf_result["sess_bc_url"], metadata), - 'BrightcoveNew', rf_result["sess_bc_id"], - rf_result["sess_title"]) + self.to_screen('Event Name: %s' % event_name) + self.to_screen('Session ID: %s' % cl_id) + self.to_screen('Session Title: %s' % title) + self.to_screen('Presenter: %s, %s' % (presenter_name, presenter_title)) + self.to_screen('Slide Deck URL: %s' % pdf_url) + self.to_screen('Video URL: %s' % bc_url) + return self.url_result(smuggle_url(bc_url, metadata), 'BrightcoveNew', bc_id, title) def _check_bc_url_exists(self, rf_item): - """ Checks for the existence of a Brightcove URL + """ Checks for the existence of a Brightcove URL in a + RainFocus result item """ msg = "Skipping session that does not include a valid video URL: %s" % rf_item.get("title", "Unknown title") @@ -123,40 +133,39 @@ class CiscoLiveIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - rf_api_headers = { + rf_headers = { 'Origin': 'https://ciscolive.cisco.com', 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, 'rfWidgetId': self.RAINFOCUS_WIDGETID, 'Referer': url } - rf_api_args = { + rf_args = { 'video_id': None, - 'headers': rf_api_headers + 'headers': rf_headers } # Single session URL (single video) if mobj.group('id'): rf_id = mobj.group('id') - rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'session' - rf_api_args['video_id'] = rf_id - rf_api_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) - self.to_screen('Video for session ID %s' % rf_id) - rf_api_result = self._download_json(**rf_api_args) - rf_item = self._check_bc_url_exists(rf_api_result['items'][0]) + rf_args['url_or_request'] = self.RAINFOCUS_API_URL % 'session' + rf_args['video_id'] = rf_id + rf_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) + rf_result = self._download_json(**rf_args) + rf_item = self._check_bc_url_exists(rf_result['items'][0]) return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) if mobj.group('query'): rf_query = mobj.group('query') rf_query = str(rf_query + '&type=session&size=1000') - rf_api_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' - rf_api_args['data'] = rf_query + rf_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' + rf_args['data'] = rf_query # Query JSON results offer no obvious way to ID the search - rf_api_args['video_id'] = 'Filter query' + rf_args['video_id'] = 'Filter query' self.to_screen('Video collection for filter query "%s"' % rf_query) - rf_api_results = self._download_json(**rf_api_args) + rf_results = self._download_json(**rf_args) # Not all sessions have videos; filter them out before moving on - rf_video_results = [rf_item for rf_item in rf_api_results["sectionList"][0]["items"] + rf_video_results = [rf_item for rf_item in rf_results["sectionList"][0]["items"] if self._check_bc_url_exists(rf_item)] entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] return self.playlist_result(entries) From b02413655e1d319af1b9d76a7409b84eb2961ab4 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 10:06:20 -0700 Subject: [PATCH 13/24] [ciscolive] Updated formatting with Black --- youtube_dl/extractor/ciscolive.py | 209 ++++++++++++++++-------------- 1 file changed, 110 insertions(+), 99 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index b788b02b5..5c776b530 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -8,60 +8,65 @@ from ..utils import smuggle_url class CiscoLiveIE(InfoExtractor): - IE_NAME = 'ciscolive' - _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' - _TESTS = [{ - 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', - 'md5': 'c98acf395ed9c9f766941c70f5352e22', - 'info_dict': { - 'id': '5803694304001', - 'ext': 'mp4', - 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', - 'timestamp': 1530305395, - 'uploader_id': '5647924234001', - 'upload_date': '20180629' - } - }, { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', - 'md5': '993d4cf051f6174059328b1dce8e94bd', - 'info_dict': { - 'id': '5803751616001', - 'ext': 'mp4', - 'timestamp': 1530316421, - 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco', - 'uploader_id': '5647924234001', - 'upload_date': '20180629', - } - }, { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', - 'md5': '80e0c3b87e373fe3a3316b934b8915bf', - 'info_dict': { - 'id': '5803735679001', - 'ext': 'mp4', - 'timestamp': 1530311842, - 'title': 'Beating the CCIE Routing & Switching', - 'uploader_id': '5647924234001', - 'upload_date': '20180629', - } - }, { - 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.solutions=scpsSolutions_cleanair#/', - 'md5': '80e0c3b87e373fe3a3316b934b8915bf', - 'info_dict': { - 'id': '5803735679001', - 'ext': 'mp4', - 'timestamp': 1530311842, - 'title': 'Beating the CCIE Routing & Switching', - 'uploader_id': '5647924234001', - 'upload_date': '20180629', - } - }] + IE_NAME = "ciscolive" + _VALID_URL = r"https://ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$" + _TESTS = [ + { + "url": "https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs", + "md5": "c98acf395ed9c9f766941c70f5352e22", + "info_dict": { + "id": "5803694304001", + "ext": "mp4", + "title": "13 Smart Automations to Monitor Your Cisco IOS Network", + "timestamp": 1530305395, + "uploader_id": "5647924234001", + "upload_date": "20180629", + }, + }, + { + "url": "https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/", + "md5": "993d4cf051f6174059328b1dce8e94bd", + "info_dict": { + "id": "5803751616001", + "ext": "mp4", + "timestamp": 1530316421, + "title": "DevNet Panel-Applying Design Thinking to Building Products in Cisco", + "uploader_id": "5647924234001", + "upload_date": "20180629", + }, + }, + { + "url": "https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/", + "md5": "80e0c3b87e373fe3a3316b934b8915bf", + "info_dict": { + "id": "5803735679001", + "ext": "mp4", + "timestamp": 1530311842, + "title": "Beating the CCIE Routing & Switching", + "uploader_id": "5647924234001", + "upload_date": "20180629", + }, + }, + { + "url": "https://ciscolive.cisco.com/on-demand-library/?search.solutions=scpsSolutions_cleanair#/", + "md5": "80e0c3b87e373fe3a3316b934b8915bf", + "info_dict": { + "id": "5803735679001", + "ext": "mp4", + "timestamp": 1530311842, + "title": "Beating the CCIE Routing & Switching", + "uploader_id": "5647924234001", + "upload_date": "20180629", + }, + }, + ] # These appear to be constant across all Cisco Live presentations # and are not tied to any user session or event - RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' - RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' - RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + RAINFOCUS_API_URL = "https://events.rainfocus.com/api/%s" + RAINFOCUS_APIPROFILEID = "Na3vqYdAlJFSxhYTYQGuMbpafMqftalz" + RAINFOCUS_WIDGETID = "n6l4Lo05R8fiy3RpUBm447dZN8uNWoye" + BRIGHTCOVE_URL_TEMPLATE = "http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s" def _parse_rf_item(self, rf_item): """ Parses metadata and passes to Brightcove extractor @@ -71,59 +76,65 @@ class CiscoLiveIE(InfoExtractor): # Not all of which is appropriate to pass to Brightcove extractor # but might be nice to print to output - event_name = rf_item.get('eventName') + event_name = rf_item.get("eventName") # Full event name [Cisco Live EMEA 2016] # rf_id = rf_item.get('eventId') # Rainfocus ID [14382715417240cleu16] - cl_id = rf_item.get('abbreviation') + cl_id = rf_item.get("abbreviation") # Cisco Live ID - Shorthand session ID [BRKCRS-2501] - title = rf_item.get('title') + title = rf_item.get("title") # Full session title [Campus QoS Design-Simplified] - description = rf_item.get('abstract') + description = rf_item.get("abstract") # Description [This session will apply Cisco's QoS strategy for rich media...] - presenter_name = rf_item['participants'][0]['fullName'] # TODO: Needs safe get() method + # TODO: Needs safe get() method + presenter_name = rf_item["participants"][0]["fullName"] # Presenter's full name [Tim Szigeti] - presenter_title = rf_item['participants'][0]['jobTitle'] + presenter_title = rf_item["participants"][0]["jobTitle"] # Presenter's job title [Principal Engineer - Technical Marketing] - pdf_url = rf_item['files'][0]['url'] + pdf_url = rf_item["files"][0]["url"] # Presentation PDF URL [https://clnv.s3.amazonaws.com/2016/eur/pdf/BRKCRS-2501.pdf] - bc_id = rf_item['videos'][0]['url'] + bc_id = rf_item["videos"][0]["url"] # Brightcove video ID [5803710412001] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id # Brightcove video URL [http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=5803710412001] - duration = rf_item['times'][0]['length'] * 60 + duration = rf_item["times"][0]["length"] * 60 # Duration. Provided in minutes * 60 = seconds [7200] - location = rf_item['times'][0]['room'] + location = rf_item["times"][0]["room"] # Location [Hall 7.3 Breakout Room 732] # Metadata passed to final Brightcove extractor # TODO: Only title is passed--need to work on how to best merge smuggled metadata metadata = { - 'id': cl_id, - 'title': title, - 'creator': presenter_name, - 'description': description, - 'series': event_name, - 'duration': duration, - 'location': location, + "id": cl_id, + "title": title, + "creator": presenter_name, + "description": description, + "series": event_name, + "duration": duration, + "location": location, } - self.to_screen('Event Name: %s' % event_name) - self.to_screen('Session ID: %s' % cl_id) - self.to_screen('Session Title: %s' % title) - self.to_screen('Presenter: %s, %s' % (presenter_name, presenter_title)) - self.to_screen('Slide Deck URL: %s' % pdf_url) - self.to_screen('Video URL: %s' % bc_url) - return self.url_result(smuggle_url(bc_url, metadata), 'BrightcoveNew', bc_id, title) + self.to_screen("Event Name: %s" % event_name) + self.to_screen("Session ID: %s" % cl_id) + self.to_screen("Session Title: %s" % title) + self.to_screen("Presenter: %s, %s" % (presenter_name, presenter_title)) + self.to_screen("Slide Deck URL: %s" % pdf_url) + self.to_screen("Video URL: %s" % bc_url) + return self.url_result( + smuggle_url(bc_url, metadata), "BrightcoveNew", bc_id, title + ) def _check_bc_url_exists(self, rf_item): """ Checks for the existence of a Brightcove URL in a RainFocus result item """ - msg = "Skipping session that does not include a valid video URL: %s" % rf_item.get("title", "Unknown title") + msg = ( + "Skipping session that does not include a valid video URL: %s" + % rf_item.get("title", "Unknown title") + ) try: bc_id = rf_item["videos"][0]["url"] - mobj = re.match(r'\d+', bc_id) + mobj = re.match(r"\d+", bc_id) if mobj: return rf_item else: @@ -134,38 +145,38 @@ class CiscoLiveIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) rf_headers = { - 'Origin': 'https://ciscolive.cisco.com', - 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, - 'rfWidgetId': self.RAINFOCUS_WIDGETID, - 'Referer': url - } - rf_args = { - 'video_id': None, - 'headers': rf_headers + "Origin": "https://ciscolive.cisco.com", + "rfApiProfileId": self.RAINFOCUS_APIPROFILEID, + "rfWidgetId": self.RAINFOCUS_WIDGETID, + "Referer": url, } + rf_args = {"video_id": None, "headers": rf_headers} # Single session URL (single video) - if mobj.group('id'): - rf_id = mobj.group('id') - rf_args['url_or_request'] = self.RAINFOCUS_API_URL % 'session' - rf_args['video_id'] = rf_id - rf_args['data'] = compat_urllib_parse_urlencode({'id': rf_id}) + if mobj.group("id"): + rf_id = mobj.group("id") + rf_args["url_or_request"] = self.RAINFOCUS_API_URL % "session" + rf_args["video_id"] = rf_id + rf_args["data"] = compat_urllib_parse_urlencode({"id": rf_id}) rf_result = self._download_json(**rf_args) - rf_item = self._check_bc_url_exists(rf_result['items'][0]) + rf_item = self._check_bc_url_exists(rf_result["items"][0]) return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) - if mobj.group('query'): - rf_query = mobj.group('query') - rf_query = str(rf_query + '&type=session&size=1000') - rf_args['url_or_request'] = self.RAINFOCUS_API_URL % 'search' - rf_args['data'] = rf_query + if mobj.group("query"): + rf_query = mobj.group("query") + rf_query = str(rf_query + "&type=session&size=1000") + rf_args["url_or_request"] = self.RAINFOCUS_API_URL % "search" + rf_args["data"] = rf_query # Query JSON results offer no obvious way to ID the search - rf_args['video_id'] = 'Filter query' + rf_args["video_id"] = "Filter query" self.to_screen('Video collection for filter query "%s"' % rf_query) rf_results = self._download_json(**rf_args) # Not all sessions have videos; filter them out before moving on - rf_video_results = [rf_item for rf_item in rf_results["sectionList"][0]["items"] - if self._check_bc_url_exists(rf_item)] + rf_video_results = [ + rf_item + for rf_item in rf_results["sectionList"][0]["items"] + if self._check_bc_url_exists(rf_item) + ] entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] return self.playlist_result(entries) From 8f7465ef543cf92e4804edad99cde4f43c26cc77 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 14:02:44 -0700 Subject: [PATCH 14/24] [ciscolive] Revert unnecessary changes to Brightcove extractor --- youtube_dl/extractor/brightcove.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 8eee47283..465ae396e 100755 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -596,8 +596,8 @@ class BrightcoveNewIE(AdobePassIE): return entries - def _parse_brightcove_metadata(self, json_data, video_id, headers={}, smuggled_data={}): - title = smuggled_data.get('title') or json_data['name'].strip() + def _parse_brightcove_metadata(self, json_data, video_id, headers={}): + title = json_data['name'].strip() formats = [] for source in json_data.get('sources', []): @@ -772,4 +772,4 @@ class BrightcoveNewIE(AdobePassIE): }) return self._parse_brightcove_metadata( - json_data, video_id, headers=headers, smuggled_data=smuggled_data) + json_data, video_id, headers=headers) From 1b394685e87c44ebc6812d529ac07658a417b833 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 14:03:17 -0700 Subject: [PATCH 15/24] [ciscolive] Changes requested by upstream maintainers --- youtube_dl/extractor/ciscolive.py | 214 +++++++++++++----------------- 1 file changed, 95 insertions(+), 119 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 5c776b530..55ef8f808 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -4,179 +4,155 @@ from __future__ import unicode_literals import re from ..compat import compat_urllib_parse_urlencode from .common import InfoExtractor -from ..utils import smuggle_url class CiscoLiveIE(InfoExtractor): - IE_NAME = "ciscolive" - _VALID_URL = r"https://ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$" + IE_NAME = 'ciscolive' + _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' _TESTS = [ { - "url": "https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs", - "md5": "c98acf395ed9c9f766941c70f5352e22", - "info_dict": { - "id": "5803694304001", - "ext": "mp4", - "title": "13 Smart Automations to Monitor Your Cisco IOS Network", - "timestamp": 1530305395, - "uploader_id": "5647924234001", - "upload_date": "20180629", + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network [BRKNMS-2465]', + 'description': 'md5:9c8b286dea1e3cb479c4562f1c3e5000', + 'timestamp': 1530305395, + 'uploader_id': '5647924234001', + 'upload_date': '20180629', + 'location': '16B Mezz.', }, }, { - "url": "https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/", - "md5": "993d4cf051f6174059328b1dce8e94bd", - "info_dict": { - "id": "5803751616001", - "ext": "mp4", - "timestamp": 1530316421, - "title": "DevNet Panel-Applying Design Thinking to Building Products in Cisco", - "uploader_id": "5647924234001", - "upload_date": "20180629", + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'md5': '993d4cf051f6174059328b1dce8e94bd', + 'info_dict': { + 'upload_date': '20180629', + 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco [DEVNET-1794]', + 'timestamp': 1530316421, + 'uploader_id': '5647924234001', + 'id': '5803751616001', + 'description': 'md5:df02755cc961cc38950c36f53849ff1b', + 'location': 'WoS, DevNet Theater', + 'ext': 'mp4', }, }, { - "url": "https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/", - "md5": "80e0c3b87e373fe3a3316b934b8915bf", - "info_dict": { - "id": "5803735679001", - "ext": "mp4", - "timestamp": 1530311842, - "title": "Beating the CCIE Routing & Switching", - "uploader_id": "5647924234001", - "upload_date": "20180629", + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'md5': '80e0c3b87e373fe3a3316b934b8915bf', + 'info_dict': { + 'upload_date': '20180629', + 'title': 'Beating the CCIE Routing & Switching [BRKCCIE-9162]', + 'timestamp': 1530311842, + 'uploader_id': '5647924234001', + 'id': '5803735679001', + 'description': 'md5:9e05b6772263276a5b8feef6f04887a1', + 'location': 'Tulúm 02', + 'ext': 'mp4', }, - }, - { - "url": "https://ciscolive.cisco.com/on-demand-library/?search.solutions=scpsSolutions_cleanair#/", - "md5": "80e0c3b87e373fe3a3316b934b8915bf", - "info_dict": { - "id": "5803735679001", - "ext": "mp4", - "timestamp": 1530311842, - "title": "Beating the CCIE Routing & Switching", - "uploader_id": "5647924234001", - "upload_date": "20180629", - }, - }, + } ] # These appear to be constant across all Cisco Live presentations # and are not tied to any user session or event - RAINFOCUS_API_URL = "https://events.rainfocus.com/api/%s" - RAINFOCUS_APIPROFILEID = "Na3vqYdAlJFSxhYTYQGuMbpafMqftalz" - RAINFOCUS_WIDGETID = "n6l4Lo05R8fiy3RpUBm447dZN8uNWoye" - BRIGHTCOVE_URL_TEMPLATE = "http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s" + RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' + RAINFOCUS_APIPROFILEID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGETID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' def _parse_rf_item(self, rf_item): - """ Parses metadata and passes to Brightcove extractor + ''' Parses metadata and passes to Brightcove extractor - """ + ''' # Metadata parsed from Rainfocus API result # Not all of which is appropriate to pass to Brightcove extractor # but might be nice to print to output - event_name = rf_item.get("eventName") + event_name = rf_item.get('eventName') # Full event name [Cisco Live EMEA 2016] # rf_id = rf_item.get('eventId') # Rainfocus ID [14382715417240cleu16] - cl_id = rf_item.get("abbreviation") + cl_id = rf_item.get('abbreviation') # Cisco Live ID - Shorthand session ID [BRKCRS-2501] - title = rf_item.get("title") + title = rf_item.get('title') # Full session title [Campus QoS Design-Simplified] - description = rf_item.get("abstract") + description = rf_item.get('abstract') # Description [This session will apply Cisco's QoS strategy for rich media...] - # TODO: Needs safe get() method - presenter_name = rf_item["participants"][0]["fullName"] + presenter_name = rf_item.get('participants')[0].get('fullName') # Presenter's full name [Tim Szigeti] - presenter_title = rf_item["participants"][0]["jobTitle"] + presenter_title = rf_item.get('participants')[0].get('jobTitle') # Presenter's job title [Principal Engineer - Technical Marketing] - pdf_url = rf_item["files"][0]["url"] + pdf_url = rf_item.get('files')[0].get('url') # Presentation PDF URL [https://clnv.s3.amazonaws.com/2016/eur/pdf/BRKCRS-2501.pdf] - bc_id = rf_item["videos"][0]["url"] + bc_id = rf_item.get('videos')[0].get('url') # Brightcove video ID [5803710412001] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id # Brightcove video URL [http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=5803710412001] - duration = rf_item["times"][0]["length"] * 60 + duration = rf_item.get('times')[0].get('length') * 60 # Duration. Provided in minutes * 60 = seconds [7200] - location = rf_item["times"][0]["room"] + location = rf_item.get('times')[0].get('room') # Location [Hall 7.3 Breakout Room 732] - # Metadata passed to final Brightcove extractor - # TODO: Only title is passed--need to work on how to best merge smuggled metadata - metadata = { - "id": cl_id, - "title": title, - "creator": presenter_name, - "description": description, - "series": event_name, - "duration": duration, - "location": location, + return { + '_type': 'url_transparent', + 'url': bc_url, + 'id': cl_id, + 'title': '%s [%s]' % (title, cl_id), + 'creator': '%s, %s' % (presenter_name, presenter_title), + 'description': '%s\nSlide Deck: %s' % (description, pdf_url), + 'series': event_name, + 'duration': duration, + 'location': location, + 'ie_key': 'BrightcoveNew', } - self.to_screen("Event Name: %s" % event_name) - self.to_screen("Session ID: %s" % cl_id) - self.to_screen("Session Title: %s" % title) - self.to_screen("Presenter: %s, %s" % (presenter_name, presenter_title)) - self.to_screen("Slide Deck URL: %s" % pdf_url) - self.to_screen("Video URL: %s" % bc_url) - return self.url_result( - smuggle_url(bc_url, metadata), "BrightcoveNew", bc_id, title - ) def _check_bc_url_exists(self, rf_item): - """ Checks for the existence of a Brightcove URL in a + ''' Checks for the existence of a Brightcove URL in a RainFocus result item - """ - msg = ( - "Skipping session that does not include a valid video URL: %s" - % rf_item.get("title", "Unknown title") - ) + ''' try: - bc_id = rf_item["videos"][0]["url"] - mobj = re.match(r"\d+", bc_id) + bc_id = rf_item['videos'][0]['url'] + mobj = re.match(r'\d+', bc_id) if mobj: return rf_item else: - self.report_warning(msg) + pass except IndexError: - self.report_warning(msg) + pass def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - rf_headers = { - "Origin": "https://ciscolive.cisco.com", - "rfApiProfileId": self.RAINFOCUS_APIPROFILEID, - "rfWidgetId": self.RAINFOCUS_WIDGETID, - "Referer": url, + headers = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, + 'rfWidgetId': self.RAINFOCUS_WIDGETID, + 'Referer': url, } - rf_args = {"video_id": None, "headers": rf_headers} # Single session URL (single video) - if mobj.group("id"): - rf_id = mobj.group("id") - rf_args["url_or_request"] = self.RAINFOCUS_API_URL % "session" - rf_args["video_id"] = rf_id - rf_args["data"] = compat_urllib_parse_urlencode({"id": rf_id}) - rf_result = self._download_json(**rf_args) - rf_item = self._check_bc_url_exists(rf_result["items"][0]) + if mobj.group('id'): + rf_id = mobj.group('id') + request = self.RAINFOCUS_API_URL % 'session' + data = compat_urllib_parse_urlencode({'id': rf_id}) + rf_result = self._download_json(request, rf_id, data=data, + headers=headers) + rf_item = self._check_bc_url_exists(rf_result.get('items')[0]) return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) - if mobj.group("query"): - rf_query = mobj.group("query") - rf_query = str(rf_query + "&type=session&size=1000") - rf_args["url_or_request"] = self.RAINFOCUS_API_URL % "search" - rf_args["data"] = rf_query - # Query JSON results offer no obvious way to ID the search - rf_args["video_id"] = "Filter query" - self.to_screen('Video collection for filter query "%s"' % rf_query) - rf_results = self._download_json(**rf_args) - # Not all sessions have videos; filter them out before moving on - rf_video_results = [ - rf_item - for rf_item in rf_results["sectionList"][0]["items"] - if self._check_bc_url_exists(rf_item) - ] - entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] - return self.playlist_result(entries) + rf_query = mobj.group('query') + rf_query = str(rf_query + '&type=session&size=1000') + request = self.RAINFOCUS_API_URL % 'search' + # Query JSON results offer no obvious way to ID the search + rf_results = self._download_json(request, 'Filter query', + data=rf_query, headers=headers) + # Not all sessions have videos; filter them out before moving on + rf_video_results = [ + rf_item + for rf_item in rf_results.get('sectionList')[0].get('items') + if self._check_bc_url_exists(rf_item) + ] + entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] + return self.playlist_result(entries) From b207f39ee53b0124f4f7cc618c8fe9ff6c907712 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 14:11:04 -0700 Subject: [PATCH 16/24] [ciscolive] Include video URL and slide deck URL in description --- youtube_dl/extractor/ciscolive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 55ef8f808..e8355d2c1 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -100,7 +100,7 @@ class CiscoLiveIE(InfoExtractor): 'id': cl_id, 'title': '%s [%s]' % (title, cl_id), 'creator': '%s, %s' % (presenter_name, presenter_title), - 'description': '%s\nSlide Deck: %s' % (description, pdf_url), + 'description': '%s\nVideo Player: %s\nSlide Deck: %s' % (description, bc_url, pdf_url), 'series': event_name, 'duration': duration, 'location': location, @@ -155,4 +155,4 @@ class CiscoLiveIE(InfoExtractor): if self._check_bc_url_exists(rf_item) ] entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] - return self.playlist_result(entries) + return self.playlist_result(entries, 'Filter query') From 7554015f86a9d4709a457dfb9e499e3c24a87e53 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 16:22:03 -0700 Subject: [PATCH 17/24] [ciscolive] Requested changes --- youtube_dl/extractor/ciscolive.py | 52 ++++++++++++++++--------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index e8355d2c1..ac578ac2c 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -2,8 +2,12 @@ from __future__ import unicode_literals import re -from ..compat import compat_urllib_parse_urlencode from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlencode +from ..utils import ( + try_get, + clean_html +) class CiscoLiveIE(InfoExtractor): @@ -17,7 +21,7 @@ class CiscoLiveIE(InfoExtractor): 'id': '5803694304001', 'ext': 'mp4', 'title': '13 Smart Automations to Monitor Your Cisco IOS Network [BRKNMS-2465]', - 'description': 'md5:9c8b286dea1e3cb479c4562f1c3e5000', + 'description': 'md5:171c3a1c0469c126d01f083a83d6c60b', 'timestamp': 1530305395, 'uploader_id': '5647924234001', 'upload_date': '20180629', @@ -33,7 +37,7 @@ class CiscoLiveIE(InfoExtractor): 'timestamp': 1530316421, 'uploader_id': '5647924234001', 'id': '5803751616001', - 'description': 'md5:df02755cc961cc38950c36f53849ff1b', + 'description': 'md5:291dbd447bf745d1f61d944d9508538f', 'location': 'WoS, DevNet Theater', 'ext': 'mp4', }, @@ -47,7 +51,7 @@ class CiscoLiveIE(InfoExtractor): 'timestamp': 1530311842, 'uploader_id': '5647924234001', 'id': '5803735679001', - 'description': 'md5:9e05b6772263276a5b8feef6f04887a1', + 'description': 'md5:18bf6e8a634df0a51290401f209089b0', 'location': 'Tulúm 02', 'ext': 'mp4', }, @@ -77,50 +81,48 @@ class CiscoLiveIE(InfoExtractor): # Cisco Live ID - Shorthand session ID [BRKCRS-2501] title = rf_item.get('title') # Full session title [Campus QoS Design-Simplified] - description = rf_item.get('abstract') + description = clean_html(rf_item.get('abstract')) # Description [This session will apply Cisco's QoS strategy for rich media...] - presenter_name = rf_item.get('participants')[0].get('fullName') + presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) # Presenter's full name [Tim Szigeti] - presenter_title = rf_item.get('participants')[0].get('jobTitle') + presenter_title = try_get(rf_item, lambda x: x['participants'][0]['jobTitle']) # Presenter's job title [Principal Engineer - Technical Marketing] - pdf_url = rf_item.get('files')[0].get('url') + pdf_url = try_get(rf_item, lambda x: x['files'][0]['url']) # Presentation PDF URL [https://clnv.s3.amazonaws.com/2016/eur/pdf/BRKCRS-2501.pdf] - bc_id = rf_item.get('videos')[0].get('url') + bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) # Brightcove video ID [5803710412001] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id # Brightcove video URL [http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=5803710412001] - duration = rf_item.get('times')[0].get('length') * 60 + duration = try_get(rf_item, lambda x: x['times'][0]['length']) # Duration. Provided in minutes * 60 = seconds [7200] - location = rf_item.get('times')[0].get('room') + location = try_get(rf_item, lambda x: x['times'][0]['room']) # Location [Hall 7.3 Breakout Room 732] + if duration: + duration = duration * 60 + return { '_type': 'url_transparent', 'url': bc_url, 'id': cl_id, 'title': '%s [%s]' % (title, cl_id), 'creator': '%s, %s' % (presenter_name, presenter_title), - 'description': '%s\nVideo Player: %s\nSlide Deck: %s' % (description, bc_url, pdf_url), + 'description': '%s\n\nVideo Player: %s\nSlide Deck: %s' % (description, bc_url, pdf_url), 'series': event_name, 'duration': duration, 'location': location, 'ie_key': 'BrightcoveNew', } - def _check_bc_url_exists(self, rf_item): + def _check_bc_id_exists(self, rf_item): ''' Checks for the existence of a Brightcove URL in a RainFocus result item ''' - try: - bc_id = rf_item['videos'][0]['url'] - mobj = re.match(r'\d+', bc_id) - if mobj: - return rf_item - else: - pass - except IndexError: - pass + bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) + mobj = re.match(r'\d+', bc_id) + if mobj: + return rf_item def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -138,7 +140,7 @@ class CiscoLiveIE(InfoExtractor): data = compat_urllib_parse_urlencode({'id': rf_id}) rf_result = self._download_json(request, rf_id, data=data, headers=headers) - rf_item = self._check_bc_url_exists(rf_result.get('items')[0]) + rf_item = self._check_bc_id_exists(try_get(rf_result, lambda x: x['items'][0], dict)) return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) @@ -151,8 +153,8 @@ class CiscoLiveIE(InfoExtractor): # Not all sessions have videos; filter them out before moving on rf_video_results = [ rf_item - for rf_item in rf_results.get('sectionList')[0].get('items') - if self._check_bc_url_exists(rf_item) + for rf_item in try_get(rf_results, lambda x: x['sectionList'][0]['items'], list) + if self._check_bc_id_exists(rf_item) ] entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] return self.playlist_result(entries, 'Filter query') From c41ef57a760ef52ec075bf9c730a4d11bdc585aa Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 21:05:31 -0700 Subject: [PATCH 18/24] [ciscolive] Improve _VALID_URL regex --- youtube_dl/extractor/ciscolive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index ac578ac2c..ed4f3e493 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -12,7 +12,7 @@ from ..utils import ( class CiscoLiveIE(InfoExtractor): IE_NAME = 'ciscolive' - _VALID_URL = r'https://ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' + _VALID_URL = r'(?:https?://)?ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' _TESTS = [ { 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', From 759f720df7a1cc2c17ec5e90f5d0c26c674abe3e Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 29 Oct 2018 21:17:54 -0700 Subject: [PATCH 19/24] [ciscolive] Cleanup --- youtube_dl/extractor/ciscolive.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index ed4f3e493..6da3e5681 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -12,7 +12,7 @@ from ..utils import ( class CiscoLiveIE(InfoExtractor): IE_NAME = 'ciscolive' - _VALID_URL = r'(?:https?://)?ciscolive.cisco.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' + _VALID_URL = r'(?:https?://)?ciscolive\.cisco\.com/on-demand-library/\??(?P[^#]+)#/(?:session/(?P.+))?$' _TESTS = [ { 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', @@ -69,34 +69,17 @@ class CiscoLiveIE(InfoExtractor): ''' Parses metadata and passes to Brightcove extractor ''' - # Metadata parsed from Rainfocus API result - # Not all of which is appropriate to pass to Brightcove extractor - # but might be nice to print to output - event_name = rf_item.get('eventName') - # Full event name [Cisco Live EMEA 2016] - # rf_id = rf_item.get('eventId') - # Rainfocus ID [14382715417240cleu16] cl_id = rf_item.get('abbreviation') - # Cisco Live ID - Shorthand session ID [BRKCRS-2501] title = rf_item.get('title') - # Full session title [Campus QoS Design-Simplified] description = clean_html(rf_item.get('abstract')) - # Description [This session will apply Cisco's QoS strategy for rich media...] presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) - # Presenter's full name [Tim Szigeti] presenter_title = try_get(rf_item, lambda x: x['participants'][0]['jobTitle']) - # Presenter's job title [Principal Engineer - Technical Marketing] pdf_url = try_get(rf_item, lambda x: x['files'][0]['url']) - # Presentation PDF URL [https://clnv.s3.amazonaws.com/2016/eur/pdf/BRKCRS-2501.pdf] bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) - # Brightcove video ID [5803710412001] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id - # Brightcove video URL [http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=5803710412001] duration = try_get(rf_item, lambda x: x['times'][0]['length']) - # Duration. Provided in minutes * 60 = seconds [7200] location = try_get(rf_item, lambda x: x['times'][0]['room']) - # Location [Hall 7.3 Breakout Room 732] if duration: duration = duration * 60 From 10ab8802898959042d0c5b8159dd466141da7b4f Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 2 Nov 2018 12:27:22 -0700 Subject: [PATCH 20/24] [ciscolive] Changes requested by maintainers --- youtube_dl/extractor/ciscolive.py | 32 +++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 6da3e5681..c75f122a6 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -3,10 +3,16 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode +from ..compat import ( + compat_urllib_parse_urlparse, + compat_parse_qs +) from ..utils import ( try_get, - clean_html + clean_html, + urlencode_postdata, + int_or_none, + ExtractorError ) @@ -78,7 +84,7 @@ class CiscoLiveIE(InfoExtractor): pdf_url = try_get(rf_item, lambda x: x['files'][0]['url']) bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id - duration = try_get(rf_item, lambda x: x['times'][0]['length']) + duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) location = try_get(rf_item, lambda x: x['times'][0]['room']) if duration: @@ -115,28 +121,30 @@ class CiscoLiveIE(InfoExtractor): 'rfWidgetId': self.RAINFOCUS_WIDGETID, 'Referer': url, } - # Single session URL (single video) if mobj.group('id'): rf_id = mobj.group('id') request = self.RAINFOCUS_API_URL % 'session' - data = compat_urllib_parse_urlencode({'id': rf_id}) - rf_result = self._download_json(request, rf_id, data=data, - headers=headers) + data = urlencode_postdata({'id': rf_id}) + rf_result = self._download_json(request, rf_id, data=data, headers=headers) rf_item = self._check_bc_id_exists(try_get(rf_result, lambda x: x['items'][0], dict)) + if not rf_item: + msg = 'Rain Focus JSON response did not return a Brightcove video ID' + raise ExtractorError(msg) return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) - rf_query = mobj.group('query') - rf_query = str(rf_query + '&type=session&size=1000') + rf_query = compat_parse_qs((compat_urllib_parse_urlparse(url).query)) + rf_query['type'] = 'session' + rf_query['size'] = 1000 + data = urlencode_postdata(rf_query) request = self.RAINFOCUS_API_URL % 'search' # Query JSON results offer no obvious way to ID the search - rf_results = self._download_json(request, 'Filter query', - data=rf_query, headers=headers) + rf_results = self._download_json(request, 'Filter query', data=data, headers=headers) # Not all sessions have videos; filter them out before moving on rf_video_results = [ rf_item - for rf_item in try_get(rf_results, lambda x: x['sectionList'][0]['items'], list) + for rf_item in rf_results['sectionList'][0]['items'] if self._check_bc_id_exists(rf_item) ] entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] From 10ce83e1d598a85e0a4ada84de6b1f3f4a3964c5 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 2 Nov 2018 13:53:07 -0700 Subject: [PATCH 21/24] [ciscolive] Changes requested by maintainers --- youtube_dl/extractor/ciscolive.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index c75f122a6..f202b83e3 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -82,7 +82,7 @@ class CiscoLiveIE(InfoExtractor): presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) presenter_title = try_get(rf_item, lambda x: x['participants'][0]['jobTitle']) pdf_url = try_get(rf_item, lambda x: x['files'][0]['url']) - bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) + bc_id = rf_item['videos'][0]['url'] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) location = try_get(rf_item, lambda x: x['times'][0]['room']) @@ -127,10 +127,7 @@ class CiscoLiveIE(InfoExtractor): request = self.RAINFOCUS_API_URL % 'session' data = urlencode_postdata({'id': rf_id}) rf_result = self._download_json(request, rf_id, data=data, headers=headers) - rf_item = self._check_bc_id_exists(try_get(rf_result, lambda x: x['items'][0], dict)) - if not rf_item: - msg = 'Rain Focus JSON response did not return a Brightcove video ID' - raise ExtractorError(msg) + rf_item = self._check_bc_id_exists(rf_result['items'][0]) return self._parse_rf_item(rf_item) else: # Filter query URL (multiple videos) From 240ac9b0cc2d407a25e4c9138eb09ec65ca0ed99 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Fri, 2 Nov 2018 13:53:52 -0700 Subject: [PATCH 22/24] [ciscolive] Remove unnecessary ExtractorError import --- youtube_dl/extractor/ciscolive.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index f202b83e3..2d490dd05 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -11,8 +11,7 @@ from ..utils import ( try_get, clean_html, urlencode_postdata, - int_or_none, - ExtractorError + int_or_none ) From 112db624e1e94651b34093887c6ad45e76276834 Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 5 Nov 2018 16:21:10 -0800 Subject: [PATCH 23/24] [ciscolive] Fix bug in _check_bc_id_exists() --- youtube_dl/extractor/ciscolive.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py index 2d490dd05..53c2a8c72 100755 --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -108,9 +108,10 @@ class CiscoLiveIE(InfoExtractor): ''' bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) - mobj = re.match(r'\d+', bc_id) - if mobj: - return rf_item + if bc_id: + mobj = re.match(r'\d+', bc_id) + if mobj: + return rf_item def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 2f72407593d15549a1ec6830a3cb5d7b4ad0b22b Mon Sep 17 00:00:00 2001 From: Austin de Coup-Crank Date: Mon, 19 Nov 2018 09:35:20 -0800 Subject: [PATCH 24/24] [ciscolive] Changes requested by maintainers --- youtube_dl/extractor/brightcove.py | 0 youtube_dl/extractor/ciscolive.py | 62 ++++++++++++------------------ youtube_dl/extractor/extractors.py | 0 3 files changed, 25 insertions(+), 37 deletions(-) mode change 100755 => 100644 youtube_dl/extractor/brightcove.py mode change 100755 => 100644 youtube_dl/extractor/ciscolive.py mode change 100755 => 100644 youtube_dl/extractor/extractors.py diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py old mode 100755 new mode 100644 diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py old mode 100755 new mode 100644 index 53c2a8c72..2db7aad2c --- a/youtube_dl/extractor/ciscolive.py +++ b/youtube_dl/extractor/ciscolive.py @@ -8,10 +8,10 @@ from ..compat import ( compat_parse_qs ) from ..utils import ( - try_get, clean_html, + int_or_none, + try_get, urlencode_postdata, - int_or_none ) @@ -25,8 +25,8 @@ class CiscoLiveIE(InfoExtractor): 'info_dict': { 'id': '5803694304001', 'ext': 'mp4', - 'title': '13 Smart Automations to Monitor Your Cisco IOS Network [BRKNMS-2465]', - 'description': 'md5:171c3a1c0469c126d01f083a83d6c60b', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'description': 'md5:ec4a436019e09a918dec17714803f7cc', 'timestamp': 1530305395, 'uploader_id': '5647924234001', 'upload_date': '20180629', @@ -38,11 +38,11 @@ class CiscoLiveIE(InfoExtractor): 'md5': '993d4cf051f6174059328b1dce8e94bd', 'info_dict': { 'upload_date': '20180629', - 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco [DEVNET-1794]', + 'title': 'DevNet Panel-Applying Design Thinking to Building Products in Cisco', 'timestamp': 1530316421, 'uploader_id': '5647924234001', 'id': '5803751616001', - 'description': 'md5:291dbd447bf745d1f61d944d9508538f', + 'description': 'md5:5f144575cd6848117fe2f756855b038b', 'location': 'WoS, DevNet Theater', 'ext': 'mp4', }, @@ -52,11 +52,11 @@ class CiscoLiveIE(InfoExtractor): 'md5': '80e0c3b87e373fe3a3316b934b8915bf', 'info_dict': { 'upload_date': '20180629', - 'title': 'Beating the CCIE Routing & Switching [BRKCCIE-9162]', + 'title': 'Beating the CCIE Routing & Switching', 'timestamp': 1530311842, 'uploader_id': '5647924234001', 'id': '5803735679001', - 'description': 'md5:18bf6e8a634df0a51290401f209089b0', + 'description': 'md5:e71970799e92d7f5ff57ae23f64b0929', 'location': 'Tulúm 02', 'ext': 'mp4', }, @@ -71,16 +71,11 @@ class CiscoLiveIE(InfoExtractor): BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' def _parse_rf_item(self, rf_item): - ''' Parses metadata and passes to Brightcove extractor - - ''' + ''' Parses metadata and passes to Brightcove extractor ''' event_name = rf_item.get('eventName') - cl_id = rf_item.get('abbreviation') - title = rf_item.get('title') + title = rf_item['title'] description = clean_html(rf_item.get('abstract')) presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) - presenter_title = try_get(rf_item, lambda x: x['participants'][0]['jobTitle']) - pdf_url = try_get(rf_item, lambda x: x['files'][0]['url']) bc_id = rf_item['videos'][0]['url'] bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id duration = int_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) @@ -91,31 +86,26 @@ class CiscoLiveIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': bc_url, - 'id': cl_id, - 'title': '%s [%s]' % (title, cl_id), - 'creator': '%s, %s' % (presenter_name, presenter_title), - 'description': '%s\n\nVideo Player: %s\nSlide Deck: %s' % (description, bc_url, pdf_url), - 'series': event_name, + 'creator': presenter_name, + 'description': description, 'duration': duration, - 'location': location, 'ie_key': 'BrightcoveNew', + 'location': location, + 'series': event_name, + 'title': title, + 'url': bc_url, } def _check_bc_id_exists(self, rf_item): - ''' Checks for the existence of a Brightcove URL in a - RainFocus result item - - ''' + ''' Checks for the existence of a Brightcove URL in an API result ''' bc_id = try_get(rf_item, lambda x: x['videos'][0]['url']) if bc_id: - mobj = re.match(r'\d+', bc_id) - if mobj: + if bc_id.strip().isdigit(): return rf_item def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - headers = { + HEADERS = { 'Origin': 'https://ciscolive.cisco.com', 'rfApiProfileId': self.RAINFOCUS_APIPROFILEID, 'rfWidgetId': self.RAINFOCUS_WIDGETID, @@ -126,7 +116,7 @@ class CiscoLiveIE(InfoExtractor): rf_id = mobj.group('id') request = self.RAINFOCUS_API_URL % 'session' data = urlencode_postdata({'id': rf_id}) - rf_result = self._download_json(request, rf_id, data=data, headers=headers) + rf_result = self._download_json(request, rf_id, data=data, headers=HEADERS) rf_item = self._check_bc_id_exists(rf_result['items'][0]) return self._parse_rf_item(rf_item) else: @@ -136,13 +126,11 @@ class CiscoLiveIE(InfoExtractor): rf_query['size'] = 1000 data = urlencode_postdata(rf_query) request = self.RAINFOCUS_API_URL % 'search' - # Query JSON results offer no obvious way to ID the search - rf_results = self._download_json(request, 'Filter query', data=data, headers=headers) - # Not all sessions have videos; filter them out before moving on - rf_video_results = [ - rf_item - for rf_item in rf_results['sectionList'][0]['items'] + rf_results = self._download_json(request, 'Filter query', data=data, headers=HEADERS) + entries = [ + self._parse_rf_item(rf_item) + for rf_item + in rf_results['sectionList'][0]['items'] if self._check_bc_id_exists(rf_item) ] - entries = [self._parse_rf_item(rf_item) for rf_item in rf_video_results] return self.playlist_result(entries, 'Filter query') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py old mode 100755 new mode 100644