From 6352304f39bc0bdac1bc9827939383782bd6ac80 Mon Sep 17 00:00:00 2001 From: msrimat Date: Sun, 8 Dec 2019 18:46:02 -0500 Subject: [PATCH] Create ibmThink.py [ibmThink] for school project, attempted to implement ibm Think 2020 extraction access --- youtube_dl/extractor/ibmThink.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/ibmThink.py diff --git a/youtube_dl/extractor/ibmThink.py b/youtube_dl/extractor/ibmThink.py new file mode 100644 index 000000000..daa0e8dee --- /dev/null +++ b/youtube_dl/extractor/ibmThink.py @@ -0,0 +1,30 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .generic import GenericIE + + +class IbmThinkPlaylistIE(InfoExtractor): + IE_DESC = 'IBM Think Playlist' + IE_NAME = 'IBMThink:playlist' + _VALID_URL = r'https?://(?:www\.)?ibm\.com/events/think/watch/playlist/(?P[0-9]+)/?' + _TESTS = [{ + 'url': 'https://www.ibm.com/events/think/watch/playlist/468067/', + 'info_dict': { + 'id': '468067', + 'title': 'Think 2020', + 'description': 'Keynotes' + }, + 'playlist_mincount': 5 + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + entries = [self.url_result(m, GenericIE.ie_key()) for m in re.findall(r'', webpage)] + title = self._html_search_regex(r'.+?\s\|\s.+?\s\|\s(.+?)', webpage, 'title', fatal=False) + description = self._og_search_description(webpage) + return self.playlist_result(entries, playlist_id, title, description)