From 58f21537bff2c6fbc42c4e7cf2dd1305aeac58f4 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 30 May 2018 13:23:40 +0100 Subject: [PATCH 1/3] [skillshare:course] Add new extractor Added new extractor for skillshare.com classes (you can't link to a specific video). --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/skillshare.py | 122 +++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 youtube_dl/extractor/skillshare.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5f829c72c..5df442158 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -972,6 +972,7 @@ from .shared import ( from .showroomlive import ShowRoomLiveIE from .sina import SinaIE from .sixplay import SixPlayIE +from .skillshare import SkillshareCourseIE from .skylinewebcams import SkylineWebcamsIE from .skynewsarabia import ( SkyNewsArabiaIE, diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py new file mode 100644 index 000000000..ab54ecea1 --- /dev/null +++ b/youtube_dl/extractor/skillshare.py @@ -0,0 +1,122 @@ +from __future__ import unicode_literals + +from datetime import datetime +import json + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ExtractorError +from ..utils import int_or_none + + +class SkillshareBaseIE(InfoExtractor): + _NETRC_MACHINE = 'udemy' + + _TN_RE = r"uploads/video/thumbnails/[0-9a-f]+/(?P[0-9]+)-(?P[0-9]+)" + _LOGIN_URL = "https://api.skillshare.com/login" + _VIDEO_URL = "https://api.skillshare.com/sessions/%s/download" + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None or password is None: + self.raise_login_required("An email and password is needed to download any video (even non-premium ones)") + + data = { + "email": username, + "password": password + } + headers = { + "Content-Type": 
"application/json" + } + user_json = self._download_json(self._LOGIN_URL, + None, + note="Logging in", + errnote="Error logging in, make sure the email and password is correct", + data=json.dumps(data).encode(), + headers=headers) + + user_type = user_json.get("membership_label", "Premium Member") + if user_type == "Basic Member": + self._user_type = 0 + elif user_type == "Premium Member": + self._user_type = 2 + else: + raise ExtractorError("User type %s unknown" % user_json["membership_label"]) + + +# I can find no way of linking to a specific video so only entire course downloads are available. +class SkillshareCourseIE(SkillshareBaseIE): + IE_NAME = 'skillshare:course' + IE_DESC = 'skillshare.com classes' + _VALID_URL = r'https?://(?:www\.)?skillshare\.com/classes/[^/]+/(?P[0-9]+)' + + _CLASS_URL = "https://api.skillshare.com/classes/%s" + + _TEST = { + "url": "https://www.skillshare.com/classes/Blender-3D-Fire-Smoke-Simulation-Guide/1850126092", + "only_matching": True + } + + def _real_extract(self, url): + # Technically the SKU, not ID but the SKU is a more universal identifier. + class_id = self._match_id(url) + class_json = self._download_json(self._CLASS_URL % class_id, + None, + note="Getting class details", + errnote="Error getting class details") + + if class_json.get("enrollment_type", 0) > self._user_type: + raise ExtractorError("This course requires a premium account and thus can't be downloaded") + + lessons_json = [] + # Pretty sure all classes only have one unit but flattening just in case. 
+ for unit_json in class_json["_embedded"]["units"]["_embedded"]["units"]: + lessons_json += (unit_json["_embedded"]["sessions"]["_embedded"]["sessions"]) + + videos = [] + for lesson_json in lessons_json: + lesson_thumbnail_urls = [ + lesson_json.get("video_thumbnail_url", ""), + lesson_json.get("video_thumbnail_url", ""), + lesson_json.get("image_thumbnail", "") + ] + lesson_thumbnails_json = [] + for lesson_thumbnail_url in lesson_thumbnail_urls: + lesson_thumbnails_json.append({ + "url": lesson_thumbnail_url, + "width": int_or_none(self._search_regex(self._TN_RE, lesson_thumbnail_url, "width", fatal=False)), + "height": int_or_none(self._search_regex(self._TN_RE, lesson_thumbnail_url, "height", fatal=False)), + }) + + try: + lesson_timestamp_dt = datetime.strptime(lesson_json.get("create_time", ""), "%Y-%m-%d %H:%M:%S") + lesson_timestamp = int(lesson_timestamp_dt.strftime("%s")) + except ValueError: + lesson_timestamp = None + + videos.append({ + "id": str(lesson_json["id"]), + "title": lesson_json.get("title"), + "url": self._VIDEO_URL % str(lesson_json["id"]), + "ext": "mp4", + "thumbnails": lesson_thumbnails_json, + "uploader": class_json["_embedded"].get("teacher", {}).get("full_name"), + "creator": class_json["_embedded"].get("teacher", {}).get("full_name"), + "timestamp": lesson_timestamp, + "uploader_id": str(class_json["_embedded"].get("teacher", {}).get("username", 0)), + "categories": [class_json.get("category")], + "chapter": lesson_json.get("_links", {}).get("unit", {}).get("title"), + "chapter_id": compat_str(lesson_json.get("unit_id")) + }) + + return { + "id": class_id, + "title": class_json.get("title"), + "uploader": class_json["_embedded"].get("teacher", {}).get("full_name"), + "uploader_id": str(class_json["_embedded"].get("teacher", {}).get("username", 0)), + "_type": "playlist", + "entries": videos + } From c90aa852c2fbf9aba2399ab0f7d31860df832985 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 2 Jun 2018 09:40:14 +0100 Subject: [PATCH 
2/3] [skillshare:course] Made most fixes Fixed all noted issues except cookie authentication (desktop and mobile don't correlate) and base class (to allow more functionality to be added in the future). --- youtube_dl/extractor/skillshare.py | 52 ++++++++++++++++-------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py index ab54ecea1..0f65667e6 100644 --- a/youtube_dl/extractor/skillshare.py +++ b/youtube_dl/extractor/skillshare.py @@ -1,16 +1,19 @@ from __future__ import unicode_literals -from datetime import datetime import json from .common import InfoExtractor from ..compat import compat_str -from ..utils import ExtractorError -from ..utils import int_or_none +from ..utils import ( + ExtractorError, + int_or_none, + try_get, + unified_timestamp +) class SkillshareBaseIE(InfoExtractor): - _NETRC_MACHINE = 'udemy' + _NETRC_MACHINE = "skillshare" _TN_RE = r"uploads/video/thumbnails/[0-9a-f]+/(?P[0-9]+)-(?P[0-9]+)" _LOGIN_URL = "https://api.skillshare.com/login" @@ -66,7 +69,7 @@ class SkillshareCourseIE(SkillshareBaseIE): class_json = self._download_json(self._CLASS_URL % class_id, None, note="Getting class details", - errnote="Error getting class details") + errnote="Downloading class JSON") if class_json.get("enrollment_type", 0) > self._user_type: raise ExtractorError("This course requires a premium account and thus can't be downloaded") @@ -79,10 +82,11 @@ class SkillshareCourseIE(SkillshareBaseIE): videos = [] for lesson_json in lessons_json: lesson_thumbnail_urls = [ - lesson_json.get("video_thumbnail_url", ""), - lesson_json.get("video_thumbnail_url", ""), - lesson_json.get("image_thumbnail", "") + lesson_json.get("video_thumbnail_url"), + lesson_json.get("video_thumbnail_url"), + lesson_json.get("image_thumbnail") ] + lesson_thumbnail_urls = filter(None, lesson_thumbnail_urls) lesson_thumbnails_json = [] for lesson_thumbnail_url in lesson_thumbnail_urls: 
lesson_thumbnails_json.append({ @@ -90,33 +94,33 @@ class SkillshareCourseIE(SkillshareBaseIE): "width": int_or_none(self._search_regex(self._TN_RE, lesson_thumbnail_url, "width", fatal=False)), "height": int_or_none(self._search_regex(self._TN_RE, lesson_thumbnail_url, "height", fatal=False)), }) + if not lesson_thumbnails_json: + lesson_thumbnails_json = None - try: - lesson_timestamp_dt = datetime.strptime(lesson_json.get("create_time", ""), "%Y-%m-%d %H:%M:%S") - lesson_timestamp = int(lesson_timestamp_dt.strftime("%s")) - except ValueError: - lesson_timestamp = None + lesson_categories = [class_json.get("category")] + if lesson_categories == [None]: + lesson_categories = None videos.append({ - "id": str(lesson_json["id"]), + "id": compat_str(lesson_json["id"]), "title": lesson_json.get("title"), - "url": self._VIDEO_URL % str(lesson_json["id"]), + "url": self._VIDEO_URL % compat_str(lesson_json["id"]), "ext": "mp4", "thumbnails": lesson_thumbnails_json, - "uploader": class_json["_embedded"].get("teacher", {}).get("full_name"), - "creator": class_json["_embedded"].get("teacher", {}).get("full_name"), - "timestamp": lesson_timestamp, - "uploader_id": str(class_json["_embedded"].get("teacher", {}).get("username", 0)), - "categories": [class_json.get("category")], - "chapter": lesson_json.get("_links", {}).get("unit", {}).get("title"), + "uploader": try_get(class_json, lambda x: x["_embedded"]["teacher"]["full_name"]), + "creator": try_get(class_json, lambda x: x["_embedded"]["teacher"]["full_name"]), + "timestamp": unified_timestamp(lesson_json.get("create_time")), + "uploader_id": compat_str(try_get(class_json, lambda x: x["_embedded"]["teacher"]["username"])), + "categories": lesson_categories, + "chapter": try_get(lesson_json, lambda x: x["_links"]["unit"]["title"]), "chapter_id": compat_str(lesson_json.get("unit_id")) }) return { "id": class_id, - "title": class_json.get("title"), - "uploader": class_json["_embedded"].get("teacher", {}).get("full_name"), - 
"uploader_id": str(class_json["_embedded"].get("teacher", {}).get("username", 0)), + "title": class_json["title"], + "uploader": try_get(class_json, lambda x: x["_embedded"]["teacher"]["full_name"]), + "uploader_id": compat_str(try_get(class_json, lambda x: x["_embedded"]["teacher"]["username"])), "_type": "playlist", "entries": videos } From 7b48736c3fa7d4f8994f5bddeca6f8a7e4474504 Mon Sep 17 00:00:00 2001 From: Unknown Date: Sat, 2 Jun 2018 09:44:27 +0100 Subject: [PATCH 3/3] [skillshare:course] Missed one change Improved JSON download message. --- youtube_dl/extractor/skillshare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/skillshare.py b/youtube_dl/extractor/skillshare.py index 0f65667e6..defbf2113 100644 --- a/youtube_dl/extractor/skillshare.py +++ b/youtube_dl/extractor/skillshare.py @@ -68,8 +68,8 @@ class SkillshareCourseIE(SkillshareBaseIE): class_id = self._match_id(url) class_json = self._download_json(self._CLASS_URL % class_id, None, - note="Getting class details", - errnote="Downloading class JSON") + note="Downloading class JSON", + errnote="Error downloading class JSON") if class_json.get("enrollment_type", 0) > self._user_type: raise ExtractorError("This course requires a premium account and thus can't be downloaded")