diff --git a/AUTHORS b/AUTHORS index 7e012247c..3940f0faa 100644 --- a/AUTHORS +++ b/AUTHORS @@ -231,3 +231,4 @@ John Dong Tatsuyuki Ishi Daniel Weber Kay Bouché +mk-pmb diff --git a/test/test_utils.py b/test/test_utils.py index efa73d0f4..f2b132f96 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) self.assertEqual(parse_duration('PT00H03M30SZ'), 210) + self.assertEqual(parse_duration('P0Y0M0DT0H2M4.567S'), 124.567) def test_fix_xml_ampersands(self): self.assertEqual( diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index e4a3046af..0ba041e26 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -9,7 +9,46 @@ from ..utils import ( ) -class EggheadCourseIE(InfoExtractor): +class EggheadShared(): + def extract_lesson_metadata(self, lesson): + info = { + 'title': lesson.get('title'), + 'description': lesson.get('summary'), + 'thumbnail': lesson.get('thumb_nail'), + 'timestamp': unified_timestamp(lesson.get('published_at')), + 'duration': int_or_none(lesson.get('duration')), + 'view_count': int_or_none(lesson.get('plays_count')), + 'tags': try_get(lesson, lambda x: x['tag_list'], list), + } + + def find_id_and_dlurl(): + vid_id = lesson.get('wistia_id') + if vid_id: + return {'ie_key': 'Wistia', '_type': 'url_transparent', + 'id': vid_id, 'url': 'wistia:' + vid_id} + + self.report_warning('Cannot find an proper ID, will use lesson name URL slug') + vid_id = self._html_search_regex( + r'^https?://egghead\.io/lessons/([A-Za-z0-9][A-Za-z0-9-]*)$', + lesson.get('http_url'), + 'lesson name URL part as ID of last resort', + group=1) + + mu = lesson.get('media_urls') + if mu: + src = mu.get('dash_url') + if src: + return {'id': vid_id, 'formats': self._extract_mpd_formats(src, vid_id)} + src = mu.get('hls_url') + if src: + return {'id': vid_id, 'formats': self._extract_m3u8_formats(src, vid_id, entry_protocol='m3u8_native', m3u8_id='hls')} + raise NotImplementedError('Unable to detect download URL') + info.update(find_id_and_dlurl()) + + return info + + +class EggheadCourseIE(InfoExtractor, EggheadShared): IE_DESC = 'egghead.io course' IE_NAME = 'egghead:course' _VALID_URL = r'https://egghead\.io/courses/(?P[^/?#&]+)' @@ -25,22 +64,16 @@ class EggheadCourseIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - course = self._download_json( - 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id) - - entries = [ - self.url_result( - 'wistia:%s' % lesson['wistia_id'], ie='Wistia', - video_id=lesson['wistia_id'], video_title=lesson.get('title')) - for lesson in course['lessons'] if lesson.get('wistia_id')] - + 'https://egghead.io/api/v1/series/' + playlist_id, playlist_id) + entries = [self.extract_lesson_metadata(lesson) + for lesson in course['lessons']] return self.playlist_result( entries, playlist_id, course.get('title'), course.get('description')) -class EggheadLessonIE(InfoExtractor): +class EggheadLessonIE(InfoExtractor, EggheadShared): IE_DESC = 'egghead.io lesson' IE_NAME = 'egghead:lesson' _VALID_URL = r'https://egghead\.io/lessons/(?P[^/?#&]+)' @@ -65,20 +98,6 @@ class EggheadLessonIE(InfoExtractor): def _real_extract(self, url): lesson_id = self._match_id(url) - lesson = self._download_json( - 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id) - - return { - '_type': 'url_transparent', - 'ie_key': 'Wistia', - 'url': 'wistia:%s' % lesson['wistia_id'], - 'id': lesson['wistia_id'], - 'title': lesson.get('title'), - 'description': lesson.get('summary'), - 'thumbnail': lesson.get('thumb_nail'), - 'timestamp': unified_timestamp(lesson.get('published_at')), - 'duration': int_or_none(lesson.get('duration')), - 'view_count': int_or_none(lesson.get('plays_count')), - 'tags': try_get(lesson, lambda x: x['tag_list'], list), - } + 'https://egghead.io/api/v1/lessons/' + lesson_id, lesson_id) + return self.extract_lesson_metadata(lesson) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 59fb33435..47a864ab2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1830,15 +1830,30 @@ def parse_duration(s): s = s.strip() days, hours, mins, secs, ms = [None] * 5 - m = re.match(r'(?:(?:(?:(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+):)?(?P[0-9]+)(?P\.[0-9]+)?Z?$', s) + m = re.match(r'''(?x) + (?: + (?: + (?: + (?P[0-9]+): + )? + (?P[0-9]+): + )? + (?P[0-9]+): + )? + (?P[0-9]+) + (?P\.[0-9]+)? + Z?$''', s) if m: days, hours, mins, secs, ms = m.groups() else: m = re.match( - r'''(?ix)(?:P?T)? + r'''(?ix)P?T? + (?:0Y)? + (?:0M)? (?: (?P[0-9]+)\s*d(?:ays?)?\s* )? + T? (?: (?P[0-9]+)\s*h(?:ours?)?\s* )? @@ -1851,7 +1866,12 @@ def parse_duration(s): if m: days, hours, mins, secs, ms = m.groups() else: - m = re.match(r'(?i)(?:(?P[0-9.]+)\s*(?:hours?)|(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s) + m = re.match(r'''(?ix) + (?: + (?P[0-9.]+)\s*(?:hours?) + |(?P[0-9.]+)\s*(?:mins?\.?|minutes?)\s* + ) + Z?$''', s) if m: hours, mins = m.groups() else: