1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-24 06:15:39 +08:00

[egghead:lesson] detect dash and m3u8 URLs

This commit is contained in:
mk-pmb 2017-10-03 02:09:12 +02:00
parent 9e71f88105
commit e9ed3309fb
4 changed files with 71 additions and 30 deletions

View File

@ -231,3 +231,4 @@ John Dong
Tatsuyuki Ishi Tatsuyuki Ishi
Daniel Weber Daniel Weber
Kay Bouché Kay Bouché
mk-pmb

View File

@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('87 Min.'), 5220) self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210) self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
self.assertEqual(parse_duration('P0Y0M0DT0H2M4.567S'), 124.567)
def test_fix_xml_ampersands(self): def test_fix_xml_ampersands(self):
self.assertEqual( self.assertEqual(

View File

@ -9,7 +9,46 @@ from ..utils import (
) )
class EggheadCourseIE(InfoExtractor): class EggheadShared():
def extract_lesson_metadata(self, lesson):
    """Build the info dict for a single egghead.io lesson.

    `lesson` is one lesson object as decoded from the egghead.io JSON API.

    The returned dict always carries the descriptive fields (title,
    description, thumbnail, timestamp, duration, view_count, tags).
    The media source is chosen in this order:

    1. If the lesson has a 'wistia_id', the result is a 'url_transparent'
       entry that hands the download over to the Wistia extractor.
    2. Otherwise the ID is taken from the slug of the lesson's 'http_url'
       and the formats come from media_urls['dash_url'], or failing that
       from media_urls['hls_url'].

    Raises NotImplementedError when none of these sources is present.
    """
    info = {
        'title': lesson.get('title'),
        'description': lesson.get('summary'),
        'thumbnail': lesson.get('thumb_nail'),
        'timestamp': unified_timestamp(lesson.get('published_at')),
        'duration': int_or_none(lesson.get('duration')),
        'view_count': int_or_none(lesson.get('plays_count')),
        'tags': try_get(lesson, lambda x: x['tag_list'], list),
    }

    wistia_id = lesson.get('wistia_id')
    if wistia_id:
        info.update({
            '_type': 'url_transparent',
            'ie_key': 'Wistia',
            'id': wistia_id,
            # %-formatting instead of '+' so that an ID that is not a
            # string does not raise TypeError.
            'url': 'wistia:%s' % wistia_id,
        })
        return info

    self.report_warning(
        'Cannot find a proper ID, will use lesson name URL slug')
    # A missing 'http_url' is searched as an empty string, so the failure
    # is a plain "no match" for the search helper to report instead of a
    # TypeError from matching against None.
    video_id = self._html_search_regex(
        r'^https?://egghead\.io/lessons/([A-Za-z0-9][A-Za-z0-9-]*)$',
        lesson.get('http_url') or '',
        'lesson name URL part as ID of last resort',
        group=1)

    media_urls = lesson.get('media_urls')
    if not isinstance(media_urls, dict):
        # Anything other than a mapping cannot hold the URLs looked up
        # below; treat it the same as "no media URLs".
        media_urls = {}

    dash_url = media_urls.get('dash_url')
    if dash_url:
        info.update({
            'id': video_id,
            'formats': self._extract_mpd_formats(dash_url, video_id),
        })
        return info

    hls_url = media_urls.get('hls_url')
    if hls_url:
        info.update({
            'id': video_id,
            'formats': self._extract_m3u8_formats(
                hls_url, video_id,
                entry_protocol='m3u8_native', m3u8_id='hls'),
        })
        return info

    raise NotImplementedError('Unable to detect download URL')
class EggheadCourseIE(InfoExtractor, EggheadShared):
IE_DESC = 'egghead.io course' IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course' IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)' _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
@ -25,22 +64,16 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
course = self._download_json( course = self._download_json(
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id) 'https://egghead.io/api/v1/series/' + playlist_id, playlist_id)
entries = [self.extract_lesson_metadata(lesson)
entries = [ for lesson in course['lessons']]
self.url_result(
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
for lesson in course['lessons'] if lesson.get('wistia_id')]
return self.playlist_result( return self.playlist_result(
entries, playlist_id, course.get('title'), entries, playlist_id, course.get('title'),
course.get('description')) course.get('description'))
class EggheadLessonIE(InfoExtractor): class EggheadLessonIE(InfoExtractor, EggheadShared):
IE_DESC = 'egghead.io lesson' IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson' IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)' _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
@ -65,20 +98,6 @@ class EggheadLessonIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
lesson_id = self._match_id(url) lesson_id = self._match_id(url)
lesson = self._download_json( lesson = self._download_json(
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id) 'https://egghead.io/api/v1/lessons/' + lesson_id, lesson_id)
return self.extract_lesson_metadata(lesson)
return {
'_type': 'url_transparent',
'ie_key': 'Wistia',
'url': 'wistia:%s' % lesson['wistia_id'],
'id': lesson['wistia_id'],
'title': lesson.get('title'),
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
}

View File

@ -1830,15 +1830,30 @@ def parse_duration(s):
s = s.strip() s = s.strip()
days, hours, mins, secs, ms = [None] * 5 days, hours, mins, secs, ms = [None] * 5
m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s) m = re.match(r'''(?x)
(?:
(?:
(?:
(?P<days>[0-9]+):
)?
(?P<hours>[0-9]+):
)?
(?P<mins>[0-9]+):
)?
(?P<secs>[0-9]+)
(?P<ms>\.[0-9]+)?
Z?$''', s)
if m: if m:
days, hours, mins, secs, ms = m.groups() days, hours, mins, secs, ms = m.groups()
else: else:
m = re.match( m = re.match(
r'''(?ix)(?:P?T)? r'''(?ix)P?T?
(?:0Y)?
(?:0M)?
(?: (?:
(?P<days>[0-9]+)\s*d(?:ays?)?\s* (?P<days>[0-9]+)\s*d(?:ays?)?\s*
)? )?
T?
(?: (?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s* (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
)? )?
@ -1851,7 +1866,12 @@ def parse_duration(s):
if m: if m:
days, hours, mins, secs, ms = m.groups() days, hours, mins, secs, ms = m.groups()
else: else:
m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s) m = re.match(r'''(?ix)
(?:
(?P<hours>[0-9.]+)\s*(?:hours?)
|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*
)
Z?$''', s)
if m: if m:
hours, mins = m.groups() hours, mins = m.groups()
else: else: