mirror of
https://github.com/l1ving/youtube-dl
synced 2025-01-24 06:15:39 +08:00
[egghead:lesson] detect dash and m3u8 URLs
This commit is contained in:
parent
9e71f88105
commit
e9ed3309fb
1
AUTHORS
1
AUTHORS
@ -231,3 +231,4 @@ John Dong
|
|||||||
Tatsuyuki Ishi
|
Tatsuyuki Ishi
|
||||||
Daniel Weber
|
Daniel Weber
|
||||||
Kay Bouché
|
Kay Bouché
|
||||||
|
mk-pmb
|
||||||
|
@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(parse_duration('87 Min.'), 5220)
|
self.assertEqual(parse_duration('87 Min.'), 5220)
|
||||||
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
|
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
|
||||||
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
|
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
|
||||||
|
self.assertEqual(parse_duration('P0Y0M0DT0H2M4.567S'), 124.567)
|
||||||
|
|
||||||
def test_fix_xml_ampersands(self):
|
def test_fix_xml_ampersands(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
@ -9,7 +9,46 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EggheadCourseIE(InfoExtractor):
|
class EggheadShared():
|
||||||
|
def extract_lesson_metadata(self, lesson):
|
||||||
|
info = {
|
||||||
|
'title': lesson.get('title'),
|
||||||
|
'description': lesson.get('summary'),
|
||||||
|
'thumbnail': lesson.get('thumb_nail'),
|
||||||
|
'timestamp': unified_timestamp(lesson.get('published_at')),
|
||||||
|
'duration': int_or_none(lesson.get('duration')),
|
||||||
|
'view_count': int_or_none(lesson.get('plays_count')),
|
||||||
|
'tags': try_get(lesson, lambda x: x['tag_list'], list),
|
||||||
|
}
|
||||||
|
|
||||||
|
def find_id_and_dlurl():
|
||||||
|
vid_id = lesson.get('wistia_id')
|
||||||
|
if vid_id:
|
||||||
|
return {'ie_key': 'Wistia', '_type': 'url_transparent',
|
||||||
|
'id': vid_id, 'url': 'wistia:' + vid_id}
|
||||||
|
|
||||||
|
self.report_warning('Cannot find an proper ID, will use lesson name URL slug')
|
||||||
|
vid_id = self._html_search_regex(
|
||||||
|
r'^https?://egghead\.io/lessons/([A-Za-z0-9][A-Za-z0-9-]*)$',
|
||||||
|
lesson.get('http_url'),
|
||||||
|
'lesson name URL part as ID of last resort',
|
||||||
|
group=1)
|
||||||
|
|
||||||
|
mu = lesson.get('media_urls')
|
||||||
|
if mu:
|
||||||
|
src = mu.get('dash_url')
|
||||||
|
if src:
|
||||||
|
return {'id': vid_id, 'formats': self._extract_mpd_formats(src, vid_id)}
|
||||||
|
src = mu.get('hls_url')
|
||||||
|
if src:
|
||||||
|
return {'id': vid_id, 'formats': self._extract_m3u8_formats(src, vid_id, entry_protocol='m3u8_native', m3u8_id='hls')}
|
||||||
|
raise NotImplementedError('Unable to detect download URL')
|
||||||
|
info.update(find_id_and_dlurl())
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class EggheadCourseIE(InfoExtractor, EggheadShared):
|
||||||
IE_DESC = 'egghead.io course'
|
IE_DESC = 'egghead.io course'
|
||||||
IE_NAME = 'egghead:course'
|
IE_NAME = 'egghead:course'
|
||||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||||
@ -25,22 +64,16 @@ class EggheadCourseIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
course = self._download_json(
|
course = self._download_json(
|
||||||
'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
|
'https://egghead.io/api/v1/series/' + playlist_id, playlist_id)
|
||||||
|
entries = [self.extract_lesson_metadata(lesson)
|
||||||
entries = [
|
for lesson in course['lessons']]
|
||||||
self.url_result(
|
|
||||||
'wistia:%s' % lesson['wistia_id'], ie='Wistia',
|
|
||||||
video_id=lesson['wistia_id'], video_title=lesson.get('title'))
|
|
||||||
for lesson in course['lessons'] if lesson.get('wistia_id')]
|
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, course.get('title'),
|
entries, playlist_id, course.get('title'),
|
||||||
course.get('description'))
|
course.get('description'))
|
||||||
|
|
||||||
|
|
||||||
class EggheadLessonIE(InfoExtractor):
|
class EggheadLessonIE(InfoExtractor, EggheadShared):
|
||||||
IE_DESC = 'egghead.io lesson'
|
IE_DESC = 'egghead.io lesson'
|
||||||
IE_NAME = 'egghead:lesson'
|
IE_NAME = 'egghead:lesson'
|
||||||
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
|
||||||
@ -65,20 +98,6 @@ class EggheadLessonIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lesson_id = self._match_id(url)
|
lesson_id = self._match_id(url)
|
||||||
|
|
||||||
lesson = self._download_json(
|
lesson = self._download_json(
|
||||||
'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
|
'https://egghead.io/api/v1/lessons/' + lesson_id, lesson_id)
|
||||||
|
return self.extract_lesson_metadata(lesson)
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': 'Wistia',
|
|
||||||
'url': 'wistia:%s' % lesson['wistia_id'],
|
|
||||||
'id': lesson['wistia_id'],
|
|
||||||
'title': lesson.get('title'),
|
|
||||||
'description': lesson.get('summary'),
|
|
||||||
'thumbnail': lesson.get('thumb_nail'),
|
|
||||||
'timestamp': unified_timestamp(lesson.get('published_at')),
|
|
||||||
'duration': int_or_none(lesson.get('duration')),
|
|
||||||
'view_count': int_or_none(lesson.get('plays_count')),
|
|
||||||
'tags': try_get(lesson, lambda x: x['tag_list'], list),
|
|
||||||
}
|
|
||||||
|
@ -1830,15 +1830,30 @@ def parse_duration(s):
|
|||||||
s = s.strip()
|
s = s.strip()
|
||||||
|
|
||||||
days, hours, mins, secs, ms = [None] * 5
|
days, hours, mins, secs, ms = [None] * 5
|
||||||
m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
|
m = re.match(r'''(?x)
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
(?:
|
||||||
|
(?P<days>[0-9]+):
|
||||||
|
)?
|
||||||
|
(?P<hours>[0-9]+):
|
||||||
|
)?
|
||||||
|
(?P<mins>[0-9]+):
|
||||||
|
)?
|
||||||
|
(?P<secs>[0-9]+)
|
||||||
|
(?P<ms>\.[0-9]+)?
|
||||||
|
Z?$''', s)
|
||||||
if m:
|
if m:
|
||||||
days, hours, mins, secs, ms = m.groups()
|
days, hours, mins, secs, ms = m.groups()
|
||||||
else:
|
else:
|
||||||
m = re.match(
|
m = re.match(
|
||||||
r'''(?ix)(?:P?T)?
|
r'''(?ix)P?T?
|
||||||
|
(?:0Y)?
|
||||||
|
(?:0M)?
|
||||||
(?:
|
(?:
|
||||||
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
|
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
|
||||||
)?
|
)?
|
||||||
|
T?
|
||||||
(?:
|
(?:
|
||||||
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
|
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
|
||||||
)?
|
)?
|
||||||
@ -1851,7 +1866,12 @@ def parse_duration(s):
|
|||||||
if m:
|
if m:
|
||||||
days, hours, mins, secs, ms = m.groups()
|
days, hours, mins, secs, ms = m.groups()
|
||||||
else:
|
else:
|
||||||
m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
|
m = re.match(r'''(?ix)
|
||||||
|
(?:
|
||||||
|
(?P<hours>[0-9.]+)\s*(?:hours?)
|
||||||
|
|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*
|
||||||
|
)
|
||||||
|
Z?$''', s)
|
||||||
if m:
|
if m:
|
||||||
hours, mins = m.groups()
|
hours, mins = m.groups()
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user