diff --git a/youtube_dl/extractor/ellentube.py b/youtube_dl/extractor/ellentube.py
index c29cbe166..d4f07f34f 100644
--- a/youtube_dl/extractor/ellentube.py
+++ b/youtube_dl/extractor/ellentube.py
@@ -1,10 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
+ clean_html,
int_or_none,
urljoin,
)
@@ -13,24 +12,21 @@ from ..utils import (
class EllenTubeBaseIE(InfoExtractor):
API_URL = 'https://api-prod.ellentube.com/'
- def _extract_from_video_id(self, video_id, display_id=None):
- video_data = self._download_json(
- urljoin(self.API_URL, 'ellenapi/api/item/%s' % video_id), video_id)
- title = video_data['title']
- description = video_data.get('description')
- publish_time = int_or_none(video_data.get('publishTime'))
- thumbnail = video_data.get('thumbnail')
+ def _extract_video_from_json(self, data, video_id, display_id=None):
+ title = data['title']
+ description = data.get('description')
+ publish_time = int_or_none(data.get('publishTime'))
+ thumbnail = data.get('thumbnail')
formats = []
duration = None
- for entry in video_data.get('media'):
+ for entry in data.get('media'):
if entry.get('id') == 'm3u8':
- formats = self._extract_m3u8_formats(entry.get(
- 'url'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+ formats = self._extract_m3u8_formats(
+ entry.get('url'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
duration = int_or_none(entry.get('duration'))
break
self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
@@ -42,10 +38,31 @@ class EllenTubeBaseIE(InfoExtractor):
'formats': formats,
}
- def _extract_video_ids_from_api_search(self, api_search, display_id):
- feed_data = self._download_json(
+ def _extract_playlist_entries_from_json(self, data, display_id):
+ return [self._extract_video_from_json(elem, elem['id'])
+ for elem in data if elem.get('type') == 'VIDEO']
+
+ def _extract_from_video_id(self, video_id, display_id=None):
+ api_data = self._download_json(
+ urljoin(self.API_URL, 'ellenapi/api/item/%s' % video_id), video_id)
+ return self._extract_video_from_json(api_data, video_id, display_id)
+
+ def _extract_playlist(self, url, display_id, extract_description=True):
+ webpage = self._download_webpage(url, display_id)
+ playlist_data = self._html_search_regex(
+ r'
', webpage, 'playlist data')
+ playlist_title = self._search_regex(
+ r'"title"\s*:\s*"(.+?)"', playlist_data, 'playlist title')
+ playlist_description = clean_html(self._search_regex(
+ r'"description"\s*:\s*"(.+?)"', playlist_data, 'playlist description',
+ fatal=False)) if extract_description else None
+ api_search = self._search_regex(
+ r'"filter"\s*:\s*"(.+?)"', playlist_data, 'playlist api request')
+ api_data = self._download_json(
urljoin(self.API_URL, 'ellenapi/api/feed/?%s' % api_search), display_id)
- return [entry.get('id') for entry in feed_data if entry.get('type') == 'VIDEO']
+ return self.playlist_result(
+ self._extract_playlist_entries_from_json(api_data, display_id),
+ display_id, playlist_title, playlist_description)
class EllenTubeVideoIE(EllenTubeBaseIE):
@@ -74,41 +91,36 @@ class EllenTubeVideoIE(EllenTubeBaseIE):
return self._extract_from_video_id(video_id, display_id)
-class EllenTubePlaylistIE(EllenTubeBaseIE):
- _VALID_URL = r'https?://(?:www\.)?ellentube\.com/(?:episode|studios)/(?P
.+)\.html'
+class EllenTubeEpisodeIE(EllenTubeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ellentube\.com/episode/(?P.+)\.html'
- _TESTS = [{
+ _TEST = {
'url': 'https://www.ellentube.com/episode/dax-shepard-jordan-fisher-haim.html',
'info_dict': {
'id': 'dax-shepard-jordan-fisher-haim',
'title': 'Dax Shepard, \'DWTS\' Team Jordan Fisher & Lindsay Arnold, HAIM',
+ 'description': 'md5:aed85d42892f6126e71ec5ed2aea2a0d'
},
'playlist_count': 6,
- }, {
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._extract_playlist(url, display_id)
+
+
+class EllenTubeStudioIE(EllenTubeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ellentube\.com/studios/(?P.+)\.html'
+
+ _TEST = {
'url': 'https://www.ellentube.com/studios/macey-goes-rving0.html',
'info_dict': {
'id': 'macey-goes-rving0',
'title': 'Macey Goes RVing',
},
'playlist_mincount': 3,
- }]
+ }
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- playlist_data = self._html_search_regex(
- r'', webpage, 'episode data')
- playlist_title = self._search_regex(
- r'title"\s*:\s*"(.+?)"', playlist_data, 'playlist title')
- entries = [self._extract_from_video_id(m.group('vid')) for m in re.finditer(
- r'pid=(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', playlist_data)]
- if not entries:
- api_search = self._search_regex(
- r'filter"\s*:\s*"(.+?)"', playlist_data, 'api search')
- video_ids = self._extract_video_ids_from_api_search(
- api_search, display_id)
- entries = [self._extract_from_video_id(
- vid, display_id) for vid in video_ids]
-
- return self.playlist_result(entries, display_id, playlist_title)
+ return self._extract_playlist(url, display_id, False)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index cb92475c7..ac75607fe 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -309,7 +309,8 @@ from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
from .ellentube import (
- EllenTubePlaylistIE,
+ EllenTubeEpisodeIE,
+ EllenTubeStudioIE,
EllenTubeVideoIE,
)
from .elpais import ElPaisIE