From 36e1332783b309a58baa37c032b5aea7c21edba0 Mon Sep 17 00:00:00 2001 From: felix Date: Fri, 4 Nov 2016 08:27:26 +0100 Subject: [PATCH] [screenwavemedia] remove; convert TeamFourStarIE and NormalbootsIE to JWPlatform extractors --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/generic.py | 16 --- youtube_dl/extractor/normalboots.py | 12 +- youtube_dl/extractor/screenwavemedia.py | 146 ------------------------ youtube_dl/extractor/teamfourstar.py | 48 ++++++++ 5 files changed, 54 insertions(+), 170 deletions(-) delete mode 100644 youtube_dl/extractor/screenwavemedia.py create mode 100644 youtube_dl/extractor/teamfourstar.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 578359a5e..5723ace8e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -804,7 +804,6 @@ from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE -from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE @@ -897,6 +896,7 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE +from .teamfourstar import TeamFourStarIE from .techtalks import TechTalksIE from .ted import TEDIE from .tele13 import Tele13IE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bde65fa27..63e196284 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -56,7 +56,6 @@ from .dailymotion import ( ) from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE -from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE @@ -1189,16 +1188,6 @@ class GenericIE(InfoExtractor): 'duration': 248.667, }, 
}, - # ScreenwaveMedia embed - { - 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1', - 'md5': '24ace5baba0d35d55c6810b51f34e9e0', - 'info_dict': { - 'id': 'cinemasnob-55d26273809dd', - 'ext': 'mp4', - 'title': 'cinemasnob', - }, - }, # BrightcoveInPageEmbed embed { 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', @@ -2206,11 +2195,6 @@ class GenericIE(InfoExtractor): if jwplatform_url: return self.url_result(jwplatform_url, 'JWPlatform') - # Look for ScreenwaveMedia embeds - mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) - if mobj is not None: - return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') - # Look for Digiteka embeds digiteka_url = DigitekaIE._extract_url(webpage) if digiteka_url: diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 6aa0895b8..61fe571df 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .screenwavemedia import ScreenwaveMediaIE +from .jwplatform import JWPlatformIE from ..utils import ( unified_strdate, @@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': ['ScreenwaveMedia'], + 'add_ie': ['JWPlatform'], } def _real_extract(self, url): @@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor): r'[A-Za-z]+, (?P.*)', webpage, 'date', fatal=False)) - screenwavemedia_url = self._html_search_regex( - ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL', - group='url') + jwplatform_url = JWPlatformIE._extract_url(webpage) return { '_type': 'url_transparent', 'id': video_id, - 'url': screenwavemedia_url, - 'ie_key': ScreenwaveMediaIE.ie_key(), + 'url': jwplatform_url, + 'ie_key': JWPlatformIE.ie_key(), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 
'thumbnail': self._og_search_thumbnail(webpage), diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py deleted file mode 100644 index 7d77e8825..000000000 --- a/youtube_dl/extractor/screenwavemedia.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, - js_to_json, -) - - -class ScreenwaveMediaIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' - EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' - _TESTS = [{ - 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - playerdata = self._download_webpage( - 'http://player.screenwavemedia.com/player.php?id=%s' % video_id, - video_id, 'Downloading player webpage') - - vidtitle = self._search_regex( - r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') - - playerconfig = self._download_webpage( - 'http://player.screenwavemedia.com/player.js', - video_id, 'Downloading playerconfig webpage') - - videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver') - - sources = self._parse_json( - js_to_json( - re.sub( - r'(?s)/\*.*?\*/', '', - self._search_regex( - r'sources\s*:\s*(\[[^\]]+?\])', playerconfig, - 'sources', - ).replace( - "' + thisObj.options.videoserver + '", - videoserver - ).replace( - "' + playerVidId + '", - video_id - ) - ) - ), - video_id, fatal=False - ) - - # Fallback to hardcoded sources if JS changes again - if not sources: - self.report_warning('Falling back to a hardcoded list of streams') - sources = [{ - 'file': 'http://%s/vod/%s_%s.mp4' % 
(videoserver, video_id, format_id), - 'type': 'mp4', - 'label': format_label, - } for format_id, format_label in ( - ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))] - sources.append({ - 'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id), - 'type': 'hls', - }) - - formats = [] - for source in sources: - file_ = source.get('file') - if not file_: - continue - if source.get('type') == 'hls': - formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4')) - else: - format_id = self._search_regex( - r'_(.+?)\.[^.]+$', file_, 'format id', default=None) - if not self._is_valid_url(file_, video_id, format_id or 'video'): - continue - format_label = source.get('label') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]', format_label, 'height', default=None)) - formats.append({ - 'url': file_, - 'format_id': format_id, - 'format': format_label, - 'ext': source.get('type'), - 'height': height, - }) - self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - return { - 'id': video_id, - 'title': vidtitle, - 'formats': formats, - } - - -class TeamFourIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' 
- _TEST = { - 'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', - 'info_dict': { - 'id': 'TeamFourStar-5292a02f20bfa', - 'ext': 'mp4', - 'upload_date': '20130401', - 'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', - 'title': 'A Moment With TFS Episode 4', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', - webpage, 'player data URL') - - video_title = self._html_search_regex( - r'<div class="heroheadingtitle">(?P<title>.+?)</div>', - webpage, 'title') - video_date = unified_strdate(self._html_search_regex( - r'<div class="heroheadingdate">(?P<date>.+?)</div>', - webpage, 'date', fatal=False)) - video_description = self._html_search_regex( - r'(?s)<div class="postcontent">(?P<description>.+?)</div>', - webpage, 'description', fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py new file mode 100644 index 000000000..a4db2ca98 --- /dev/null +++ b/youtube_dl/extractor/teamfourstar.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .jwplatform import JWPlatformIE +from ..utils import unified_strdate + + +class TeamFourStarIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)/?' + _TEST = { + 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', + 'info_dict': { + 'id': '0WdZO31W', + 'title': 'TFS Abridged Parody Episode 1', + 'description': 'Episode 1: The Return of Raditz! … Wait…\nCast\nMasakoX – Goku, Roshi\nLanipator – Piccolo, Radditz, Krillin, Vegeta\nVegeta3986 – Radditz, Yamcha, Oolong, Gohan\nHbi2k – Farmer with Shotgun\nMegami33 – Bulma, Puar\nTakahata101 – Nappa\nKaiserNeko – SpacePod\nSongs\nMorgenstemning by Edvard Hagerup Grieg\nCha-La-Head-Cha-La by Kageyama Hiranobu\nWE DO NOT OWN DRAGONBALL. DragonBall is Owned by TOEI ANIMATION, Ltd. and Licensed by FUNimation Productions, Ltd.. All Rights Reserved. 
DragonBall, DragonBall Z, DragonBall GT and all logos, character names and distinctive likenesses thereof are trademarks of TOEI ANIMATION, Ltd.\nThis is nothing more than a Parody made for entertainment purposes only.', + 'ext': 'mp4', + 'timestamp': 1394168400, + 'upload_date': '20080508', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + jwplatform_url = JWPlatformIE._extract_url(webpage) + + video_title = self._html_search_regex( + r'<h1 class="entry-title">(?P<title>.+?)</h1>', + webpage, 'title') + video_date = unified_strdate(self._html_search_regex( + r'<span class="meta-date date updated">(?P<date>.+?)</span>', + webpage, 'date', fatal=False)) + video_description = self._html_search_regex( + r'(?s)<div class="content-inner">.*?(?P<description><p>.+?)</div>', + webpage, 'description', fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + 'url': jwplatform_url, + }