From 1db3d482185261152aa40d0935a88add504bc69d Mon Sep 17 00:00:00 2001 From: gkoelln Date: Wed, 25 Jan 2017 10:49:24 -0600 Subject: [PATCH 01/11] [Bandcamp] Extract additional fields (#1) * [bandcamp] Add additional extraction fields Adding additional extraction fields * [bandcamp] remove redundant changes Removed redundant changes * [bandcamp] Remove extra spaces Removed extra spaces * [bandcamp] Minor corrections Correct order of return fields, minimize coding lines --- youtube_dl/extractor/bandcamp.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 88c590e98..8861c8b35 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -47,6 +47,15 @@ class BandcampIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) + album = self._search_regex( + r'(?ms).*?title\s*?:\s*?"(?P.*?)",', + webpage, 'album') + album_artist = self._search_regex( + r'(?ms)var EmbedData = .*?[{,]\s*artist:\s*?"(?P.*?)",$', + webpage, 'album artist') + release_year = self._search_regex( + r'(?ms).*?release_date"?:\s*?"\d+ \w+ (?P\d+)\s*?.*?GMT",', + webpage, 'release year') m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) if not m_download: m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) @@ -77,6 +86,12 @@ class BandcampIE(InfoExtractor): 'title': data['title'], 'formats': formats, 'duration': float_or_none(data.get('duration')), + 'track': data['title'], + 'track_number': data['track_num'], + 'track_id': track_id, + 'album': album, + 'album_artist': album_artist, + 'release_year': release_year, } else: raise ExtractorError('No free songs found') @@ -86,6 +101,10 @@ class BandcampIE(InfoExtractor): r'(?ms)var TralbumData = .*?[{,]\s*id: (?P\d+),?$', webpage, 'video id') + track_number = self._search_regex( + r'"track_num":(?P\d+),', + webpage, 'track number') + 
download_webpage = self._download_webpage( download_link, video_id, 'Downloading free downloads page') @@ -148,6 +167,10 @@ class BandcampIE(InfoExtractor): 'artist': artist, 'track': track, 'formats': formats, + 'track_number': track_number, + 'album': album, + 'album_artist': album_artist, + 'release_year': release_year, } @@ -233,5 +256,6 @@ class BandcampAlbumIE(InfoExtractor): 'uploader_id': uploader_id, 'id': playlist_id, 'title': title, + 'album': title, 'entries': entries, } From b6661befd5bb8bafa980c023e0993ca1403eb0cb Mon Sep 17 00:00:00 2001 From: gkoelln Date: Wed, 25 Jan 2017 11:22:08 -0600 Subject: [PATCH 02/11] [bandcamp] Make new extraction fields optional Made new extraction fields optional --- youtube_dl/extractor/bandcamp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 8861c8b35..5f87ae3ac 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -49,13 +49,13 @@ class BandcampIE(InfoExtractor): webpage = self._download_webpage(url, title) album = self._search_regex( r'(?ms).*?title\s*?:\s*?"(?P.*?)",', - webpage, 'album') + webpage, 'album', fatal=False) album_artist = self._search_regex( r'(?ms)var EmbedData = .*?[{,]\s*artist:\s*?"(?P.*?)",$', - webpage, 'album artist') + webpage, 'album artist', fatal=False) release_year = self._search_regex( r'(?ms).*?release_date"?:\s*?"\d+ \w+ (?P\d+)\s*?.*?GMT",', - webpage, 'release year') + webpage, 'release year', fatal=False) m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage) if not m_download: m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage) @@ -103,7 +103,7 @@ class BandcampIE(InfoExtractor): track_number = self._search_regex( r'"track_num":(?P\d+),', - webpage, 'track number') + webpage, 'track number', fatal=False) download_webpage = self._download_webpage( download_link, video_id, 'Downloading free downloads page') From 
150fa802eda31fbec0a4e7f7e0d4a2da10e9ad2b Mon Sep 17 00:00:00 2001 From: gkoelln Date: Thu, 26 Jan 2017 10:15:03 -0600 Subject: [PATCH 03/11] [bandcamp] Adhere to youtube-dl specifications Making optional fields optional --- youtube_dl/extractor/bandcamp.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 5f87ae3ac..8fbadc644 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -62,8 +62,7 @@ class BandcampIE(InfoExtractor): if m_trackinfo: json_code = m_trackinfo.group(1) data = json.loads(json_code)[0] - track_id = compat_str(data['id']) - + if not data.get('file'): raise ExtractorError('Not streamable', video_id=track_id, expected=True) @@ -86,9 +85,9 @@ class BandcampIE(InfoExtractor): 'title': data['title'], 'formats': formats, 'duration': float_or_none(data.get('duration')), - 'track': data['title'], - 'track_number': data['track_num'], - 'track_id': track_id, + 'track': data.get('title'), + 'track_number': data.get('track_num'), + 'track_id': data.get('id'), 'album': album, 'album_artist': album_artist, 'release_year': release_year, From 3ca8c223150a289f57594da5447c2c59b5eeb4a4 Mon Sep 17 00:00:00 2001 From: gkoelln Date: Thu, 26 Jan 2017 10:28:00 -0600 Subject: [PATCH 04/11] [bandcamp] add extracted fields Fixed what I broke in the id fields --- youtube_dl/extractor/bandcamp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 8fbadc644..3b2e482ff 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -62,7 +62,8 @@ class BandcampIE(InfoExtractor): if m_trackinfo: json_code = m_trackinfo.group(1) data = json.loads(json_code)[0] - + track_id = compat_str(data['id']) + if not data.get('file'): raise ExtractorError('Not streamable', video_id=track_id, expected=True) @@ -87,7 +88,7 @@ class 
BandcampIE(InfoExtractor): 'duration': float_or_none(data.get('duration')), 'track': data.get('title'), 'track_number': data.get('track_num'), - 'track_id': data.get('id'), + 'track_id': track_id, 'album': album, 'album_artist': album_artist, 'release_year': release_year, From 8866c7594bd08c1f46c8287ccc79fe59596cba65 Mon Sep 17 00:00:00 2001 From: gkoelln Date: Thu, 26 Jan 2017 13:27:56 -0600 Subject: [PATCH 05/11] [bandcamp] Add track_id to free download Added track_id to free download --- youtube_dl/extractor/bandcamp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 3b2e482ff..8b72e60bc 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -168,6 +168,7 @@ class BandcampIE(InfoExtractor): 'track': track, 'formats': formats, 'track_number': track_number, + 'track_id': video_id, 'album': album, 'album_artist': album_artist, 'release_year': release_year, From 61103eee0a39bb929a4402e7a27d6f9be5a3d982 Mon Sep 17 00:00:00 2001 From: gkoelln Date: Thu, 26 Jan 2017 13:29:23 -0600 Subject: [PATCH 06/11] [bandcamp] Replace tabs with spaces My editor puts tabs and I forgot to replace them with spaces earlier --- youtube_dl/extractor/bandcamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 8b72e60bc..7579c4108 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -168,7 +168,7 @@ class BandcampIE(InfoExtractor): 'track': track, 'formats': formats, 'track_number': track_number, - 'track_id': video_id, + 'track_id': video_id, 'album': album, 'album_artist': album_artist, 'release_year': release_year, From 45d8613023af808e0cc8e180fb83d612cc3b4b92 Mon Sep 17 00:00:00 2001 From: gkoelln Date: Fri, 27 Jan 2017 15:27:10 -0600 Subject: [PATCH 07/11] [ShoutFactoryTV] New extractor New extractor --- youtube_dl/extractor/shoutfactorytv.py | 54 
++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/shoutfactorytv.py diff --git a/youtube_dl/extractor/shoutfactorytv.py b/youtube_dl/extractor/shoutfactorytv.py new file mode 100644 index 000000000..740138c81 --- /dev/null +++ b/youtube_dl/extractor/shoutfactorytv.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + parse_m3u8_attributes, +) + + +class ShoutFactoryTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?shoutfactorytv\.com/.*?/(?P[0-9a-d]+)' + _TEST = { + 'url': 'http://www.shoutfactorytv.com/mst3k-shorts/mst3k-short-x-marks-the-spot/57473979e0a6b40d7300809a', + 'md5': 'a04c5394947cead82be3808ec6285f71', + 'info_dict': { + 'id': '57473979e0a6b40d7300809a', + 'ext': 'mp4', + 'title': 'MST3K Short: X Marks The Spot', + 'series': 'MST3K Shorts', + 'description': 'Poor Joe gets grilled in a heavenly court in this WWII era film promoting road safety in New Jersey.', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'

(.+).+

', webpage, 'title') + series = self._html_search_regex( + r'

.+ (.+)

', webpage, 'series', default=None) + + player_embed = re.search( + r'