From 7642b142b406bbc630bfaa38a6e34e10496360f5 Mon Sep 17 00:00:00 2001 From: ZerataX Date: Wed, 19 Dec 2018 00:04:03 +0100 Subject: [PATCH 1/4] [soundgasm] add internet archive support --- youtube_dl/extractor/soundgasm.py | 57 +++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index 3d78a9d76..e83e2c0f0 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -8,18 +8,33 @@ from .common import InfoExtractor class SoundgasmIE(InfoExtractor): IE_NAME = 'soundgasm' - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P[0-9a-zA-Z_-]+)/(?P[0-9a-zA-Z_-]+)' - _TEST = { - 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', - 'md5': '010082a2c802c5275bb00030743e75ad', - 'info_dict': { - 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', - 'ext': 'm4a', - 'title': 'Piano sample', - 'description': 'Royalty Free Sample Music', - 'uploader': 'ytdl', + _VALID_URL = r'(?Phttps?://web\.archive\.org/web/\d+/)?' + \ + r'https?://(?:www\.)?soundgasm\.net(?::80)?/u/' + \ + r'(?P[0-9a-zA-Z_-]+)/(?P[0-9a-zA-Z_-]+)' + _TESTS = [{ + 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl', + } + }, + { + 'url': 'http://web.archive.org/web/20181218221507/' + + 'https://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl', + } } - } + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -31,6 +46,9 @@ class SoundgasmIE(InfoExtractor): r'(?s)m4a\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'audio URL', group='url') + if mobj.group('archive'): + audio_url = audio_url[:41] + "if_" + audio_url[41:] + title = self._search_regex( r']+\bclass=["\']jp-title[^>]+>([^<]+)', webpage, 'title', default=display_id) @@ -56,14 +74,25 @@ class SoundgasmIE(InfoExtractor): class SoundgasmProfileIE(InfoExtractor): IE_NAME = 'soundgasm:profile' - _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P[^/]+)/?(?:\#.*)?$' - _TEST = { + _VALID_URL = r'(?Phttps?://web\.archive\.org/web/\d+/)?' + \ + r'https?://(?:www\.)?soundgasm\.net/u/' + \ + r'(?P[^/]+)/?(?:\#.*)?$' + _TESTS = [{ 'url': 'http://soundgasm.net/u/ytdl', 'info_dict': { 'id': 'ytdl', }, 'playlist_count': 1, - } + }, + { + 'url': 'http://web.archive.org/web/20181218222843/' + + 'https://soundgasm.net/u/ytdl', + 'info_dict': { + 'id': 'ytdl', + }, + 'playlist_count': 1, + } + ] def _real_extract(self, url): profile_id = self._match_id(url) From ee8e4aac78e66a7241e71c322173425272685f15 Mon Sep 17 00:00:00 2001 From: Jona Abdinghoff Date: Wed, 19 Dec 2018 02:39:12 +0100 Subject: [PATCH 2/4] [soundgasm] adjust _VALID_URL --- youtube_dl/extractor/soundgasm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index e83e2c0f0..5f811444a 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class SoundgasmIE(InfoExtractor): IE_NAME = 'soundgasm' - _VALID_URL = r'(?Phttps?://web\.archive\.org/web/\d+/)?' + \ + _VALID_URL = r'(?Phttps?://web\.archive\.org/web/\d+(?:if_)?/)?' + \ r'https?://(?:www\.)?soundgasm\.net(?::80)?/u/' + \ r'(?P[0-9a-zA-Z_-]+)/(?P[0-9a-zA-Z_-]+)' _TESTS = [{ @@ -74,7 +74,7 @@ class SoundgasmIE(InfoExtractor): class SoundgasmProfileIE(InfoExtractor): IE_NAME = 'soundgasm:profile' - _VALID_URL = r'(?Phttps?://web\.archive\.org/web/\d+/)?' + \ + _VALID_URL = r'(?Phttps?://web\.archive\.org/web/\d+(?:if_)?/)?' + \ r'https?://(?:www\.)?soundgasm\.net/u/' + \ r'(?P[^/]+)/?(?:\#.*)?$' _TESTS = [{ From e67981428189aef0f92c871284e6986e81b85fc8 Mon Sep 17 00:00:00 2001 From: ZerataX Date: Thu, 2 Apr 2020 09:03:25 +0200 Subject: [PATCH 3/4] [soundgasm] reformat code --- youtube_dl/extractor/soundgasm.py | 58 ++++++++++++++----------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index 5f811444a..f9db15ec9 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -12,29 +12,26 @@ class SoundgasmIE(InfoExtractor): r'https?://(?:www\.)?soundgasm\.net(?::80)?/u/' + \ r'(?P[0-9a-zA-Z_-]+)/(?P[0-9a-zA-Z_-]+)' _TESTS = [{ - 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', - 'md5': '010082a2c802c5275bb00030743e75ad', - 'info_dict': { - 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', - 'ext': 'm4a', - 'title': 'Piano sample', - 'description': 'Royalty Free Sample Music', - 'uploader': 'ytdl', - } - }, - { - 'url': 'http://web.archive.org/web/20181218221507/' + - 'https://soundgasm.net/u/ytdl/Piano-sample', - 'md5': '010082a2c802c5275bb00030743e75ad', - 'info_dict': { - 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', - 'ext': 'm4a', - 'title': 'Piano sample', - 'description': 'Royalty Free Sample Music', - 'uploader': 'ytdl', - } + 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl' } - ] + }, { + 'url': 'http://web.archive.org/web/20181218221507/https://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl' + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -47,7 +44,8 @@ class SoundgasmIE(InfoExtractor): 'audio URL', group='url') if mobj.group('archive'): - audio_url = audio_url[:41] + "if_" + audio_url[41:] + pos = mobj.span('archive')[1] - 1 + audio_url = audio_url[:pos] + "if_" + audio_url[pos:] title = self._search_regex( r']+\bclass=["\']jp-title[^>]+>([^<]+)', @@ -82,17 +80,15 @@ class SoundgasmProfileIE(InfoExtractor): 'info_dict': { 'id': 'ytdl', }, - 'playlist_count': 1, - }, + 'playlist_count': 1 + }, { - 'url': 'http://web.archive.org/web/20181218222843/' + - 'https://soundgasm.net/u/ytdl', + 'url': 'http://web.archive.org/web/20181218222843/https://soundgasm.net/u/ytdl', 'info_dict': { - 'id': 'ytdl', + 'id': 'ytdl' }, - 'playlist_count': 1, - } - ] + 'playlist_count': 1 + }] def _real_extract(self, url): profile_id = self._match_id(url) From c66df63b59c710f960cbd68705bf9b4b8e73046a Mon Sep 17 00:00:00 2001 From: ZerataX Date: Thu, 2 Apr 2020 09:06:41 +0200 Subject: [PATCH 4/4] [soundgasm] properly format tests --- youtube_dl/extractor/soundgasm.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index f9db15ec9..f40b29204 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -81,13 +81,12 @@ class SoundgasmProfileIE(InfoExtractor): 'id': 'ytdl', }, 'playlist_count': 1 - }, - { - 'url': 'http://web.archive.org/web/20181218222843/https://soundgasm.net/u/ytdl', - 'info_dict': { - 'id': 'ytdl' - }, - 'playlist_count': 1 + }, { + 'url': 'http://web.archive.org/web/20181218222843/https://soundgasm.net/u/ytdl', + 'info_dict': { + 'id': 'ytdl' + }, + 'playlist_count': 1 }] def _real_extract(self, url):