From 78daf100fa890b36f58e3bdc4dbfd0975f183e6f Mon Sep 17 00:00:00 2001 From: rubyist Date: Tue, 3 Mar 2020 12:27:13 -0800 Subject: [PATCH 1/3] Initial support for Pony.FM site. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ponyfm.py | 54 ++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/ponyfm.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64d1fa251..d67a683bd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -849,6 +849,7 @@ from .polskieradio import ( PolskieRadioIE, PolskieRadioCategoryIE, ) +from .ponyfm import PonyFMIE from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE from .porn91 import Porn91IE diff --git a/youtube_dl/extractor/ponyfm.py b/youtube_dl/extractor/ponyfm.py new file mode 100644 index 000000000..ed5e380e8 --- /dev/null +++ b/youtube_dl/extractor/ponyfm.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class PonyFMIE(InfoExtractor): + """ + InfoExtractor for PonyFM + + This extractor is for tracks. Playlists and Albums may be defined at a later + point using a separate class that will be in this file. + """ + _VALID_URL = r'https?://pony\.fm/tracks/(?P<id>\d+)-.+' + _TESTS = [] + + def _real_extract(self, url): + track_id = self._match_id(url) + + # Extract required fields from webpage + webpage = self._download_webpage(url, track_id) + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage) or self._html_search_regex( + r'

([^<]+

', webpage, "title" + ) + + # Create formats array from track_id + formats = [] + base_url = "https://pony.fm/t%s" % track_id + formats.extend([ + {'url': "%s/dl.mp3" % base_url}, + {'url': "%s/dl.m4a" % base_url}, + {'url': "%s/dl.ogg" % base_url}, + {'url': "%s/dl.flac" % base_url} + ]) + + extracted = { + 'id': track_id, + 'title': title, + 'formats': formats, + } + + # Extract optional metadata (author, album art) from webpage + artwork = self._html_search_meta(['og:image'], webpage) + author = self._search_regex( + r'by: ]+>([^<]+)', webpage, "author", fatal=False + ) + + if artwork: + extracted['thumbnail'] = artwork + if author: + extracted['uploader'] = author + + return extracted From e517a8338f8051d0d8171e92d1899ba606120cc0 Mon Sep 17 00:00:00 2001 From: rubyist Date: Wed, 4 Mar 2020 23:37:29 -0800 Subject: [PATCH 2/3] Use JSON API to actually get the right available formats --- youtube_dl/extractor/ponyfm.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/ponyfm.py b/youtube_dl/extractor/ponyfm.py index ed5e380e8..b0ec6300c 100644 --- a/youtube_dl/extractor/ponyfm.py +++ b/youtube_dl/extractor/ponyfm.py @@ -17,22 +17,17 @@ class PonyFMIE(InfoExtractor): def _real_extract(self, url): track_id = self._match_id(url) - # Extract required fields from webpage - webpage = self._download_webpage(url, track_id) - title = self._html_search_meta( - ['og:title', 'twitter:title'], webpage) or self._html_search_regex( - r'

([^<]+

', webpage, "title" + ) + + # Extract required fields from JSON API + apiurl = "https://pony.fm/api/web/tracks/%s" % track_id + json = self._download_json(apiurl, track_id)['track'] - # Create formats array from track_id + title = json['title'] formats = [] - base_url = "https://pony.fm/t%s" % track_id - formats.extend([ - {'url': "%s/dl.mp3" % base_url}, - {'url': "%s/dl.m4a" % base_url}, - {'url': "%s/dl.ogg" % base_url}, - {'url': "%s/dl.flac" % base_url} - ]) + for f in json['formats']: + formats.append({ + 'format': f['name'], + 'url': f['url'], + }) extracted = { 'id': track_id, @@ -40,11 +35,9 @@ class PonyFMIE(InfoExtractor): 'formats': formats, } - # Extract optional metadata (author, album art) from webpage - artwork = self._html_search_meta(['og:image'], webpage) - author = self._search_regex( - r'by: ]+>([^<]+)', webpage, "author", fatal=False - ) + # Extract optional metadata (author, album art) from JSON API + author = json.get('user', {}).get('name') + artwork = json.get('covers', {}).get('original') if artwork: extracted['thumbnail'] = artwork From 51db764448466807368a986ba5024cc80a845a0a Mon Sep 17 00:00:00 2001 From: rubyist Date: Thu, 5 Mar 2020 17:01:52 -0800 Subject: [PATCH 3/3] Added some tests --- youtube_dl/extractor/ponyfm.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ponyfm.py b/youtube_dl/extractor/ponyfm.py index b0ec6300c..11d8b9a2f 100644 --- a/youtube_dl/extractor/ponyfm.py +++ b/youtube_dl/extractor/ponyfm.py @@ -12,7 +12,23 @@ class PonyFMIE(InfoExtractor): point using a separate class that will be in this file. 
 """ _VALID_URL = r'https?://pony\.fm/tracks/(?P<id>\d+)-.+' - _TESTS = [] + _TESTS = [{ + 'url': 'https://pony.fm/tracks/43462-summer-wind-fallout-equestria-skybolt', + 'info_dict': { + 'id': '43462', + 'ext': 'flac', + 'title': 'Summer Wind (Fallout: Equestria) - SkyBolt', + 'uploader': 'SkyBoltsMusic', + } + }, { + 'url': 'https://pony.fm/tracks/43852-kirin-ts', + 'info_dict': { + 'id': '43852', + 'ext': 'mp3', + 'title': 'KIRIN TS', + 'uploader': '7TAIL3DFOXX' + } + }] def _real_extract(self, url): track_id = self._match_id(url)