From 9d96a65eafd77645ce73506fdd277a28a406c27b Mon Sep 17 00:00:00 2001 From: HugoTai100 Date: Mon, 2 Apr 2018 01:53:30 +0800 Subject: [PATCH 1/2] [Joox] Add new extractor --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/joox.py | 94 ++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/joox.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de48a37ad..7633c73ed 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -491,6 +491,7 @@ from .jamendo import ( JamendoAlbumIE, ) from .jeuxvideo import JeuxVideoIE +from .joox import JooxIE from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE @@ -1327,7 +1328,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE diff --git a/youtube_dl/extractor/joox.py b/youtube_dl/extractor/joox.py new file mode 100644 index 000000000..153ea2d06 --- /dev/null +++ b/youtube_dl/extractor/joox.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import time + +from .common import InfoExtractor +import base64 + + +from ..compat import ( + compat_urllib_parse_urlencode, +) + + +class JooxIE(InfoExtractor): + IE_NAME = 'jooxmusic:single' + IE_DESC = 'Joox' + _VALID_URL = r'https?://www\.joox\.com/(?P<country>[a-z]*)/(?P<language>[a-z]*_?[a-z]*)/single/(?P<id>[a-zA-z0-9+]*==)' + _TESTS = [{ + 'url': 'http://www.joox.com/hk/zh_hk/single/WYL92NDGHMhCs3GdDZBsMQ==', + 'md5': '81a3d00b7422edb16a59ee9fe3dcb5dd', + 'info_dict': { + 'id': 'WYL92NDGHMhCs3GdDZBsMQ==', + 'ext': 'mp3', + 'title': '忍', + 'release_date': '20180214', + 'creator': '林欣彤', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }] + + def _real_extract(self, url): + _VALID_URL = r'https?://www\.joox\.com/(?P<country>[a-z]*)/(?P<language>[a-z]*_?[a-z]*)/single/(?P<id>[a-zA-z0-9+]*==)' + song_id = 
self._match_id(url) + p = re.compile(_VALID_URL) + m = p.search(url) + _country = m.group('country') + _lang = m.group('language') + _code = int(time.time() * 1000) + + query = { + 'songid': song_id, + 'lang': _lang, + 'country': _country, + 'form_type': -1, + 'channel_id': -1, + '_': _code + } + + detail_info_page = self._download_webpage( + "http://api.joox.com/web-fcgi-bin/web_get_songinfo?" + compat_urllib_parse_urlencode(query), song_id) + detail_info_page = detail_info_page[18:-1] + song_json = self._parse_json(detail_info_page, song_id) + song320mp3 = song_json.get('r320Url') + song192mp3 = song_json.get('r192Url') + songmp3 = song_json.get('mp3Url') + songm4a = song_json.get('m4aUrl') + song_title = song_json.get('msong') + _duration = song_json.get('minterval') + album_thumbnail = song_json.get('album_url') + size128 = song_json.get('size128') + size320 = song_json.get('size320') + singer = song_json.get('singer_list') + singer = singer[0].get('name') + singer = base64.b64decode(singer).decode('UTF-8') + publish_time = song_json.get('public_time') + publish_time = publish_time.replace('-', '') + formats = [] + _FORMATS = { + '128m4a': {'url': song192mp3, 'abr': 128}, + 'm4a': {'url': songm4a}, + 'mp3': {'url': songmp3, 'abr': 128, 'filesize': size128}, + '320mp3': {'url': song320mp3, 'preference': -1, 'abr': 320, 'filesize': size320}, + } + for format_id, details in _FORMATS.items(): + formats.append({ + 'url': details['url'], + 'format': format_id, + 'format_id': format_id, + 'preference': details.get('preference'), + 'abr': details.get('abr'), + 'filesize': details.get('filesize'), + 'resolution': 'audio only' + }) + return { + 'id': song_id, + 'title': song_title, + 'formats': formats, + 'thumbnail': album_thumbnail, + 'release_date': publish_time, + 'duration': _duration, + 'creator': singer, + } From 1ad86b22bfc87f7a90b33cb00d743bf4e4a6b0bb Mon Sep 17 00:00:00 2001 From: HugoTai100 Date: Mon, 2 Apr 2018 13:38:31 +0800 Subject: [PATCH 2/2] [Joox] Add 
new extractor (fixes #16062) --- youtube_dl/extractor/joox.py | 42 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/joox.py b/youtube_dl/extractor/joox.py index 153ea2d06..2ceba4d0c 100644 --- a/youtube_dl/extractor/joox.py +++ b/youtube_dl/extractor/joox.py @@ -12,6 +12,8 @@ from ..compat import ( compat_urllib_parse_urlencode, ) +from ..utils import ExtractorError + class JooxIE(InfoExtractor): IE_NAME = 'jooxmusic:single' @@ -31,15 +33,14 @@ class JooxIE(InfoExtractor): }] def _real_extract(self, url): - _VALID_URL = r'https?://www\.joox\.com/(?P<country>[a-z]*)/(?P<language>[a-z]*_?[a-z]*)/single/(?P<id>[a-zA-z0-9+]*==)' song_id = self._match_id(url) - p = re.compile(_VALID_URL) + p = re.compile(self._VALID_URL) m = p.search(url) _country = m.group('country') _lang = m.group('language') _code = int(time.time() * 1000) - query = { + parameter = { 'songid': song_id, 'lang': _lang, 'country': _country, @@ -49,15 +50,18 @@ class JooxIE(InfoExtractor): } detail_info_page = self._download_webpage( - "http://api.joox.com/web-fcgi-bin/web_get_songinfo?" + compat_urllib_parse_urlencode(query), song_id) + "http://api.joox.com/web-fcgi-bin/web_get_songinfo?" 
+ compat_urllib_parse_urlencode(parameter), song_id) detail_info_page = detail_info_page[18:-1] song_json = self._parse_json(detail_info_page, song_id) + if song_json.get('code') != 0: + raise ExtractorError('%s said: %s' % (self.IE_NAME, "invalid songid"), expected=True) + song320mp3 = song_json.get('r320Url') song192mp3 = song_json.get('r192Url') songmp3 = song_json.get('mp3Url') songm4a = song_json.get('m4aUrl') song_title = song_json.get('msong') - _duration = song_json.get('minterval') + duration = song_json.get('minterval') album_thumbnail = song_json.get('album_url') size128 = song_json.get('size128') size320 = song_json.get('size320') @@ -67,28 +71,20 @@ class JooxIE(InfoExtractor): publish_time = song_json.get('public_time') publish_time = publish_time.replace('-', '') formats = [] - _FORMATS = { - '128m4a': {'url': song192mp3, 'abr': 128}, - 'm4a': {'url': songm4a}, - 'mp3': {'url': songmp3, 'abr': 128, 'filesize': size128}, - '320mp3': {'url': song320mp3, 'preference': -1, 'abr': 320, 'filesize': size320}, - } - for format_id, details in _FORMATS.items(): - formats.append({ - 'url': details['url'], - 'format': format_id, - 'format_id': format_id, - 'preference': details.get('preference'), - 'abr': details.get('abr'), - 'filesize': details.get('filesize'), - 'resolution': 'audio only' - }) + formats.extend([ + {'url': song192mp3, 'format_id': '128m4a', 'abr': 128, }, + {'url': songm4a, 'format_id': 'm4a', }, + {'url': songmp3, 'format_id': 'mp3', 'abr': 128, 'filesize': int(size128)}, + {'url': song320mp3, 'format_id': '320mp3', 'abr': 320, 'preference': -1, 'filesize': int(size320), } + ]) + formats = [x for x in formats if x['url'] != ''] + return { 'id': song_id, 'title': song_title, 'formats': formats, 'thumbnail': album_thumbnail, 'release_date': publish_time, - 'duration': _duration, - 'creator': singer, + 'duration': int(duration), + 'artist': singer, }