From 59e2803cb09807f65aa068fd421ff710c20261d4 Mon Sep 17 00:00:00 2001 From: Gouranga Das <38029649+GourangaDas@users.noreply.github.com> Date: Sat, 15 Dec 2018 12:01:30 +0530 Subject: [PATCH 1/4] Support for ganna.com added --- youtube_dl/extractor/extractors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6a5d12ab1..e6c28c349 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -418,6 +418,11 @@ from .gameone import ( ) from .gamespot import GameSpotIE from .gamestar import GameStarIE +from .gaana import ( + GaanaIE, + GaanaAlbumIE, + GaanaArtistIE, +) from .gaskrank import GaskrankIE from .gazeta import GazetaIE from .gdcvault import GDCVaultIE From b22aadb3a86481521991b2ee3e343e81295e8f2b Mon Sep 17 00:00:00 2001 From: Gouranga Das <38029649+GourangaDas@users.noreply.github.com> Date: Sat, 15 Dec 2018 12:02:35 +0530 Subject: [PATCH 2/4] Infoextractor for gaana.com --- youtube_dl/extractor/gaana.py | 174 ++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 youtube_dl/extractor/gaana.py diff --git a/youtube_dl/extractor/gaana.py b/youtube_dl/extractor/gaana.py new file mode 100644 index 000000000..c5e15b97c --- /dev/null +++ b/youtube_dl/extractor/gaana.py @@ -0,0 +1,174 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from ..aes import aes_cbc_decrypt +from ..compat import ( + compat_b64decode, +) + +from .common import InfoExtractor +from ..utils import ( + bytes_to_intlist, + intlist_to_bytes, + int_or_none, +) + + +class GaanaBaseIE(InfoExtractor): + _BASE_URL = 'https://gaana.com' + + def _Decrypt(self, data): + + key = 'Z0AxbiEoZjEjci4wJCkmJQ==' + iv = 'YXNkIUAjIUAjQCExMjMxMg==' + + stream_url = intlist_to_bytes(aes_cbc_decrypt( + bytes_to_intlist(compat_b64decode(data)), + bytes_to_intlist(compat_b64decode(key)), + bytes_to_intlist(compat_b64decode(iv)))).decode() + + s = stream_url[:-ord(stream_url[len(stream_url) - 1:])] + return s + + def _create_entry(self, data, video_id): + + raw_data = self._parse_json(data, video_id) + + video_data = raw_data.get('path') + title = raw_data.get('title') + if not title: + print("No title found.") + thumbnail = raw_data.get('atw', '') or raw_data.get('albumartwork', '') + duration = raw_data.get('duration') + + formats = [] + if isinstance(video_data, dict): + for value in video_data.keys(): + # need to skip auto + # this format and quaity is too dificult to handle for audio player. + # currently, it has been skipped + # in future this format also be going to available + if not value == 'auto': + content = video_data.get(value) + for k in content: + format_url = self._Decrypt(k.get('message')) + format_id = value + + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'ext': 'mp4' + }) + + artist = raw_data.get('artist') + + # Remove unwanted # character from string + if not artist: + artist = artist.replace(artist, "###", ', ') + + return { + 'id': video_id, + 'title': title, + 'description': raw_data.get('description'), + 'duration': int_or_none(duration), + 'formats': formats, + 'album': raw_data.get('albumtitle'), + 'thumbnail': thumbnail, + 'artist': artist, + 'release_date': raw_data.get('release_date'), + 'language': raw_data.get('language') + } + else: + # we are here, beacause gaana.com uses cloudfont.com also + # alongwith some other sites for storage purpose. + # that will be implemented soon. + return None + + +class GaanaIE(GaanaBaseIE): + IE_NAME = 'gaana' + _VALID_URL = r'https?://(?:www\.)?gaana\.com/song/(?P[^/#?]+)' + _TESTS = [{ + 'url': 'https://gaana.com/song/jeeye-to-jeeye-kaise', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + raw_data = self._search_regex( + r'class="parentnode sourcelist_\d+"> (.*?) ', + webpage, 'raw data') + entry = self._create_entry(raw_data, video_id) + if entry: + return entry + + +class GaanaAlbumIE(GaanaBaseIE): + IE_NAME = 'gaana:album' + _VALID_URL = r'https?://(?:www\.)?gaana\.com/album/(?P[^/#?]+)' + _TESTS = [{ + 'url': 'https://gaana.com/album/saajan-hindi', + 'playlist_mincount': 1, + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + # print(webpage) + matchobj = re.findall(r'class="parentnode sourcelist_\d+"> (.*?) ', webpage) + entries = [] + for g in matchobj: + entry = self._create_entry(g, playlist_id) + if entry: + entries.append(self._create_entry(g, playlist_id)) + + return self.playlist_result(entries, playlist_id) + + +class GaanaArtistIE(GaanaBaseIE): + IE_NAME = 'gaana:artist' + _VALID_URL = r'https?://(?:www\.)?gaana\.com/artist/(?P[^/#?]+)' + _TESTS = [{ + 'url': 'https://gaana.com/artist/kumar-sanu', + 'playlist_mincount': 1, + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + urls = self._read_entry(webpage, playlist_id) + entries = [] + + for g in urls: + video_id = g.replace('https://gaana.com/song/', '') + webpage = self._download_webpage(g, video_id) + + raw_data = self._search_regex( + r'class="parentnode sourcelist_\d+"> (.*?) ', + webpage, 'raw data') + + entry = self._create_entry(raw_data, playlist_id) + + if entry: + entries.append(entry) + + return self.playlist_result(entries, playlist_id) + + def _read_entry(self, webpage, video_id): + entries = [] + matchobj = re.findall(r'class="parentnode sourcelist_\d+"> (.*?) ', webpage) + + for g in matchobj: + raw_data = self._parse_json(g, video_id) + new_url = raw_data.get('share_url') + + if new_url: + new_url = self._BASE_URL + new_url + entries.append(new_url) + + return entries From ecf5638a44dbe39284f36574b00bb6c0d8f7a3df Mon Sep 17 00:00:00 2001 From: Gouranga Das <38029649+GourangaDas@users.noreply.github.com> Date: Wed, 19 Dec 2018 11:53:22 +0530 Subject: [PATCH 3/4] gaana.py updated --- youtube_dl/extractor/gaana.py | 250 ++++++++++++++++++---------------- 1 file changed, 130 insertions(+), 120 deletions(-) diff --git a/youtube_dl/extractor/gaana.py b/youtube_dl/extractor/gaana.py index c5e15b97c..b691a53a6 100644 --- a/youtube_dl/extractor/gaana.py +++ b/youtube_dl/extractor/gaana.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import hashlib from ..aes import aes_cbc_decrypt from ..compat import ( compat_b64decode, @@ -17,158 +18,167 @@ from ..utils import ( class GaanaBaseIE(InfoExtractor): _BASE_URL = 'https://gaana.com' + _API_URL = 'https://apiv2.gaana.com/track/stream' + _COOKIE = '' def _Decrypt(self, data): - key = 'Z0AxbiEoZjEjci4wJCkmJQ==' - iv = 'YXNkIUAjIUAjQCExMjMxMg==' - + key = b'g@1n!(f1#r.0$)&%' + iv = b'asd!@#!@#@!12312' stream_url = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(data)), - bytes_to_intlist(compat_b64decode(key)), - bytes_to_intlist(compat_b64decode(iv)))).decode() - + bytes_to_intlist(key), + bytes_to_intlist(iv))).decode() + # unpad s = stream_url[:-ord(stream_url[len(stream_url) - 1:])] return s + def _Create_ht(self, track_id): + if not self._COOKIE: + self._COOKIE = self._get_cookies(self._BASE_URL)['PHPSESSID'].value + + mess = track_id + '|' + self._COOKIE + '|03:40:31 sec' + ht = hashlib.md5(mess.encode()).hexdigest() + self._COOKIE[3:9] + '=' + return ht + def _create_entry(self, data, video_id): raw_data = self._parse_json(data, video_id) - video_data = raw_data.get('path') title = raw_data.get('title') - if not title: - print("No title found.") - thumbnail = raw_data.get('atw', '') or raw_data.get('albumartwork', '') + thumbnail = raw_data.get('albumartwork') duration = raw_data.get('duration') + artist = raw_data.get('artist') + + def _format_artist(art): + r_sample = r'#..(\d+)#..(\w+)[^|,]*' + res = re.sub(r_sample, '', art) + return re.sub(r',', ', ', res) + + artist = _format_artist(artist) formats = [] if isinstance(video_data, dict): for value in video_data.keys(): - # need to skip auto - # this format and quaity is too dificult to handle for audio player. - # currently, it has been skipped - # in future this format also be going to available - if not value == 'auto': - content = video_data.get(value) - for k in content: - format_url = self._Decrypt(k.get('message')) + content = video_data.get(value) + for k in content: + format_url = self._Decrypt(k.get('message')) + + if value == 'auto': + format_id = 'normal' + else: format_id = value - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'ext': 'mp4' - }) + info = { + 'url': format_url, + 'format_id': format_id, + 'ext': 'mp4', + 'abr': int_or_none(k.get('bitRate')), + 'format_note': 'mp4-aac' + } - artist = raw_data.get('artist') + if format_id == 'normal': + formats.insert(0, info) + else: + formats.append(info) - # Remove unwanted # character from string - if not artist: - artist = artist.replace(artist, "###", ', ') - - return { - 'id': video_id, - 'title': title, - 'description': raw_data.get('description'), - 'duration': int_or_none(duration), - 'formats': formats, - 'album': raw_data.get('albumtitle'), - 'thumbnail': thumbnail, - 'artist': artist, - 'release_date': raw_data.get('release_date'), - 'language': raw_data.get('language') - } else: - # we are here, beacause gaana.com uses cloudfont.com also - # alongwith some other sites for storage purpose. - # that will be implemented soon. - return None + track_id = raw_data.get('track_ids') + ht = self._Create_ht(track_id) + + for g in ('normal', 'medium', 'high'): + js = self._download_json(self._API_URL, title, headers={ + 'async': '1', + 'method': 'POST'}, query={ + 'ht': ht, + 'request_type': 'web', + 'track_id': track_id, + 'quality': g + }) + format_url = js.get('stream_path') + + formats.append({ + 'url': format_url, + 'format_id': g, + 'ext': 'mp4', + 'abr': int_or_none(js.get('bit_rate')), + 'format_note': 'mp4-aac' + }) + + return { + 'id': video_id, + 'title': title, + 'duration': int_or_none(duration), + 'formats': formats, + 'album': raw_data.get('albumtitle'), + 'thumbnail': thumbnail, + 'artist': artist, + 'release_date': raw_data.get('release_date'), + 'language': raw_data.get('language') + } class GaanaIE(GaanaBaseIE): IE_NAME = 'gaana' - _VALID_URL = r'https?://(?:www\.)?gaana\.com/song/(?P[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?gaana\.com/(?P(song|album|artist|playlist))/(?P[^/#?]+)' _TESTS = [{ - 'url': 'https://gaana.com/song/jeeye-to-jeeye-kaise', - 'only_matching': True, + 'url': 'https://gaana.com/song/chamma-chamma-5', + 'md5': '11f926ce191e008fb82a3200ba047626', + 'info_dict': { + 'id': 'chamma-chamma-5', + 'ext': 'mp4', + 'title': 'Chamma Chamma', + 'thumbnail': r'https://a10.gaanacdn.com/images/song/86/24725286/crop_175x175_1544754220.jpg', + 'duration': '3:16', + 'album': 'Fraud Saiyaan', + 'artist': 'Neha Kakkar, Romi, Arun, Ikka', + 'language': 'Hindi', + 'release_date': 'Dec 14, 2018' + } + }, + { + 'url': 'https://gaana.com/album/simmba', + 'info_dict': [{ + 'md5': '7f2ce83e8f6df7ed2cb50ce29d9f5d7f', + 'id': 'aankh-marey', + 'ext': 'mp4', + 'title': 'Aankh Marey', + 'thumbnail': r'https://a10.gaanacdn.com/images/song/88/24643688/crop_175x175_1544078542.jpg', + 'duration': '3:33', + 'album': 'Simmba', + 'artist': 'Neha Kakkar, Mika Singh, Kumar Sanu', + 'language': 'Hindi', + 'release_date': 'Dec 06, 2018' + }, + { + 'md5': 'cdd78c5f3749ee0b114a26dfa454c64f', + 'id': 'tere-bin-111', + 'ext': 'mp4', + 'title': 'Tere Bin', + 'thumbnail': r'https://a10.gaanacdn.com/images/song/85/24725285/crop_175x175_1544753630.jpg', + 'duration': '3:51', + 'album': 'Simmba', + 'artist': 'Rahat Fateh Ali Khan, Asees Kaur, Tanishk Bagchi', + 'language': 'Hindi', + 'release_date': 'Dec 14, 2018' + }] }] def _real_extract(self, url): - video_id = self._match_id(url) + r_match = re.match(self._VALID_URL, url) + video_id = r_match.group('id') + type_id = r_match.group('idtype') + self.IE_NAME += ':' + type_id + + self._set_cookie(self._BASE_URL, 'PHPSESSID', 'val') webpage = self._download_webpage(url, video_id) - raw_data = self._search_regex( - r'class="parentnode sourcelist_\d+"> (.*?) ', - webpage, 'raw data') - entry = self._create_entry(raw_data, video_id) - if entry: - return entry - -class GaanaAlbumIE(GaanaBaseIE): - IE_NAME = 'gaana:album' - _VALID_URL = r'https?://(?:www\.)?gaana\.com/album/(?P[^/#?]+)' - _TESTS = [{ - 'url': 'https://gaana.com/album/saajan-hindi', - 'playlist_mincount': 1, - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - # print(webpage) - matchobj = re.findall(r'class="parentnode sourcelist_\d+"> (.*?) ', webpage) + matchobj = re.findall(r'class="parentnode sourcelist_\d+">(.*?)', webpage) entries = [] - for g in matchobj: - entry = self._create_entry(g, playlist_id) - if entry: - entries.append(self._create_entry(g, playlist_id)) - - return self.playlist_result(entries, playlist_id) - - -class GaanaArtistIE(GaanaBaseIE): - IE_NAME = 'gaana:artist' - _VALID_URL = r'https?://(?:www\.)?gaana\.com/artist/(?P[^/#?]+)' - _TESTS = [{ - 'url': 'https://gaana.com/artist/kumar-sanu', - 'playlist_mincount': 1, - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - urls = self._read_entry(webpage, playlist_id) - entries = [] - - for g in urls: - video_id = g.replace('https://gaana.com/song/', '') - webpage = self._download_webpage(g, video_id) - - raw_data = self._search_regex( - r'class="parentnode sourcelist_\d+"> (.*?) ', - webpage, 'raw data') - - entry = self._create_entry(raw_data, playlist_id) - - if entry: - entries.append(entry) - - return self.playlist_result(entries, playlist_id) - - def _read_entry(self, webpage, video_id): - entries = [] - matchobj = re.findall(r'class="parentnode sourcelist_\d+"> (.*?) ', webpage) - - for g in matchobj: - raw_data = self._parse_json(g, video_id) - new_url = raw_data.get('share_url') - - if new_url: - new_url = self._BASE_URL + new_url - entries.append(new_url) - - return entries + if len(matchobj) > 1: + for g in matchobj: + entries.append(self._create_entry(g, video_id)) + return self.playlist_result(entries, video_id) + else: + return self._create_entry(matchobj[0], video_id) From 747bee8ad54c8b283f75f58c337fcdde0c74fad9 Mon Sep 17 00:00:00 2001 From: Gouranga Das <38029649+GourangaDas@users.noreply.github.com> Date: Wed, 19 Dec 2018 12:06:08 +0530 Subject: [PATCH 4/4] Update extractors.py --- youtube_dl/extractor/extractors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e6c28c349..8a8ff094f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -420,8 +420,6 @@ from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaana import ( GaanaIE, - GaanaAlbumIE, - GaanaArtistIE, ) from .gaskrank import GaskrankIE from .gazeta import GazetaIE