From 4ad0b8f31e957853df45e1bd47fff157eec83084 Mon Sep 17 00:00:00 2001 From: Alpesh Valia Date: Wed, 15 Mar 2017 16:22:53 +0530 Subject: [PATCH 1/5] [hotstar] added support for downloading playlist --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/hotstar.py | 59 +++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24c478932..9b87acc73 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -401,7 +401,10 @@ from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE -from .hotstar import HotStarIE +from .hotstar import ( + HotStarIE, + HotStarPlaylistIE, +) from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .hrti import ( diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 3a7a66a34..5413dcd46 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -7,6 +7,7 @@ from ..utils import ( determine_ext, int_or_none, ) +import re class HotStarIE(InfoExtractor): @@ -16,7 +17,7 @@ class HotStarIE(InfoExtractor): 'info_dict': { 'id': '1000076273', 'ext': 'mp4', - 'title': 'On Air With AIB - English', + 'title': 'On Air With AIB', 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', 'timestamp': 1447227000, 'upload_date': '20151111', @@ -99,3 +100,59 @@ class HotStarIE(InfoExtractor): 'episode_number': int_or_none(video_data.get('episodeNumber')), 'series': video_data.get('contentTitle'), } + +class HotStarBaseIE(InfoExtractor): + @classmethod + def _extract_url_info(cls, url): + mobj = re.match(cls._VALID_URL, url) + return mobj.group('series_id'), mobj.group('playlist_id'), mobj.group('playlist_title') + + def _extract_from_json_url(self, series_id, playlist_title, video ): + + picture_url = video.get('urlPictures'); + 
thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % ( picture_url[-2:], picture_url, picture_url ) + + episode_title = video.get('episodeTitle') + episode_title = episode_title.lower().replace(' ', '-') + url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) + + info_dict = { + 'id': video.get('contentId'), + 'title': video.get('episodeTitle'), + 'description': video.get('longDescription'), + 'thumbnail' : thumbnail, + 'url' : url, + '_type' : 'url', + } + return info_dict + +class HotStarPlaylistIE(HotStarBaseIE): + IE_NAME = 'hotstar:playlist' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})' + + _TESTS = [{ + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'info_dict': { + 'id': '10856', + 'title': 'pow-bandi-yuddh-ke', + }, + 'playlist_mincount': 0, + }, { + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'only_matching': True, + }] + + def _real_extract(self, url): + series_id, playlist_id, playlist_title = self._extract_url_info(url) + + collection = self._download_json( + "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id, + playlist_id + ) + + videos = collection['resultObj']['response']['docs'] + + entries = [ + self._extract_from_json_url( series_id, playlist_title, video ) + for video in videos if video.get('contentId')] + return self.playlist_result(entries, playlist_id, playlist_title) From 70495377094eae25b36a6a476c5a6015742ffedc Mon Sep 17 00:00:00 2001 From: Alpesh Valia Date: Thu, 16 Mar 2017 17:36:29 +0530 Subject: [PATCH 2/5] [hotstar] made suggested changes --- youtube_dl/extractor/hotstar.py | 50 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git 
a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 5413dcd46..aae970213 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -101,31 +101,6 @@ class HotStarIE(InfoExtractor): 'series': video_data.get('contentTitle'), } -class HotStarBaseIE(InfoExtractor): - @classmethod - def _extract_url_info(cls, url): - mobj = re.match(cls._VALID_URL, url) - return mobj.group('series_id'), mobj.group('playlist_id'), mobj.group('playlist_title') - - def _extract_from_json_url(self, series_id, playlist_title, video ): - - picture_url = video.get('urlPictures'); - thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % ( picture_url[-2:], picture_url, picture_url ) - - episode_title = video.get('episodeTitle') - episode_title = episode_title.lower().replace(' ', '-') - url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) - - info_dict = { - 'id': video.get('contentId'), - 'title': video.get('episodeTitle'), - 'description': video.get('longDescription'), - 'thumbnail' : thumbnail, - 'url' : url, - '_type' : 'url', - } - return info_dict - class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})' @@ -142,6 +117,31 @@ class HotStarPlaylistIE(HotStarBaseIE): 'only_matching': True, }] + def _extract_url_info(cls, url): + mobj = re.match(cls._VALID_URL, url) + return mobj.group('series_id'), mobj.group('playlist_id'), mobj.group('playlist_title') + + def _extract_from_json_url(self, series_id, playlist_title, video ): + + picture_url = video.get('urlPictures'); + thumbnail = '' + if picture_url: + thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % ( picture_url[-2:], picture_url, picture_url ) + + episode_title = video.get('episodeTitle', '') + episode_title = episode_title.lower().replace(' ', '-') + url = 
"http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) + + info_dict = { + 'id': video.get('contentId'), + 'title': video.get('episodeTitle'), + 'description': video.get('longDescription'), + 'thumbnail' : thumbnail, + 'url' : url, + '_type' : 'url', + } + return info_dict + def _real_extract(self, url): series_id, playlist_id, playlist_title = self._extract_url_info(url) From b6b90a7135c8cdf9391a061cfdd31223ce053996 Mon Sep 17 00:00:00 2001 From: Alpesh Valia Date: Wed, 15 Mar 2017 16:22:53 +0530 Subject: [PATCH 3/5] [hotstar] hotstar playlist [hotstar] added support for downloading playlist [hotstar] made suggested changes --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/hotstar.py | 59 +++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 24c478932..9b87acc73 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -401,7 +401,10 @@ from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE -from .hotstar import HotStarIE +from .hotstar import ( + HotStarIE, + HotStarPlaylistIE, +) from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .hrti import ( diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 3a7a66a34..aae970213 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -7,6 +7,7 @@ from ..utils import ( determine_ext, int_or_none, ) +import re class HotStarIE(InfoExtractor): @@ -16,7 +17,7 @@ class HotStarIE(InfoExtractor): 'info_dict': { 'id': '1000076273', 'ext': 'mp4', - 'title': 'On Air With AIB - English', + 'title': 'On Air With AIB', 'description': 'md5:c957d8868e9bc793ccb813691cc4c434', 'timestamp': 1447227000, 'upload_date': '20151111', @@ -99,3 
+100,59 @@ class HotStarIE(InfoExtractor): 'episode_number': int_or_none(video_data.get('episodeNumber')), 'series': video_data.get('contentTitle'), } + +class HotStarPlaylistIE(HotStarBaseIE): + IE_NAME = 'hotstar:playlist' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})' + + _TESTS = [{ + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'info_dict': { + 'id': '10856', + 'title': 'pow-bandi-yuddh-ke', + }, + 'playlist_mincount': 0, + }, { + 'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993', + 'only_matching': True, + }] + + def _extract_url_info(cls, url): + mobj = re.match(cls._VALID_URL, url) + return mobj.group('series_id'), mobj.group('playlist_id'), mobj.group('playlist_title') + + def _extract_from_json_url(self, series_id, playlist_title, video ): + + picture_url = video.get('urlPictures'); + thumbnail = '' + if picture_url: + thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % ( picture_url[-2:], picture_url, picture_url ) + + episode_title = video.get('episodeTitle', '') + episode_title = episode_title.lower().replace(' ', '-') + url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId')) + + info_dict = { + 'id': video.get('contentId'), + 'title': video.get('episodeTitle'), + 'description': video.get('longDescription'), + 'thumbnail' : thumbnail, + 'url' : url, + '_type' : 'url', + } + return info_dict + + def _real_extract(self, url): + series_id, playlist_id, playlist_title = self._extract_url_info(url) + + collection = self._download_json( + "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id, + playlist_id + ) + + videos = collection['resultObj']['response']['docs'] + + entries = [ + self._extract_from_json_url( 
series_id, playlist_title, video ) + for video in videos if video.get('contentId')] + return self.playlist_result(entries, playlist_id, playlist_title) From 05000371607ff15ae6cb88d0ee54c482db8a4608 Mon Sep 17 00:00:00 2001 From: Alpesh Valia Date: Thu, 16 Mar 2017 17:41:38 +0530 Subject: [PATCH 4/5] [hotstar] changed base class of hotstarplaylist --- youtube_dl/extractor/hotstar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index aae970213..59f0beb7e 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -101,7 +101,7 @@ class HotStarIE(InfoExtractor): 'series': video_data.get('contentTitle'), } -class HotStarPlaylistIE(HotStarBaseIE): +class HotStarPlaylistIE(InfoExtractor): IE_NAME = 'hotstar:playlist' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})' From 4c487d5d0d069913ed6893439302be5451aa0909 Mon Sep 17 00:00:00 2001 From: Alpesh Valia Date: Thu, 16 Mar 2017 19:20:08 +0530 Subject: [PATCH 5/5] [hotstar] improved code --- youtube_dl/extractor/hotstar.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py index 59f0beb7e..ea4fbd575 100644 --- a/youtube_dl/extractor/hotstar.py +++ b/youtube_dl/extractor/hotstar.py @@ -117,10 +117,6 @@ class HotStarPlaylistIE(InfoExtractor): 'only_matching': True, }] - def _extract_url_info(cls, url): - mobj = re.match(cls._VALID_URL, url) - return mobj.group('series_id'), mobj.group('playlist_id'), mobj.group('playlist_title') - - def _extract_from_json_url(self, series_id, playlist_title, video ): picture_url = video.get('urlPictures'); @@ -143,15 +139,17 @@ class HotStarPlaylistIE(InfoExtractor): return info_dict def _real_extract(self, url): - series_id, playlist_id, playlist_title = self._extract_url_info(url) + mobj = re.match(self._VALID_URL, url) + series_id = 
mobj.group('series_id') + playlist_id = mobj.group('playlist_id') + playlist_title = mobj.group('playlist_title') collection = self._download_json( "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id, playlist_id ) - videos = collection['resultObj']['response']['docs'] - + videos = collection.get('resultObj', {}).get('response', {}).get('docs', []) entries = [ self._extract_from_json_url( series_id, playlist_title, video ) for video in videos if video.get('contentId')]