From 5ec34dc1f32116ef8d261300ad6236aa16580f38 Mon Sep 17 00:00:00 2001 From: Marco Migliori Date: Wed, 20 Sep 2017 18:09:06 +0200 Subject: [PATCH 1/7] [paramount] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/paramount.py | 92 ++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 youtube_dl/extractor/paramount.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ab95c8575..28a2c7736 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -778,6 +778,7 @@ from .packtpub import ( ) from .pandatv import PandaTVIE from .pandoratv import PandoraTVIE +from .paramount import ParamountIE from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE diff --git a/youtube_dl/extractor/paramount.py b/youtube_dl/extractor/paramount.py new file mode 100644 index 000000000..9e3b4f01c --- /dev/null +++ b/youtube_dl/extractor/paramount.py @@ -0,0 +1,92 @@ +# youtube_dl extractor for www.paramountchannel.it + +# copy to youtube_dl/extractors +# add a line to extractors.py: +# from .paramount import ParamountIE + +# can use functions from common.py, as _self.function(...) +# also import from other modules + +# to test: +# python -m youtube_dl --verbose "http://www.paramountchannel.it/playlist/speciali-paramount-channel/o3gr12/backstage-sigla-paramount/tqvbxd" >| x +# python test/test_download.py TestDownload.test_Paramount + +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class ParamountIE(InfoExtractor): + IE_DESC = 'Paramount Channel' + _VALID_URL = r'http://(www\.)?paramountchannel\.it/.*/[0-9a-z]{6}' + + _TEST = { + 'url': 'http://www.paramountchannel.it/playlist/speciali-paramount-channel/o3gr12/backstage-sigla-paramount/tqvbxd', + 'md5': '433666e341a57bb76dc50b6ff87e5c63', + 'info_dict': { + 'id': '9cb5ad60-9d34-4a9a-99df-157fdd1a5310', + 'ext': 'mp4', + 'title': 'Speciali video, Paramount Channel', + 'description': 'Tutti gli speciali di Paramount Channel: curiosità, approfondimenti e aggiornamenti su film, serie tv e personaggi del cinema.' + } + } + + def _obtain_akamaihd_formats(self, url): + if self._downloader.params.get('verbose', False): + listpage = self._download_webpage(url, 'akamaihd format list') + self.to_screen('formats page = %s' % (listpage)) + listpage = self._download_xml(url, 'akamaihd format list') + formats = [] + for rendition in listpage.findall('./video/item/rendition'): + fmt = { + 'width': int_or_none(rendition.get('width')), + 'height': int_or_none(rendition.get('height')), + 'url': rendition.find('./src').text + } + formats.append(fmt) + return formats + + def _real_extract(self, url): + # webpage + + webpage = self._download_webpage(url, 'webpage') + + id = self._html_search_regex( + r'data-mtv-id="([0-9a-f-]*)"', + webpage, 'id', fatal=False) \ + or \ + self._html_search_regex( + r'"item_longId" *: *"([0-9a-f-]*)"', + webpage, 'id') + self.to_screen('id = %s' % (id)) + + uri = self._html_search_regex( + r'data-mtv-uri="([0-9a-z:\.-]*)"', + webpage, 'uri', fatal=False) \ + or \ + 'mgid:arc:video:paramountchannel.it:' + id + self.to_screen('uri = %s' % (uri)) + + title = self._og_search_title(webpage) + self.to_screen('title = %s' % (title)) + + # list of formats + + server = 'https://mediautilssvcs-a.akamaihd.net' + prefix = '/services/MediaGenerator/' + arguments = 'accountOverride=esperanto.mtvi.com' + listurl = '%s%s%s?%s' % (server, prefix, uri, arguments) + self.to_screen('listurl = %s' % (listurl)) + formats = self._obtain_akamaihd_formats(listurl) + if self._downloader.params.get('verbose', False): + self.to_screen('formats = %s' % (formats)) + + return { + 'id': id, + 'formats': formats, + 'title': title, + 'description': self._og_search_description(webpage), + 'thumbnail': self._html_search_meta('thumbnail', webpage, fatal=False) + } From fe4633a9a40d8546e12e1aed0e36ccb1d3af5608 Mon Sep 17 00:00:00 2001 From: Marco Migliori Date: Wed, 20 Sep 2017 18:15:06 +0200 Subject: [PATCH 2/7] [paramount] remove unnecessary comments --- youtube_dl/extractor/paramount.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/youtube_dl/extractor/paramount.py b/youtube_dl/extractor/paramount.py index 9e3b4f01c..df62ae3e0 100644 --- a/youtube_dl/extractor/paramount.py +++ b/youtube_dl/extractor/paramount.py @@ -1,16 +1,3 @@ -# youtube_dl extractor for www.paramountchannel.it - -# copy to youtube_dl/extractors -# add a line to extractors.py: -# from .paramount import ParamountIE - -# can use functions from common.py, as _self.function(...) -# also import from other modules - -# to test: -# python -m youtube_dl --verbose "http://www.paramountchannel.it/playlist/speciali-paramount-channel/o3gr12/backstage-sigla-paramount/tqvbxd" >| x -# python test/test_download.py TestDownload.test_Paramount - # coding: utf-8 from __future__ import unicode_literals From df6a9996d7fe790e86ec7de3b72adc34dff1916a Mon Sep 17 00:00:00 2001 From: sgerwk Date: Tue, 30 Apr 2019 18:59:57 +0200 Subject: [PATCH 3/7] [paramount] update urls to paramountnetwork.it --- youtube_dl/extractor/paramount.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/paramount.py b/youtube_dl/extractor/paramount.py index df62ae3e0..00650a895 100644 --- a/youtube_dl/extractor/paramount.py +++ b/youtube_dl/extractor/paramount.py @@ -6,17 +6,18 @@ from ..utils import int_or_none class ParamountIE(InfoExtractor): + IE_NAME = 'paramountnetwork.it' IE_DESC = 'Paramount Channel' - _VALID_URL = r'http://(www\.)?paramountchannel\.it/.*/[0-9a-z]{6}' + _VALID_URL = r'http://(www\.)?paramountnetwork\.it/.*/[0-9a-z]{6}' _TEST = { - 'url': 'http://www.paramountchannel.it/playlist/speciali-paramount-channel/o3gr12/backstage-sigla-paramount/tqvbxd', - 'md5': '433666e341a57bb76dc50b6ff87e5c63', + 'url': 'http://www.paramountnetwork.it/playlist/speciali-paramount-channel/o3gr12/speciale-stephen-king/x0xj9k', + 'md5': '4079336559ea61e24eb08b1b1adf2523', 'info_dict': { - 'id': '9cb5ad60-9d34-4a9a-99df-157fdd1a5310', + 'id': 'dbf6d5d5-1a95-41ac-b17b-b5caca227b25', 'ext': 'mp4', - 'title': 'Speciali video, Paramount Channel', - 'description': 'Tutti gli speciali di Paramount Channel: curiosità, approfondimenti e aggiornamenti su film, serie tv e personaggi del cinema.' + 'title': 'SPECIALE STEPHEN KING - Speciali video, Paramount Network', + 'description': 'Tutti gli speciali di Paramount Network curiosità, approfondimenti e aggiornamenti su film, serie tv e personaggi del cinema.' } } From 961fbb6dc0c896363e1b61e99dd00e16e6a63d28 Mon Sep 17 00:00:00 2001 From: sgerwk Date: Wed, 1 May 2019 00:11:57 +0200 Subject: [PATCH 4/7] [paramount] generalize extractor to paramountnetwork.es --- youtube_dl/extractor/paramount.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/paramount.py b/youtube_dl/extractor/paramount.py index 00650a895..e19fca83d 100644 --- a/youtube_dl/extractor/paramount.py +++ b/youtube_dl/extractor/paramount.py @@ -6,9 +6,9 @@ from ..utils import int_or_none class ParamountIE(InfoExtractor): - IE_NAME = 'paramountnetwork.it' + IE_NAME = 'paramountnetwork' IE_DESC = 'Paramount Channel' - _VALID_URL = r'http://(www\.)?paramountnetwork\.it/.*/[0-9a-z]{6}' + _VALID_URL = r'http(s)?://(www\.)?paramountnetwork\.(it|es)/.*/[0-9a-z]{6}($|/)' _TEST = { 'url': 'http://www.paramountnetwork.it/playlist/speciali-paramount-channel/o3gr12/speciale-stephen-king/x0xj9k', @@ -42,6 +42,10 @@ class ParamountIE(InfoExtractor): webpage = self._download_webpage(url, 'webpage') id = self._html_search_regex( + r'mgid:arc:content:paramount(?:network|channel)\.(?:it|es):([0-9a-f-]+)', + webpage, 'id', fatal=False) \ + or \ + self._html_search_regex( r'data-mtv-id="([0-9a-f-]*)"', webpage, 'id', fatal=False) \ or \ @@ -51,10 +55,12 @@ class ParamountIE(InfoExtractor): self.to_screen('id = %s' % (id)) uri = self._html_search_regex( - r'data-mtv-uri="([0-9a-z:\.-]*)"', + r'(mgid:arc:content:paramount(?:network|channel)\.(?:it|es):(?:[0-9a-f-]+))', webpage, 'uri', fatal=False) \ or \ - 'mgid:arc:video:paramountchannel.it:' + id + self._html_search_regex( + r'data-mtv-uri="([0-9a-z:\.-]*)"', + webpage, 'uri') self.to_screen('uri = %s' % (uri)) title = self._og_search_title(webpage) From 7224c2af4a4d8c29cba55c4a85f2ab6cd00d88e2 Mon Sep 17 00:00:00 2001 From: sgerwk Date: Sun, 2 Jun 2019 09:48:45 +0200 Subject: [PATCH 5/7] [paramount] avoid name clash --- youtube_dl/extractor/paramount.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/paramount.py b/youtube_dl/extractor/paramount.py index e19fca83d..11c1b6ccb 100644 --- a/youtube_dl/extractor/paramount.py +++ b/youtube_dl/extractor/paramount.py @@ -6,8 +6,8 @@ from ..utils import int_or_none class ParamountIE(InfoExtractor): - IE_NAME = 'paramountnetwork' - IE_DESC = 'Paramount Channel' + IE_NAME = 'paramount' + IE_DESC = 'Paramount Network' _VALID_URL = r'http(s)?://(www\.)?paramountnetwork\.(it|es)/.*/[0-9a-z]{6}($|/)' _TEST = { From a74eacc4615d9b8a33c2fcb6ecb1b444c1e00740 Mon Sep 17 00:00:00 2001 From: sgerwk Date: Sun, 22 Mar 2020 18:37:24 +0100 Subject: [PATCH 6/7] update extractor --- youtube_dl/extractor/paramount.py | 80 ++++++++++++++++--------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/paramount.py b/youtube_dl/extractor/paramount.py index 11c1b6ccb..d0aedf4d0 100644 --- a/youtube_dl/extractor/paramount.py +++ b/youtube_dl/extractor/paramount.py @@ -17,70 +17,72 @@ class ParamountIE(InfoExtractor): 'id': 'dbf6d5d5-1a95-41ac-b17b-b5caca227b25', 'ext': 'mp4', 'title': 'SPECIALE STEPHEN KING - Speciali video, Paramount Network', - 'description': 'Tutti gli speciali di Paramount Network curiosità, approfondimenti e aggiornamenti su film, serie tv e personaggi del cinema.' + 'description': 'Tutti gli speciali di Paramount Network curiosità, approfondimenti e aggiornamenti su film, serie tv e personaggi del cinema.' } } - def _obtain_akamaihd_formats(self, url): - if self._downloader.params.get('verbose', False): - listpage = self._download_webpage(url, 'akamaihd format list') - self.to_screen('formats page = %s' % (listpage)) - listpage = self._download_xml(url, 'akamaihd format list') - formats = [] - for rendition in listpage.findall('./video/item/rendition'): - fmt = { - 'width': int_or_none(rendition.get('width')), - 'height': int_or_none(rendition.get('height')), - 'url': rendition.find('./src').text - } - formats.append(fmt) - return formats - def _real_extract(self, url): # webpage webpage = self._download_webpage(url, 'webpage') + # self.to_screen('webpage = %s' % (webpage)) id = self._html_search_regex( - r'mgid:arc:content:paramount(?:network|channel)\.(?:it|es):([0-9a-f-]+)', - webpage, 'id', fatal=False) \ - or \ - self._html_search_regex( - r'data-mtv-id="([0-9a-f-]*)"', - webpage, 'id', fatal=False) \ - or \ - self._html_search_regex( - r'"item_longId" *: *"([0-9a-f-]*)"', + r'mgid:arc:content:web.paramount(?:network|channel|plus)\.(?:it|es|com):([0-9a-f-]+)', webpage, 'id') self.to_screen('id = %s' % (id)) - uri = self._html_search_regex( - r'(mgid:arc:content:paramount(?:network|channel)\.(?:it|es):(?:[0-9a-f-]+))', - webpage, 'uri', fatal=False) \ + episode = self._html_search_regex( + r'mgid:arc:episode:paramount.intl:([0-9a-f-]+)', + webpage, 'episode', fatal=False) \ or \ self._html_search_regex( - r'data-mtv-uri="([0-9a-z:\.-]*)"', - webpage, 'uri') - self.to_screen('uri = %s' % (uri)) + r'"contentId" *: *"([0-9a-f-]*)"', + webpage, 'episode') + self.to_screen('episode = %s' % (episode)) title = self._og_search_title(webpage) self.to_screen('title = %s' % (title)) + # episode page + + server = 'https://media.mtvnservices.com' + prefix = '/pmt/e1/access/index.html' + argument1 = 'uri=mgid:arc:episode:paramount.intl:%s' % (episode) + argument2 = 'configtype=edge' + epurl = '%s%s?%s&%s' % (server, prefix, argument1, argument2) + self.to_screen('epurl = %s' % (epurl)) + eppage = self._download_webpage(epurl, 'episode url page', + headers = {'Referer': url}) + self.to_screen('format list page = %s' % (eppage)) + + uri = self._html_search_regex( + r'(mgid:arc:video:paramount.intl:(?:[0-9a-f-]+))', + eppage, 'uri') + self.to_screen('uri = %s' % (uri)) + + ep = self._html_search_regex( + r'&ep=([0-9a-f-]+)"', + eppage, 'ep') + self.to_screen('ep = %s' % (ep)) + # list of formats - server = 'https://mediautilssvcs-a.akamaihd.net' + server = 'https://media-utils.mtvnservices.com' prefix = '/services/MediaGenerator/' - arguments = 'accountOverride=esperanto.mtvi.com' - listurl = '%s%s%s?%s' % (server, prefix, uri, arguments) + arg1 = 'arcStage=live&accountOverride=intl.mtvi.com&ep=%s' % (ep) + arg2 = '&acceptMethods=hls&format=json&https=true&isEpisode=true' + listurl = '%s%s%s?%s%s' % (server, prefix, uri, arg1, arg2) self.to_screen('listurl = %s' % (listurl)) - formats = self._obtain_akamaihd_formats(listurl) - if self._downloader.params.get('verbose', False): - self.to_screen('formats = %s' % (formats)) + + listpage = self._download_json(listurl, 'url list page') + self.to_screen('listpage = %s' % (listpage)) + src = listpage['package']['video']['item'][0]['rendition'][0]['src'] + self.to_screen('src = %s' % (src)) return { 'id': id, - 'formats': formats, + 'formats': self._extract_m3u8_formats(src, id), 'title': title, 'description': self._og_search_description(webpage), - 'thumbnail': self._html_search_meta('thumbnail', webpage, fatal=False) } From 8b1e2b62bcdcaaebc60eed2a2c835a52186dd79b Mon Sep 17 00:00:00 2001 From: sgerwk Date: Sun, 22 Mar 2020 18:46:20 +0100 Subject: [PATCH 7/7] flake8 --- youtube_dl/extractor/paramount.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/paramount.py b/youtube_dl/extractor/paramount.py index d0aedf4d0..7df578510 100644 --- a/youtube_dl/extractor/paramount.py +++ b/youtube_dl/extractor/paramount.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none class ParamountIE(InfoExtractor): @@ -53,7 +52,7 @@ class ParamountIE(InfoExtractor): epurl = '%s%s?%s&%s' % (server, prefix, argument1, argument2) self.to_screen('epurl = %s' % (epurl)) eppage = self._download_webpage(epurl, 'episode url page', - headers = {'Referer': url}) + headers={'Referer': url}) self.to_screen('format list page = %s' % (eppage)) uri = self._html_search_regex(