From cd8f4ba3ec87dcaade80a18c2c6c8ef1fcd51cca Mon Sep 17 00:00:00 2001 From: Forthrin Date: Tue, 7 May 2019 20:53:20 +0200 Subject: [PATCH] [p3] Initial site support (includes various per-series subdomains) --- youtube_dl/extractor/extractors.py | 4 + youtube_dl/extractor/p3.py | 134 +++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 youtube_dl/extractor/p3.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0d0732dcb..746305e2b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -843,6 +843,10 @@ from .orf import ( ORFIPTVIE, ) from .outsidetv import OutsideTVIE +from .p3 import ( + P3IE, + P3HomeIE, +) from .packtpub import ( PacktPubIE, PacktPubCourseIE, diff --git a/youtube_dl/extractor/p3.py b/youtube_dl/extractor/p3.py new file mode 100644 index 000000000..9eec09f42 --- /dev/null +++ b/youtube_dl/extractor/p3.py @@ -0,0 +1,134 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_strdate, + unified_timestamp, +) + + +class P3IE(InfoExtractor): + _VALID_URL = r'https?://(?P[a-z]+)\.p3\.no/(episoder/sesong-(?P\d+)/episode-(?P\d+)/?|\d{4}/\d{2}/\d{2})/(?P[^/]*)/?' + _TESTS = [ + { + 'url': 'https://blank.p3.no/2019/03/17/sees-24-mars/', + 'info_dict': { + 'id': 'blank-sees-24-mars', + 'title': 'Sees 24. mars', + 'ext': 'mp4', + 'upload_date': '20190317', + 'description': 'Zehra er 19 år. Hun bor hjemme og studerer farmasi på OsloMet med bestevenninnen Amina.', + 'timestamp': 1552820100, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://skam.p3.no/2017/06/24/kjaere-sana/', + 'info_dict': { + 'id': 'skam-kjaere-sana', + 'title': 'Kjære Sana', + 'ext': 'mp4', + 'upload_date': '20170624', + 'description': 'SKAM sesong 4 følger Sana gjennom siste semester i andreklasse på Hartvig Nissen vgs i Oslo.', + 'timestamp': 1498336860, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://skam.p3.no/episoder/sesong-1/episode-1/', + 'info_dict': { + 'id': 'skam-s01e01', + 'ext': 'mp4', + 'upload_date': '20150925', + 'title': 'Episode 1:11', + 'description': 'Du ser ut som en slut', + 'timestamp': 1443205800, + }, + 'params': { + 'skip_download': True, + }, + } + ] + + def _real_extract(self, url): + mobj = re.search(self._VALID_URL, url) + if mobj.group("season") and mobj.group("episode"): + video_id = 's%02de%02d' % (int(mobj.group("season")), int(mobj.group("episode"))) + else: + video_id = self._match_id(url) + + video_id = mobj.group("name") + '-' + video_id + webpage = self._download_webpage(url, video_id) + + manifest_id = (re.search(r'querySelector\(\'.*?\'\), \'(.*?)\', ludoOptions', webpage) or re.search(r'data-nrk-id="([^"]*)"', webpage)).group(1) + folder = "clip" if re.search(r'-', manifest_id) else "program" + meta = self._download_json('https://psapi.nrk.no/playback/manifest/%s/%s' % (folder, manifest_id), video_id, 'Downloading video JSON') + + # video_id = try_get(meta, lambda x: x['id']) + title = self._html_search_regex(r'[^<]*\s*

\s*\s*
\s*