From d5ec9d76cd3081ce4c7952e432734a1a5f960006 Mon Sep 17 00:00:00 2001
From: kr4ssi
Date: Tue, 19 Feb 2019 21:43:15 +0100
Subject: [PATCH] [NxLoad] Add new extractor

---
 youtube_dl/extractor/extractors.py |   1 +
 youtube_dl/extractor/nxload.py     | 129 +++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)
 create mode 100644 youtube_dl/extractor/nxload.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 923dfe7f4..b17779607 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -809,6 +809,7 @@ from .nrk import (
 )
 from .ntvde import NTVDeIE
 from .ntvru import NTVRuIE
+from .nxload import NxLoadIE
 from .nytimes import (
     NYTimesIE,
     NYTimesArticleIE,
diff --git a/youtube_dl/extractor/nxload.py b/youtube_dl/extractor/nxload.py
new file mode 100644
index 000000000..b9d29c2a2
--- /dev/null
+++ b/youtube_dl/extractor/nxload.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class NxLoadIE(InfoExtractor):
+    # Accepts both the plain page (/<id>.html) and the embed page
+    # (/embed-<id>.html); _match_id() reads the video id from the `id` group.
+    _VALID_URL = r'https?://(?:www\.)?nxload\.com/(?:embed-)?(?P<id>\w+)\.html'
+
+    _TESTS = [
+        {
+            'url': 'https://nxload.com/embed-w9uwujpk2na7.html',
+            'file': 'pso-kkk-1080p-w9uwujpk2na7.mp4',
+            'md5': '955afd4f8f2c019bc4f116897346e3f9',
+            'info_dict': {
+                'id': 'w9uwujpk2na7',
+                'ext': 'mp4',
+                'title': 'pso-firstman web 1080p',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg$',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$',
+            },
+        },
+        {
+            'url': 'https://nxload.com/qhwxcxj5ah56.html',
+            'file': 'pso kkk 1080p mkv-qhwxcxj5ah56.mp4',
+            'md5': '983814ba610cd26ddd0819cd6d26ab68',
+            'info_dict': {
+                'id': 'qhwxcxj5ah56',
+                'ext': 'mp4',
+                'title': 'pso kkk 1080p mkv',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$',
+            },
+        },
+        {
+            'url': 'https://nxload.com/embed-ig0ud2p3h57l.html',
+            'file': 'ig0ud2p3h57l-ig0ud2p3h57l.mp4',
+            'md5': 'ab3a79c831fccfd8a34c77775082c694',
+            'info_dict': {
+                'id': 'ig0ud2p3h57l',
+                'ext': 'mp4',
+                'title': 'ig0ud2p3h57l',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$',
+            },
+        },
+        {
+            'url': 'https://nxload.com/ig0ud2p3h57l.html',
+            'file': 'streams org Noragami S1E01 German DTS 1080p Blu Ray x264 mkv-ig0ud2p3h57l.mp4',
+            'md5': 'ab3a79c831fccfd8a34c77775082c694',
+            'info_dict': {
+                'id': 'ig0ud2p3h57l',
+                'ext': 'mp4',
+                'title': 'streams org Noragami S1E01 German DTS 1080p Blu Ray x264 mkv',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # NOTE(review): both title patterns appear to have lost their HTML tag
+        # anchors when this patch was converted to text; confirm them against
+        # the original commit before merging.
+        title = self._html_search_regex(
+            r'Watch ([^<]+)', webpage, 'title', default=None)
+        alt_title = self._html_search_regex(
+            r'([^<]+)', webpage, 'title', default=video_id)
+        title = title or alt_title
+
+        # The Clappr constructor argument is a JavaScript object literal; the
+        # empty function stub and the arithmetic expression are swapped for
+        # plain literals before js_to_json converts it.
+        player_js = self._search_regex(
+            r'(?s)new Clappr\.Player\((.+?})\);', webpage, 'player config')
+        player_js = player_js.replace(
+            'function() { }', '0').replace('3*1024*1024', '3145728')
+        player_config = self._parse_json(
+            player_js, video_id, transform_source=js_to_json)
+
+        self.report_extraction(video_id)
+
+        # Missing keys degrade to empty containers instead of raising
+        # AttributeError/IndexError on an unexpected player config.
+        sources = player_config.get('sources') or []
+        labels = (player_config.get('levelSelectorConfig') or {}).get('labels') or {}
+
+        # (index into sources, key into labels, fixed format properties)
+        format_specs = (
+            (1, '1', {'width': 1920, 'height': 1080}),
+            (2, '0', {'width': 1280, 'height': 720, 'quality': -2}),
+        )
+        formats = []
+        for source_index, label_key, properties in format_specs:
+            if source_index >= len(sources):
+                continue
+            fmt = {
+                'url': sources[source_index],
+                'format_id': labels.get(label_key),
+            }
+            fmt.update(properties)
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        subtitles = {}
+        playback = player_config.get('playback') or {}
+        for track in playback.get('externalTracks') or []:
+            label = track.get('label')
+            src = track.get('src')
+            # 'Upload SRT' is skipped, as are tracks without a label or URL.
+            if not label or not src or label == 'Upload SRT':
+                continue
+            subtitles[label] = [{'url': src}]
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'manifest_url': sources[0] if sources else None,
+            'title': title,
+            'alt_title': alt_title,
+            'thumbnail': player_config.get('poster'),
+            'subtitles': subtitles,
+        }