[NxLoad] Add new extractor

2025-03-13 05:27:17 +08:00 · 2019-02-19 21:43:15 +01:00 · 2019-02-19 21:43:15 +01:00 · d5ec9d76cd
commit d5ec9d76cd
parent 77a842c892
2 changed files with 114 additions and 1 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -809,6 +809,7 @@ from .nrk import (
 )
 from .ntvde import NTVDeIE
 from .ntvru import NTVRuIE
+from .nxload import NxLoadIE
 from .nytimes import (
    NYTimesIE,
    NYTimesArticleIE,
--- a/youtube_dl/extractor/nxload.py
+++ b/youtube_dl/extractor/nxload.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+from ..utils import (js_to_json)
+
+
+class NxLoadIE(InfoExtractor):
+    """Information extractor for nxload.com video pages.
+
+    Matches both the regular page (``/<id>.html``) and the embed page
+    (``/embed-<id>.html``).  Formats, thumbnail and subtitles are read from
+    the options object passed to ``new Clappr.Player(...)`` in the page.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?nxload\.com/(?:embed-)?(?P<id>\w+)\.html'
+
+    # The ``re:`` patterns are raw strings so that ``\w`` / ``\d`` reach the
+    # regex engine as written instead of relying on invalid string escapes,
+    # and literal dots are escaped so they only match a dot.
+    _TESTS = [
+        {
+            'url': 'https://nxload.com/embed-w9uwujpk2na7.html',
+            'file': 'pso-kkk-1080p-w9uwujpk2na7.mp4',
+            'md5': '955afd4f8f2c019bc4f116897346e3f9',
+            'info_dict': {
+                'id': 'w9uwujpk2na7',
+                'ext': 'mp4',
+                'title': 'pso-firstman web 1080p',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg$',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$'
+            }
+        },
+        {
+            'url': 'https://nxload.com/qhwxcxj5ah56.html',
+            'file': 'pso kkk 1080p mkv-qhwxcxj5ah56.mp4',
+            'md5': '983814ba610cd26ddd0819cd6d26ab68',
+            'info_dict': {
+                'id': 'qhwxcxj5ah56',
+                'ext': 'mp4',
+                'title': 'pso kkk 1080p mkv',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$'
+            }
+        },
+        {
+            'url': 'https://nxload.com/embed-ig0ud2p3h57l.html',
+            'file': 'ig0ud2p3h57l-ig0ud2p3h57l.mp4',
+            'md5': 'ab3a79c831fccfd8a34c77775082c694',
+            'info_dict': {
+                'id': 'ig0ud2p3h57l',
+                'ext': 'mp4',
+                'title': 'ig0ud2p3h57l',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$'
+            }
+        },
+        {
+            'url': 'https://nxload.com/ig0ud2p3h57l.html',
+            'file': 'streams org Noragami S1E01 German DTS 1080p Blu Ray x264 mkv-ig0ud2p3h57l.mp4',
+            'md5': 'ab3a79c831fccfd8a34c77775082c694',
+            'info_dict': {
+                'id': 'ig0ud2p3h57l',
+                'ext': 'mp4',
+                'title': 'streams org Noragami S1E01 German DTS 1080p Blu Ray x264 mkv',
+                'thumbnail': r're:^https://\w+\.nxload\.com/i/\d{2}/\d{5}/\w+\.jpg',
+                'url': r're:^https://\w+\.nxload\.com/[,\w]+/v\.mp4$'
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        """Build the info dict for the nxload.com page at ``url``.
+
+        The page is downloaded once; the title comes from the ``<title>``
+        tag (falling back to the ``filename`` div, then to the video id) and
+        everything else from the Clappr player options found in the page.
+
+        Returns a dict with ``id``, ``formats``, ``manifest_url``, ``title``,
+        ``alt_title``, ``thumbnail`` and ``subtitles``.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # The file name shown on the page doubles as the fallback title; the
+        # video id is used when even that is missing.
+        alt_title = self._html_search_regex(
+            r'<div class="filename">([^<]+)', webpage, 'title', video_id)
+        title = self._html_search_regex(
+            r'<title>Watch ([^<]+)</title>', webpage, 'title', '') or alt_title
+
+        player_js = self._search_regex(
+            r'new Clappr\.Player\(((?:.|\s)+?})\);', webpage, 'video URL')
+        # Replace the two constructs that are not plain object-literal data
+        # (an empty callback and an arithmetic expression) with constants
+        # before the text is handed to js_to_json.
+        player_js = player_js.replace('function() {  }', '0')
+        player_js = player_js.replace('3*1024*1024', '3145728')
+        player_config = self._parse_json(
+            player_js, video_id, transform_source=js_to_json)
+
+        self.report_extraction(video_id)
+
+        sources = player_config.get('sources') or []
+        level_config = player_config.get('levelSelectorConfig') or {}
+        labels = level_config.get('labels') or {}
+
+        # NOTE(review): the layout of ``sources`` is assumed to be fixed:
+        # [0] manifest, [1] 1920x1080 rendition, [2] 1280x720 rendition,
+        # with the matching labels stored under the keys '1' and '0'.
+        # Confirm against the site if extraction results look wrong.
+        manifest_url = sources[0] if sources else None
+        candidates = (
+            (1, {
+                'format_id': labels.get('1'),
+                'width': 1920,
+                'height': 1080
+            }),
+            (2, {
+                'format_id': labels.get('0'),
+                'width': 1280,
+                'height': 720,
+                'quality': -2
+            }),
+        )
+        formats = []
+        for index, fmt in candidates:
+            # Skip renditions the page does not provide instead of failing
+            # with an IndexError on a short ``sources`` list.
+            if index < len(sources) and sources[index]:
+                fmt['url'] = sources[index]
+                formats.append(fmt)
+        self._sort_formats(formats)
+
+        thumbnail = player_config.get('poster')
+
+        subtitles = {}
+        playback = player_config.get('playback') or {}
+        for track in playback.get('externalTracks') or []:
+            label = track.get('label')
+            src = track.get('src')
+            # 'Upload SRT' is excluded as in the original code; entries
+            # without a label or source URL carry nothing usable.
+            if not label or not src or label == 'Upload SRT':
+                continue
+            subtitles.setdefault(label, []).append({'url': src})
+
+        # Returned only after every track has been visited, so the result no
+        # longer depends on there being at least one external track.
+        return {
+            'id': video_id,
+            'formats': formats,
+            'manifest_url': manifest_url,
+            'title': title,
+            'alt_title': alt_title,
+            'thumbnail': thumbnail,
+            'subtitles': subtitles
+        }