[verystream] Add new extractor ( is just a copy of Openload with some little changes )

2025-03-10 12:27:15 +08:00 · 2019-04-21 09:14:12 +02:00 · 2019-04-21 09:14:12 +02:00 · a08e83f1f1
commit a08e83f1f1
parent 5de538787d
2 changed files with 116 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -832,6 +832,7 @@ from .ooyala import (
    OoyalaExternalIE,
 )
 from .openload import OpenloadIE
 from .verystream import VerystreamIE
 from .ora import OraTVIE
 from .orf import (
    ORFTVthekIE,
--- a/youtube_dl/extractor/verystream.py
+++ b/youtube_dl/extractor/verystream.py
@ -0,0 +1,115 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import os
 import random
 import re
 import subprocess
 import tempfile
 from .openload import PhantomJSwrapper
 from .common import InfoExtractor
 from ..compat import (
    compat_urlparse,
    compat_kwargs,
 )
 from ..utils import (
    check_executable,
    determine_ext,
    encodeArgument,
    ExtractorError,
    get_element_by_id,
    get_exe_version,
    is_outdated_version,
    std_headers,
 )
 class VerystreamIE(InfoExtractor):
    _DOMAINS = r'(?:verystream\.com)'
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
                            %s
                        )/
                        (?:e|embed)/
                        (?P<id>[a-zA-Z0-9-_]+)
                    ''' % _DOMAINS
    _TESTS = [{
        'url': 'https://verystream.com/e/b8NWEgkqNLI/',
        'only_matching': True,
    }]
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{major}.0.{build}.{patch} Safari/537.36'
    @staticmethod
    def _extract_urls(webpage):
        return re.findall(
            r'<iframe[^>]+src=["\']((?:https?://)?%s/embed/[a-zA-Z0-9-_]+)'
            % VerystreamIE._DOMAINS, webpage)
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        video_id = mobj.group('id')
        url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
        headers = {
            'User-Agent': self._USER_AGENT_TPL % {
                'major': random.randint(63, 73),
                'build': random.randint(3239, 3683),
                'patch': random.randint(0, 100),
            },
        }
        for path in ('e', 'embed'):
            page_url = url_pattern % path
            last = path == 'f'
            webpage = self._download_webpage(
                page_url, video_id, 'Downloading %s webpage' % path,
                headers=headers, fatal=last)
            if not webpage:
                continue
            if 'File not found' in webpage or 'deleted by the owner' in webpage:
                if not last:
                    continue
                raise ExtractorError('File not found', expected=True, video_id=video_id)
            break
        phantom = PhantomJSwrapper(self, required_version='2.0')
        webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
        decoded_id = (get_element_by_id('streamurl', webpage) or
                      get_element_by_id('streamuri', webpage) or
                      get_element_by_id('streamurj', webpage) or
                      self._search_regex(
                          (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
                           r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
                           r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
                           r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
                          'stream URL'))
        video_url = 'https://%s/gettoken/%s?mime=true' % (host, decoded_id)
        title = self._og_search_title(webpage, default=None) or self._search_regex(
            r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
            'title', default=None) or self._html_search_meta(
            'description', webpage, 'title', fatal=True)
        entries = self._parse_html5_media_entries(page_url, webpage, video_id)
        entry = entries[0] if entries else {}
        subtitles = entry.get('subtitles')
        return {
            'id': video_id,
            'title': title,
            'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'url': video_url,
            'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
            'subtitles': subtitles,
            'http_headers': headers,
        }