1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 04:00:32 +08:00

Merge pull request #8 from rg3/master

updating again
This commit is contained in:
Kade 2018-01-27 22:05:30 -05:00 committed by GitHub
commit 075efa1998
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 456 additions and 119 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.21** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.27**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.01.21 [debug] youtube-dl version 2018.01.27
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,23 @@
version 2018.01.27
Core
* [extractor/common] Improve _json_ld for articles
* Switch codebase to use compat_b64decode
+ [compat] Add compat_b64decode
Extractors
+ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616)
* [dplay] Bypass geo restriction
+ [dplay] Add support for disco-api videos (#15396)
* [youtube] Extract precise error messages (#15284)
* [teachertube] Capture and output error message
* [teachertube] Fix and relax thumbnail extraction (#15403)
+ [prosiebensat1] Add another clip id regular expression (#15378)
* [tbs] Update tokenizer url (#15395)
* [mixcloud] Use compat_b64decode (#15394)
- [thesixtyone] Remove extractor (#15341)
version 2018.01.21 version 2018.01.21
Core Core

View File

@ -732,6 +732,8 @@
- **ServingSys** - **ServingSys**
- **Servus** - **Servus**
- **Sexu** - **Sexu**
- **SeznamZpravy**
- **SeznamZpravyArticle**
- **Shahid** - **Shahid**
- **ShahidShow** - **ShahidShow**
- **Shared**: shared.sx - **Shared**: shared.sx
@ -822,7 +824,6 @@
- **ThePlatform** - **ThePlatform**
- **ThePlatformFeed** - **ThePlatformFeed**
- **TheScene** - **TheScene**
- **TheSixtyOne**
- **TheStar** - **TheStar**
- **TheSun** - **TheSun**
- **TheWeatherChannel** - **TheWeatherChannel**

View File

@ -1,8 +1,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
from math import ceil from math import ceil
from .compat import compat_b64decode
from .utils import bytes_to_intlist, intlist_to_bytes from .utils import bytes_to_intlist, intlist_to_bytes
BLOCK_SIZE_BYTES = 16 BLOCK_SIZE_BYTES = 16
@ -180,7 +180,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
""" """
NONCE_LENGTH_BYTES = 8 NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) data = bytes_to_intlist(compat_b64decode(data))
password = bytes_to_intlist(password.encode('utf-8')) password = bytes_to_intlist(password.encode('utf-8'))
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import binascii import binascii
import collections import collections
import ctypes import ctypes
@ -2896,9 +2897,24 @@ except TypeError:
if isinstance(spec, compat_str): if isinstance(spec, compat_str):
spec = spec.encode('ascii') spec = spec.encode('ascii')
return struct.unpack(spec, *args) return struct.unpack(spec, *args)
class compat_Struct(struct.Struct):
def __init__(self, fmt):
if isinstance(fmt, compat_str):
fmt = fmt.encode('ascii')
super(compat_Struct, self).__init__(fmt)
else: else:
compat_struct_pack = struct.pack compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack compat_struct_unpack = struct.unpack
if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
class compat_Struct(struct.Struct):
def unpack(self, string):
if not isinstance(string, buffer):
string = buffer(string)
return super(compat_Struct, self).unpack(string)
else:
compat_Struct = struct.Struct
try: try:
from future_builtins import zip as compat_zip from future_builtins import zip as compat_zip
@ -2908,6 +2924,16 @@ except ImportError: # not 2.6+ or is 3.x
except ImportError: except ImportError:
compat_zip = zip compat_zip = zip
if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs):
if isinstance(s, compat_str):
s = s.encode('ascii')
return base64.b64decode(s, *args, **kwargs)
else:
compat_b64decode = base64.b64decode
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2]. # names, see the original PyPy issue [1] and the youtube-dl one [2].
@ -2930,6 +2956,8 @@ __all__ = [
'compat_HTMLParseError', 'compat_HTMLParseError',
'compat_HTMLParser', 'compat_HTMLParser',
'compat_HTTPError', 'compat_HTTPError',
'compat_Struct',
'compat_b64decode',
'compat_basestring', 'compat_basestring',
'compat_chr', 'compat_chr',
'compat_cookiejar', 'compat_cookiejar',

View File

@ -1,12 +1,12 @@
from __future__ import division, unicode_literals from __future__ import division, unicode_literals
import base64
import io import io
import itertools import itertools
import time import time
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import ( from ..compat import (
compat_b64decode,
compat_etree_fromstring, compat_etree_fromstring,
compat_urlparse, compat_urlparse,
compat_urllib_error, compat_urllib_error,
@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
boot_info = self._get_bootstrap_from_url(bootstrap_url) boot_info = self._get_bootstrap_from_url(bootstrap_url)
else: else:
bootstrap_url = None bootstrap_url = None
bootstrap = base64.b64decode(node.text.encode('ascii')) bootstrap = compat_b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap) boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url return boot_info, bootstrap_url
@ -349,7 +349,7 @@ class F4mFD(FragmentFD):
live = boot_info['live'] live = boot_info['live']
metadata_node = media.find(_add_ns('metadata')) metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None: if metadata_node is not None:
metadata = base64.b64decode(metadata_node.text.encode('ascii')) metadata = compat_b64decode(metadata_node.text)
else: else:
metadata = None metadata = None

View File

@ -1,25 +1,27 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import time import time
import struct
import binascii import binascii
import io import io
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_urllib_error from ..compat import (
compat_Struct,
compat_urllib_error,
)
u8 = struct.Struct(b'>B') u8 = compat_Struct('>B')
u88 = struct.Struct(b'>Bx') u88 = compat_Struct('>Bx')
u16 = struct.Struct(b'>H') u16 = compat_Struct('>H')
u1616 = struct.Struct(b'>Hxx') u1616 = compat_Struct('>Hxx')
u32 = struct.Struct(b'>I') u32 = compat_Struct('>I')
u64 = struct.Struct(b'>Q') u64 = compat_Struct('>Q')
s88 = struct.Struct(b'>bx') s88 = compat_Struct('>bx')
s16 = struct.Struct(b'>h') s16 = compat_Struct('>h')
s1616 = struct.Struct(b'>hxx') s1616 = compat_Struct('>hxx')
s32 = struct.Struct(b'>i') s32 = compat_Struct('>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
@ -139,7 +141,7 @@ def write_piff_header(stream, params):
sample_entry_payload += u16.pack(0x18) # depth sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined sample_entry_payload += s16.pack(-1) # pre defined
codec_private_data = binascii.unhexlify(params['codec_private_data']) codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
if fourcc in ('H264', 'AVC1'): if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:] sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version avcc_payload = u8.pack(1) # configuration version

View File

@ -1,13 +1,15 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import json import json
import os import os
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_cbc_decrypt from ..aes import aes_cbc_decrypt
from ..compat import compat_ord from ..compat import (
compat_b64decode,
compat_ord,
)
from ..utils import ( from ..utils import (
bytes_to_intlist, bytes_to_intlist,
ExtractorError, ExtractorError,
@ -48,9 +50,9 @@ class ADNIE(InfoExtractor):
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(enc_subtitles[24:])), bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'), bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'),
bytes_to_intlist(base64.b64decode(enc_subtitles[:24])) bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
)) ))
subtitles_json = self._parse_json( subtitles_json = self._parse_json(
dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(), dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),

View File

@ -1,11 +1,13 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
class BigflixIE(InfoExtractor): class BigflixIE(InfoExtractor):
@ -39,8 +41,8 @@ class BigflixIE(InfoExtractor):
webpage, 'title') webpage, 'title')
def decode_url(quoted_b64_url): def decode_url(quoted_b64_url):
return base64.b64decode(compat_urllib_parse_unquote( return compat_b64decode(compat_urllib_parse_unquote(
quoted_b64_url).encode('ascii')).decode('utf-8') quoted_b64_url)).decode('utf-8')
formats = [] formats = []
for height, encoded_url in re.findall( for height, encoded_url in re.findall(

View File

@ -1,11 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import base64
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError ExtractorError
@ -58,7 +58,7 @@ class ChilloutzoneIE(InfoExtractor):
base64_video_info = self._html_search_regex( base64_video_info = self._html_search_regex(
r'var cozVidData = "(.+?)";', webpage, 'video data') r'var cozVidData = "(.+?)";', webpage, 'video data')
decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8') decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8')
video_info_dict = json.loads(decoded_video_info) video_info_dict = json.loads(decoded_video_info)
# get video information from dict # get video information from dict

View File

@ -1,10 +1,10 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import parse_duration from ..utils import parse_duration
@ -44,8 +44,7 @@ class ChirbitIE(InfoExtractor):
# Reverse engineered from https://chirb.it/js/chirbit.player.js (look # Reverse engineered from https://chirb.it/js/chirbit.player.js (look
# for soundURL) # for soundURL)
audio_url = base64.b64decode( audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')
data_fd[::-1].encode('ascii')).decode('utf-8')
title = self._search_regex( title = self._search_regex(
r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')

View File

@ -1043,7 +1043,7 @@ class InfoExtractor(object):
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name')) info['series'] = unescapeHTML(part_of_series.get('name'))
elif item_type == 'Article': elif item_type in ('Article', 'NewsArticle'):
info.update({ info.update({
'timestamp': parse_iso8601(e.get('datePublished')), 'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')), 'title': unescapeHTML(e.get('headline')),

View File

@ -3,13 +3,13 @@ from __future__ import unicode_literals
import re import re
import json import json
import base64
import zlib import zlib
from hashlib import sha1 from hashlib import sha1
from math import pow, sqrt, floor from math import pow, sqrt, floor
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_b64decode,
compat_etree_fromstring, compat_etree_fromstring,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_urllib_request, compat_urllib_request,
@ -272,8 +272,8 @@ class CrunchyrollIE(CrunchyrollBaseIE):
} }
def _decrypt_subtitles(self, data, iv, id): def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) data = bytes_to_intlist(compat_b64decode(data))
iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8'))) iv = bytes_to_intlist(compat_b64decode(iv))
id = int(id) id = int(id)
def obfuscate_key_aux(count, modulo, start): def obfuscate_key_aux(count, modulo, start):

View File

@ -10,6 +10,7 @@ from ..aes import (
aes_cbc_decrypt, aes_cbc_decrypt,
aes_cbc_encrypt, aes_cbc_encrypt,
) )
from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
bytes_to_intlist, bytes_to_intlist,
bytes_to_long, bytes_to_long,
@ -93,7 +94,7 @@ class DaisukiMottoIE(InfoExtractor):
rtn = self._parse_json( rtn = self._parse_json(
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
base64.b64decode(encrypted_rtn)), compat_b64decode(encrypted_rtn)),
aes_key, iv)).decode('utf-8').rstrip('\0'), aes_key, iv)).decode('utf-8').rstrip('\0'),
video_id) video_id)

View File

@ -12,25 +12,28 @@ from ..compat import (
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
remove_end, remove_end,
try_get, try_get,
unified_strdate, unified_strdate,
unified_timestamp,
update_url_query, update_url_query,
USER_AGENTS, USER_AGENTS,
) )
class DPlayIE(InfoExtractor): class DPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)'
_TESTS = [{ _TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL # non geo restricted, via secure api, unsigned download hls URL
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
'info_dict': { 'info_dict': {
'id': '3172', 'id': '3172',
'display_id': 'season-1-svensken-lar-sig-njuta-av-livet', 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Svensken lär sig njuta av livet', 'title': 'Svensken lär sig njuta av livet',
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
@ -48,7 +51,7 @@ class DPlayIE(InfoExtractor):
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
'info_dict': { 'info_dict': {
'id': '70816', 'id': '70816',
'display_id': 'season-6-episode-12', 'display_id': 'mig-og-min-mor/season-6-episode-12',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Episode 12', 'title': 'Episode 12',
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
@ -65,6 +68,30 @@ class DPlayIE(InfoExtractor):
# geo restricted, via direct unsigned hls URL # geo restricted, via direct unsigned hls URL
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
'only_matching': True, 'only_matching': True,
}, {
# disco-api
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
'info_dict': {
'id': '40206',
'display_id': 'i-kongens-klr/sesong-1-episode-7',
'ext': 'mp4',
'title': 'Episode 7',
'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
'duration': 2611.16,
'timestamp': 1516726800,
'upload_date': '20180123',
'series': 'I kongens klær',
'season_number': 1,
'episode_number': 7,
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}, {
# geo restricted, bypassable via X-Forwarded-For
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -72,10 +99,81 @@ class DPlayIE(InfoExtractor):
display_id = mobj.group('id') display_id = mobj.group('id')
domain = mobj.group('domain') domain = mobj.group('domain')
self._initialize_geo_bypass([mobj.group('country').upper()])
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._search_regex( video_id = self._search_regex(
r'data-video-id=["\'](\d+)', webpage, 'video id') r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
if not video_id:
host = mobj.group('host')
disco_base = 'https://disco-api.%s' % host
self._download_json(
'%s/token' % disco_base, display_id, 'Downloading token',
query={
'realm': host.replace('.', ''),
})
video = self._download_json(
'%s/content/videos/%s' % (disco_base, display_id), display_id,
headers={
'Referer': url,
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
}, query={
'include': 'show'
})
video_id = video['data']['id']
info = video['data']['attributes']
title = info['name']
formats = []
for format_id, format_dict in self._download_json(
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
display_id)['data']['attributes']['streaming'].items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url:
continue
ext = determine_ext(format_url)
if format_id == 'dash' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id='dash', fatal=False))
elif format_id == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
series = None
try:
included = video.get('included')
if isinstance(included, list):
show = next(e for e in included if e.get('type') == 'show')
series = try_get(
show, lambda x: x['attributes']['name'], compat_str)
except StopIteration:
pass
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': info.get('description'),
'duration': float_or_none(
info.get('videoDuration'), scale=1000),
'timestamp': unified_timestamp(info.get('publishStart')),
'series': series,
'season_number': int_or_none(info.get('seasonNumber')),
'episode_number': int_or_none(info.get('episodeNumber')),
'age_limit': int_or_none(info.get('minimum_age')),
'formats': formats,
}
info = self._download_json( info = self._download_json(
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id), 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),

View File

@ -1,10 +1,10 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
qualities, qualities,
sanitized_Request, sanitized_Request,
@ -42,7 +42,7 @@ class DumpertIE(InfoExtractor):
r'data-files="([^"]+)"', webpage, 'data files') r'data-files="([^"]+)"', webpage, 'data files')
files = self._parse_json( files = self._parse_json(
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'), compat_b64decode(files_base64).decode('utf-8'),
video_id) video_id)
quality = qualities(['flv', 'mobile', 'tablet', '720p']) quality = qualities(['flv', 'mobile', 'tablet', '720p'])

View File

@ -1,13 +1,13 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urlparse, compat_b64decode,
compat_str, compat_str,
compat_urlparse,
) )
from ..utils import ( from ..utils import (
extract_attributes, extract_attributes,
@ -36,9 +36,9 @@ class EinthusanIE(InfoExtractor):
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
def _decrypt(self, encrypted_data, video_id): def _decrypt(self, encrypted_data, video_id):
return self._parse_json(base64.b64decode(( return self._parse_json(compat_b64decode((
encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1] encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
).encode('ascii')).decode('utf-8'), video_id) )).decode('utf-8'), video_id)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -933,6 +933,10 @@ from .servingsys import ServingSysIE
from .servus import ServusIE from .servus import ServusIE
from .sevenplus import SevenPlusIE from .sevenplus import SevenPlusIE
from .sexu import SexuIE from .sexu import SexuIE
from .seznamzpravy import (
SeznamZpravyIE,
SeznamZpravyArticleIE,
)
from .shahid import ( from .shahid import (
ShahidIE, ShahidIE,
ShahidShowIE, ShahidShowIE,

View File

@ -1,8 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest, HEADRequest,
@ -48,7 +47,7 @@ class HotNewHipHopIE(InfoExtractor):
if 'mediaKey' not in mkd: if 'mediaKey' not in mkd:
raise ExtractorError('Did not get a media key') raise ExtractorError('Did not get a media key')
redirect_url = base64.b64decode(video_url_base64).decode('utf-8') redirect_url = compat_b64decode(video_url_base64).decode('utf-8')
redirect_req = HEADRequest(redirect_url) redirect_req = HEADRequest(redirect_url)
req = self._request_webpage( req = self._request_webpage(
redirect_req, video_id, redirect_req, video_id,

View File

@ -2,9 +2,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
from ..compat import ( from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urlparse, compat_urlparse,
) )
@ -61,7 +60,7 @@ class InfoQIE(BokeCCBaseIE):
encoded_id = self._search_regex( encoded_id = self._search_regex(
r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None) r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8'))
playpath = 'mp4:' + real_id playpath = 'mp4:' + real_id
return [{ return [{

View File

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import datetime import datetime
import hashlib import hashlib
import re import re
@ -9,6 +8,7 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_b64decode,
compat_ord, compat_ord,
compat_str, compat_str,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
@ -329,7 +329,7 @@ class LetvCloudIE(InfoExtractor):
raise ExtractorError('Letv cloud returned an unknwon error') raise ExtractorError('Letv cloud returned an unknwon error')
def b64decode(s): def b64decode(s):
return base64.b64decode(s.encode('utf-8')).decode('utf-8') return compat_b64decode(s).decode('utf-8')
formats = [] formats = []
for media in play_json['data']['video_info']['media'].values(): for media in play_json['data']['video_info']['media'].values():

View File

@ -1,13 +1,12 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote from ..compat import (
from ..utils import ( compat_b64decode,
int_or_none, compat_urllib_parse_unquote,
) )
from ..utils import int_or_none
class MangomoloBaseIE(InfoExtractor): class MangomoloBaseIE(InfoExtractor):
@ -51,4 +50,4 @@ class MangomoloLiveIE(MangomoloBaseIE):
_IS_LIVE = True _IS_LIVE = True
def _get_real_id(self, page_id): def _get_real_id(self, page_id):
return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode() return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()

View File

@ -1,12 +1,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import functools import functools
import itertools import itertools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_b64decode,
compat_chr, compat_chr,
compat_ord, compat_ord,
compat_str, compat_str,
@ -79,7 +79,7 @@ class MixcloudIE(InfoExtractor):
if encrypted_play_info is not None: if encrypted_play_info is not None:
# Decode # Decode
encrypted_play_info = base64.b64decode(encrypted_play_info) encrypted_play_info = compat_b64decode(encrypted_play_info)
else: else:
# New path # New path
full_info_json = self._parse_json(self._html_search_regex( full_info_json = self._parse_json(self._html_search_regex(
@ -109,7 +109,7 @@ class MixcloudIE(InfoExtractor):
kpa_target = encrypted_play_info kpa_target = encrypted_play_info
else: else:
kps = ['https://', 'http://'] kps = ['https://', 'http://']
kpa_target = base64.b64decode(info_json['streamInfo']['url']) kpa_target = compat_b64decode(info_json['streamInfo']['url'])
for kp in kps: for kp in kps:
partial_key = self._decrypt_xor_cipher(kpa_target, kp) partial_key = self._decrypt_xor_cipher(kpa_target, kp)
for quote in ["'", '"']: for quote in ["'", '"']:
@ -165,7 +165,7 @@ class MixcloudIE(InfoExtractor):
format_url = stream_info.get(url_key) format_url = stream_info.get(url_key)
if not format_url: if not format_url:
continue continue
decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url)) decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
if not decrypted: if not decrypted:
continue continue
if url_key == 'hlsUrl': if url_key == 'hlsUrl':

View File

@ -1,9 +1,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_b64decode,
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
@ -12,7 +16,6 @@ from ..utils import (
try_get, try_get,
unsmuggle_url, unsmuggle_url,
) )
from ..compat import compat_urllib_parse_urlencode
class OoyalaBaseIE(InfoExtractor): class OoyalaBaseIE(InfoExtractor):
@ -44,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor):
url_data = try_get(stream, lambda x: x['url']['data'], compat_str) url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
if not url_data: if not url_data:
continue continue
s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8') s_url = compat_b64decode(url_data).decode('utf-8')
if not s_url or s_url in urls: if not s_url or s_url in urls:
continue continue
urls.append(s_url) urls.append(s_url)

View File

@ -345,6 +345,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
r'clip[iI]d\s*=\s*["\'](\d+)', r'clip[iI]d\s*=\s*["\'](\d+)',
r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
r'proMamsId&quot;\s*:\s*&quot;(\d+)', r'proMamsId&quot;\s*:\s*&quot;(\d+)',
r'proMamsId"\s*:\s*"(\d+)',
] ]
_TITLE_REGEXES = [ _TITLE_REGEXES = [
r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',

View File

@ -1,12 +1,12 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_cbc_decrypt from ..aes import aes_cbc_decrypt
from ..compat import ( from ..compat import (
compat_b64decode,
compat_ord, compat_ord,
compat_str, compat_str,
) )
@ -142,11 +142,11 @@ class RTL2YouIE(RTL2YouBaseIE):
stream_data = self._download_json( stream_data = self._download_json(
self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id) self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':') data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
stream_url = intlist_to_bytes(aes_cbc_decrypt( stream_url = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(base64.b64decode(data)), bytes_to_intlist(compat_b64decode(data)),
bytes_to_intlist(self._AES_KEY), bytes_to_intlist(self._AES_KEY),
bytes_to_intlist(base64.b64decode(iv)) bytes_to_intlist(compat_b64decode(iv))
)) ))
if b'rtl2_you_video_not_found' in stream_url: if b'rtl2_you_video_not_found' in stream_url:
raise ExtractorError('video not found', expected=True) raise ExtractorError('video not found', expected=True)

View File

@ -7,6 +7,7 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_b64decode,
compat_struct_unpack, compat_struct_unpack,
) )
from ..utils import ( from ..utils import (
@ -21,7 +22,7 @@ from ..utils import (
def _decrypt_url(png): def _decrypt_url(png):
encrypted_data = base64.b64decode(png.encode('utf-8')) encrypted_data = compat_b64decode(png)
text_index = encrypted_data.find(b'tEXt') text_index = encrypted_data.find(b'tEXt')
text_chunk = encrypted_data[text_index - 4:] text_chunk = encrypted_data[text_index - 4:]
length = compat_struct_unpack('!I', text_chunk[:4])[0] length = compat_struct_unpack('!I', text_chunk[:4])[0]

View File

@ -0,0 +1,170 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import (
urljoin,
int_or_none,
parse_codecs,
try_get,
)
def _raw_id(src_url):
return compat_urllib_parse_urlparse(src_url).path.split('/')[-1]
class SeznamZpravyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
_TESTS = [{
'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy',
'info_dict': {
'id': '170889',
'ext': 'mp4',
'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)',
'thumbnail': r're:^https?://.*\.jpe?g',
'duration': 241,
'series': 'Svět bez obalu',
},
'params': {
'skip_download': True,
},
}, {
# with Location key
'url': 'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false',
'info_dict': {
'id': '185688',
'ext': 'mp4',
'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu',
'thumbnail': r're:^https?://.*\.jpe?g',
'series': 'Výzva',
},
'params': {
'skip_download': True,
},
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1',
webpage)]
def _extract_sdn_formats(self, sdn_url, video_id):
sdn_data = self._download_json(sdn_url, video_id)
if sdn_data.get('Location'):
sdn_url = sdn_data['Location']
sdn_data = self._download_json(sdn_url, video_id)
formats = []
mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {}
for format_id, format_data in mp4_formats.items():
relative_url = format_data.get('url')
if not relative_url:
continue
try:
width, height = format_data.get('resolution')
except (TypeError, ValueError):
width, height = None, None
f = {
'url': urljoin(sdn_url, relative_url),
'format_id': 'http-%s' % format_id,
'tbr': int_or_none(format_data.get('bandwidth'), scale=1000),
'width': int_or_none(width),
'height': int_or_none(height),
}
f.update(parse_codecs(format_data.get('codec')))
formats.append(f)
pls = sdn_data.get('pls', {})
def get_url(format_id):
return try_get(pls, lambda x: x[format_id]['url'], compat_str)
dash_rel_url = get_url('dash')
if dash_rel_url:
formats.extend(self._extract_mpd_formats(
urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash',
fatal=False))
hls_rel_url = get_url('hls')
if hls_rel_url:
formats.extend(self._extract_m3u8_formats(
urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return formats
def _real_extract(self, url):
params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
src = params['src'][0]
title = params['title'][0]
video_id = params.get('contentId', [_raw_id(src)])[0]
formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id)
duration = int_or_none(params.get('duration', [None])[0])
series = params.get('series', [None])[0]
thumbnail = params.get('poster', [None])[0]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'series': series,
'formats': formats,
}
class SeznamZpravyArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P<id>\d+)'
_API_URL = 'https://apizpravy.seznam.cz/'
_TESTS = [{
# two videos on one page, with SDN URL
'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990',
'info_dict': {
'id': '35990',
'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2',
'description': 'md5:933f7b06fa337a814ba199d3596d27ba',
},
'playlist_count': 2,
}, {
# video with live stream URL
'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489',
'info_dict': {
'id': '38489',
'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60',
'description': 'md5:428e7926a1a81986ec7eb23078004fb4',
},
'playlist_count': 1,
}]
def _real_extract(self, url):
article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id)
info = self._search_json_ld(webpage, article_id, default={})
print(info)
title = info.get('title') or self._og_search_title(webpage, fatal=False)
description = info.get('description') or self._og_search_description(webpage)
return self.playlist_result([
self.url_result(url, ie=SeznamZpravyIE.ie_key())
for url in SeznamZpravyIE._extract_urls(webpage)],
article_id, title, description)

View File

@ -1,8 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -22,8 +21,8 @@ class SharedBaseIE(InfoExtractor):
video_url = self._extract_video_url(webpage, video_id, url) video_url = self._extract_video_url(webpage, video_id, url)
title = base64.b64decode(self._html_search_meta( title = compat_b64decode(self._html_search_meta(
'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') 'full:title', webpage, 'title')).decode('utf-8')
filesize = int_or_none(self._html_search_meta( filesize = int_or_none(self._html_search_meta(
'full:size', webpage, 'file size', fatal=False)) 'full:size', webpage, 'file size', fatal=False))
@ -92,5 +91,4 @@ class VivoIE(SharedBaseIE):
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'stream', group='url'), webpage, 'stream', group='url'),
video_id, video_id,
transform_source=lambda x: base64.b64decode( transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
x.encode('ascii')).decode('utf-8'))[0]

View File

@ -58,7 +58,7 @@ class TBSIE(TurnerBaseIE):
continue continue
if stream_data.get('playlistProtection') == 'spe': if stream_data.get('playlistProtection') == 'spe':
m3u8_url = self._add_akamai_spe_token( m3u8_url = self._add_akamai_spe_token(
'http://www.%s.com/service/token_spe' % site, 'http://token.vgtf.net/token/token_spe',
m3u8_url, media_id, { m3u8_url, media_id, {
'url': url, 'url': url,
'site_name': site[:3].upper(), 'site_name': site[:3].upper(),

View File

@ -5,8 +5,9 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
qualities,
determine_ext, determine_ext,
ExtractorError,
qualities,
) )
@ -17,6 +18,7 @@ class TeacherTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# flowplayer
'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
'md5': 'f9434ef992fd65936d72999951ee254c', 'md5': 'f9434ef992fd65936d72999951ee254c',
'info_dict': { 'info_dict': {
@ -24,19 +26,10 @@ class TeacherTubeIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Measures of dispersion from a frequency table', 'title': 'Measures of dispersion from a frequency table',
'description': 'Measures of dispersion from a frequency table', 'description': 'Measures of dispersion from a frequency table',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:https?://.*\.(?:jpg|png)',
},
}, {
'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
'md5': '0d625ec6bc9bf50f70170942ad580676',
'info_dict': {
'id': '340064',
'ext': 'mp4',
'title': 'How to Make Paper Dolls _ Paper Art Projects',
'description': 'Learn how to make paper dolls in this simple',
'thumbnail': r're:http://.*\.jpg',
}, },
}, { }, {
# jwplayer
'url': 'http://www.teachertube.com/music.php?music_id=8805', 'url': 'http://www.teachertube.com/music.php?music_id=8805',
'md5': '01e8352006c65757caf7b961f6050e21', 'md5': '01e8352006c65757caf7b961f6050e21',
'info_dict': { 'info_dict': {
@ -46,20 +39,21 @@ class TeacherTubeIE(InfoExtractor):
'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
}, },
}, { }, {
# unavailable video
'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
'md5': '9c79fbb2dd7154823996fc28d4a26998', 'only_matching': True,
'info_dict': {
'id': '297790',
'ext': 'mp4',
'title': 'Intro Video - Schleicher',
'description': 'Intro Video - Why to flip, how flipping will',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
error = self._search_regex(
r'<div\b[^>]+\bclass=["\']msgBox error[^>]+>([^<]+)', webpage,
'error', default=None)
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
title = self._html_search_meta('title', webpage, 'title', fatal=True) title = self._html_search_meta('title', webpage, 'title', fatal=True)
TITLE_SUFFIX = ' - TeacherTube' TITLE_SUFFIX = ' - TeacherTube'
if title.endswith(TITLE_SUFFIX): if title.endswith(TITLE_SUFFIX):
@ -84,12 +78,16 @@ class TeacherTubeIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'thumbnail', webpage)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'),
'formats': formats,
'description': description, 'description': description,
'thumbnail': thumbnail,
'formats': formats,
} }

View File

@ -1,18 +1,20 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import binascii import binascii
import re import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_ord,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
qualities, qualities,
determine_ext, determine_ext,
) )
from ..compat import compat_ord
class TeamcocoIE(InfoExtractor): class TeamcocoIE(InfoExtractor):
@ -97,7 +99,7 @@ class TeamcocoIE(InfoExtractor):
for i in range(len(cur_fragments)): for i in range(len(cur_fragments)):
cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii') cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii')
try: try:
raw_data = base64.b64decode(cur_sequence) raw_data = compat_b64decode(cur_sequence)
if compat_ord(raw_data[0]) == compat_ord('{'): if compat_ord(raw_data[0]) == compat_ord('{'):
return json.loads(raw_data.decode('utf-8')) return json.loads(raw_data.decode('utf-8'))
except (TypeError, binascii.Error, UnicodeDecodeError, ValueError): except (TypeError, binascii.Error, UnicodeDecodeError, ValueError):

View File

@ -1,9 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs from ..compat import (
compat_b64decode,
compat_parse_qs,
)
class TutvIE(InfoExtractor): class TutvIE(InfoExtractor):
@ -26,7 +27,7 @@ class TutvIE(InfoExtractor):
data_content = self._download_webpage( data_content = self._download_webpage(
'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8') video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
return { return {
'id': internal_id, 'id': internal_id,

View File

@ -1596,6 +1596,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'token' not in video_info: if 'token' not in video_info:
video_info = get_video_info video_info = get_video_info
break break
def extract_unavailable_message():
return self._html_search_regex(
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
video_webpage, 'unavailable message', default=None)
if 'token' not in video_info: if 'token' not in video_info:
if 'reason' in video_info: if 'reason' in video_info:
if 'The uploader has not made this video available in your country.' in video_info['reason']: if 'The uploader has not made this video available in your country.' in video_info['reason']:
@ -1604,8 +1610,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
countries = regions_allowed.split(',') if regions_allowed else None countries = regions_allowed.split(',') if regions_allowed else None
self.raise_geo_restricted( self.raise_geo_restricted(
msg=video_info['reason'][0], countries=countries) msg=video_info['reason'][0], countries=countries)
reason = video_info['reason'][0]
if 'Invalid parameters' in reason:
unavailable_message = extract_unavailable_message()
if unavailable_message:
reason = unavailable_message
raise ExtractorError( raise ExtractorError(
'YouTube said: %s' % video_info['reason'][0], 'YouTube said: %s' % reason,
expected=True, video_id=video_id) expected=True, video_id=video_id)
else: else:
raise ExtractorError( raise ExtractorError(
@ -1953,9 +1964,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format) formats.append(a_format)
else: else:
unavailable_message = self._html_search_regex( unavailable_message = extract_unavailable_message()
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
video_webpage, 'unavailable message', default=None)
if unavailable_message: if unavailable_message:
raise ExtractorError(unavailable_message, expected=True) raise ExtractorError(unavailable_message, expected=True)
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

View File

@ -866,8 +866,8 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# expected HTTP responses to meet HTTP/1.0 or later (see also # expected HTTP responses to meet HTTP/1.0 or later (see also
# https://github.com/rg3/youtube-dl/issues/6727) # https://github.com/rg3/youtube-dl/issues/6727)
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
kwargs[b'strict'] = True kwargs['strict'] = True
hc = http_class(*args, **kwargs) hc = http_class(*args, **compat_kwargs(kwargs))
source_address = ydl_handler._params.get('source_address') source_address = ydl_handler._params.get('source_address')
if source_address is not None: if source_address is not None:
sa = (source_address, 0) sa = (source_address, 0)

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.01.21' __version__ = '2018.01.27'