diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 145c3ff83..c5eff009c 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.21** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.01.27*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.01.27** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2018.01.21 +[debug] youtube-dl version 2018.01.27 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 65a01fcc7..00c5c9c6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +version 2018.01.27 + +Core +* [extractor/common] Improve _json_ld for articles +* Switch codebase to use compat_b64decode ++ 
[compat] Add compat_b64decode + +Extractors ++ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616) +* [dplay] Bypass geo restriction ++ [dplay] Add support for disco-api videos (#15396) +* [youtube] Extract precise error messages (#15284) +* [teachertube] Capture and output error message +* [teachertube] Fix and relax thumbnail extraction (#15403) ++ [prosiebensat1] Add another clip id regular expression (#15378) +* [tbs] Update tokenizer url (#15395) +* [mixcloud] Use compat_b64decode (#15394) +- [thesixtyone] Remove extractor (#15341) + + version 2018.01.21 Core diff --git a/docs/supportedsites.md b/docs/supportedsites.md index b0825c58b..c15b5eec5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -732,6 +732,8 @@ - **ServingSys** - **Servus** - **Sexu** + - **SeznamZpravy** + - **SeznamZpravyArticle** - **Shahid** - **ShahidShow** - **Shared**: shared.sx @@ -822,7 +824,6 @@ - **ThePlatform** - **ThePlatformFeed** - **TheScene** - - **TheSixtyOne** - **TheStar** - **TheSun** - **TheWeatherChannel** diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index c5bb3c4ef..461bb6d41 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -1,8 +1,8 @@ from __future__ import unicode_literals -import base64 from math import ceil +from .compat import compat_b64decode from .utils import bytes_to_intlist, intlist_to_bytes BLOCK_SIZE_BYTES = 16 @@ -180,7 +180,7 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) + data = bytes_to_intlist(compat_b64decode(data)) password = bytes_to_intlist(password.encode('utf-8')) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 41ca9adf1..27ece2d29 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import base64 import binascii import 
collections import ctypes @@ -2896,9 +2897,24 @@ except TypeError: if isinstance(spec, compat_str): spec = spec.encode('ascii') return struct.unpack(spec, *args) + + class compat_Struct(struct.Struct): + def __init__(self, fmt): + if isinstance(fmt, compat_str): + fmt = fmt.encode('ascii') + super(compat_Struct, self).__init__(fmt) else: compat_struct_pack = struct.pack compat_struct_unpack = struct.unpack + if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8): + class compat_Struct(struct.Struct): + def unpack(self, string): + if not isinstance(string, buffer): + string = buffer(string) + return super(compat_Struct, self).unpack(string) + else: + compat_Struct = struct.Struct + try: from future_builtins import zip as compat_zip @@ -2908,6 +2924,16 @@ except ImportError: # not 2.6+ or is 3.x except ImportError: compat_zip = zip + +if sys.version_info < (3, 3): + def compat_b64decode(s, *args, **kwargs): + if isinstance(s, compat_str): + s = s.encode('ascii') + return base64.b64decode(s, *args, **kwargs) +else: + compat_b64decode = base64.b64decode + + if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): # PyPy2 prior to version 5.4.0 expects byte strings as Windows function # names, see the original PyPy issue [1] and the youtube-dl one [2]. 
@@ -2930,6 +2956,8 @@ __all__ = [ 'compat_HTMLParseError', 'compat_HTMLParser', 'compat_HTTPError', + 'compat_Struct', + 'compat_b64decode', 'compat_basestring', 'compat_chr', 'compat_cookiejar', diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index fdb80f42a..15e71be9a 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -1,12 +1,12 @@ from __future__ import division, unicode_literals -import base64 import io import itertools import time from .fragment import FragmentFD from ..compat import ( + compat_b64decode, compat_etree_fromstring, compat_urlparse, compat_urllib_error, @@ -312,7 +312,7 @@ class F4mFD(FragmentFD): boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None - bootstrap = base64.b64decode(node.text.encode('ascii')) + bootstrap = compat_b64decode(node.text) boot_info = read_bootstrap_info(bootstrap) return boot_info, bootstrap_url @@ -349,7 +349,7 @@ class F4mFD(FragmentFD): live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: - metadata = base64.b64decode(metadata_node.text.encode('ascii')) + metadata = compat_b64decode(metadata_node.text) else: metadata = None diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 9b001ecff..063fcf444 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -1,25 +1,27 @@ from __future__ import unicode_literals import time -import struct import binascii import io from .fragment import FragmentFD -from ..compat import compat_urllib_error +from ..compat import ( + compat_Struct, + compat_urllib_error, +) -u8 = struct.Struct(b'>B') -u88 = struct.Struct(b'>Bx') -u16 = struct.Struct(b'>H') -u1616 = struct.Struct(b'>Hxx') -u32 = struct.Struct(b'>I') -u64 = struct.Struct(b'>Q') +u8 = compat_Struct('>B') +u88 = compat_Struct('>Bx') +u16 = compat_Struct('>H') +u1616 = compat_Struct('>Hxx') +u32 = compat_Struct('>I') +u64 = compat_Struct('>Q') -s88 = 
struct.Struct(b'>bx') -s16 = struct.Struct(b'>h') -s1616 = struct.Struct(b'>hxx') -s32 = struct.Struct(b'>i') +s88 = compat_Struct('>bx') +s16 = compat_Struct('>h') +s1616 = compat_Struct('>hxx') +s32 = compat_Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) @@ -139,7 +141,7 @@ def write_piff_header(stream, params): sample_entry_payload += u16.pack(0x18) # depth sample_entry_payload += s16.pack(-1) # pre defined - codec_private_data = binascii.unhexlify(params['codec_private_data']) + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8')) if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index cffdab6ca..64fb755da 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -1,13 +1,15 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import json import os from .common import InfoExtractor from ..aes import aes_cbc_decrypt -from ..compat import compat_ord +from ..compat import ( + compat_b64decode, + compat_ord, +) from ..utils import ( bytes_to_intlist, ExtractorError, @@ -48,9 +50,9 @@ class ADNIE(InfoExtractor): # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( - bytes_to_intlist(base64.b64decode(enc_subtitles[24:])), + bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), bytes_to_intlist(b'\x1b\xe0\x29\x61\x38\x94\x24\x00\x12\xbd\xc5\x80\xac\xce\xbe\xb0'), - bytes_to_intlist(base64.b64decode(enc_subtitles[:24])) + bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(), diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py index b4ce767af..28e3e59f6 100644 --- a/youtube_dl/extractor/bigflix.py +++ 
b/youtube_dl/extractor/bigflix.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import re from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, +) class BigflixIE(InfoExtractor): @@ -39,8 +41,8 @@ class BigflixIE(InfoExtractor): webpage, 'title') def decode_url(quoted_b64_url): - return base64.b64decode(compat_urllib_parse_unquote( - quoted_b64_url).encode('ascii')).decode('utf-8') + return compat_b64decode(compat_urllib_parse_unquote( + quoted_b64_url)).decode('utf-8') formats = [] for height, encoded_url in re.findall( diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py index d4769da75..5aac21299 100644 --- a/youtube_dl/extractor/chilloutzone.py +++ b/youtube_dl/extractor/chilloutzone.py @@ -1,11 +1,11 @@ from __future__ import unicode_literals import re -import base64 import json from .common import InfoExtractor from .youtube import YoutubeIE +from ..compat import compat_b64decode from ..utils import ( clean_html, ExtractorError @@ -58,7 +58,7 @@ class ChilloutzoneIE(InfoExtractor): base64_video_info = self._html_search_regex( r'var cozVidData = "(.+?)";', webpage, 'video data') - decoded_video_info = base64.b64decode(base64_video_info.encode('utf-8')).decode('utf-8') + decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8') video_info_dict = json.loads(decoded_video_info) # get video information from dict diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index 4815b34be..8d75cdf19 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -1,10 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import re from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import parse_duration @@ -44,8 +44,7 @@ class ChirbitIE(InfoExtractor): # Reverse 
engineered from https://chirb.it/js/chirbit.player.js (look # for soundURL) - audio_url = base64.b64decode( - data_fd[::-1].encode('ascii')).decode('utf-8') + audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8') title = self._search_regex( r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 92da0ce3c..1aad00aea 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1043,7 +1043,7 @@ class InfoExtractor(object): part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries') if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'): info['series'] = unescapeHTML(part_of_series.get('name')) - elif item_type == 'Article': + elif item_type in ('Article', 'NewsArticle'): info.update({ 'timestamp': parse_iso8601(e.get('datePublished')), 'title': unescapeHTML(e.get('headline')), diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index b92f25447..3efdc8c21 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -3,13 +3,13 @@ from __future__ import unicode_literals import re import json -import base64 import zlib from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_etree_fromstring, compat_urllib_parse_urlencode, compat_urllib_request, @@ -272,8 +272,8 @@ class CrunchyrollIE(CrunchyrollBaseIE): } def _decrypt_subtitles(self, data, iv, id): - data = bytes_to_intlist(base64.b64decode(data.encode('utf-8'))) - iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8'))) + data = bytes_to_intlist(compat_b64decode(data)) + iv = bytes_to_intlist(compat_b64decode(iv)) id = int(id) def obfuscate_key_aux(count, modulo, start): diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py index 5c9ac68a0..dbc1aa5d4 
100644 --- a/youtube_dl/extractor/daisuki.py +++ b/youtube_dl/extractor/daisuki.py @@ -10,6 +10,7 @@ from ..aes import ( aes_cbc_decrypt, aes_cbc_encrypt, ) +from ..compat import compat_b64decode from ..utils import ( bytes_to_intlist, bytes_to_long, @@ -93,7 +94,7 @@ class DaisukiMottoIE(InfoExtractor): rtn = self._parse_json( intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist( - base64.b64decode(encrypted_rtn)), + compat_b64decode(encrypted_rtn)), aes_key, iv)).decode('utf-8').rstrip('\0'), video_id) diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py index 76e784105..a08dace43 100644 --- a/youtube_dl/extractor/dplay.py +++ b/youtube_dl/extractor/dplay.py @@ -12,25 +12,28 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + determine_ext, ExtractorError, + float_or_none, int_or_none, remove_end, try_get, unified_strdate, + unified_timestamp, update_url_query, USER_AGENTS, ) class DPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:videoer/)?(?P<id>[^/]+/[^/?#]+)' _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 'info_dict': { 'id': '3172', - 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet', + 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', 'ext': 'mp4', 'title': 'Svensken lär sig njuta av livet', 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', @@ -48,7 +51,7 @@ class DPlayIE(InfoExtractor): 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 'info_dict': { 'id': '70816', - 'display_id': 'season-6-episode-12', + 'display_id': 'mig-og-min-mor/season-6-episode-12', 'ext': 'mp4', 'title': 'Episode 12', 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', @@ -65,6 +68,30 @@ class DPlayIE(InfoExtractor): # geo
restricted, via direct unsigned hls URL 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', 'only_matching': True, + }, { + # disco-api + 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', + 'info_dict': { + 'id': '40206', + 'display_id': 'i-kongens-klr/sesong-1-episode-7', + 'ext': 'mp4', + 'title': 'Episode 7', + 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf', + 'duration': 2611.16, + 'timestamp': 1516726800, + 'upload_date': '20180123', + 'series': 'I kongens klær', + 'season_number': 1, + 'episode_number': 7, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + # geo restricted, bypassable via X-Forwarded-For + 'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3', + 'only_matching': True, }] def _real_extract(self, url): @@ -72,10 +99,81 @@ class DPlayIE(InfoExtractor): display_id = mobj.group('id') domain = mobj.group('domain') + self._initialize_geo_bypass([mobj.group('country').upper()]) + webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'video id') + r'data-video-id=["\'](\d+)', webpage, 'video id', default=None) + + if not video_id: + host = mobj.group('host') + disco_base = 'https://disco-api.%s' % host + self._download_json( + '%s/token' % disco_base, display_id, 'Downloading token', + query={ + 'realm': host.replace('.', ''), + }) + video = self._download_json( + '%s/content/videos/%s' % (disco_base, display_id), display_id, + headers={ + 'Referer': url, + 'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1', + }, query={ + 'include': 'show' + }) + video_id = video['data']['id'] + info = video['data']['attributes'] + title = info['name'] + formats = [] + for format_id, format_dict in self._download_json( + '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id), + display_id)['data']['attributes']['streaming'].items(): + if not isinstance(format_dict, dict): + continue + format_url = 
format_dict.get('url') + if not format_url: + continue + ext = determine_ext(format_url) + if format_id == 'dash' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, display_id, mpd_id='dash', fatal=False)) + elif format_id == 'hls' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + + series = None + try: + included = video.get('included') + if isinstance(included, list): + show = next(e for e in included if e.get('type') == 'show') + series = try_get( + show, lambda x: x['attributes']['name'], compat_str) + except StopIteration: + pass + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': info.get('description'), + 'duration': float_or_none( + info.get('videoDuration'), scale=1000), + 'timestamp': unified_timestamp(info.get('publishStart')), + 'series': series, + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode_number': int_or_none(info.get('episodeNumber')), + 'age_limit': int_or_none(info.get('minimum_age')), + 'formats': formats, + } info = self._download_json( 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id), diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index c9fc9b5a9..be2e3d378 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -1,10 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import re from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import ( qualities, sanitized_Request, @@ -42,7 +42,7 @@ class DumpertIE(InfoExtractor): r'data-files="([^"]+)"', webpage, 'data files') files = self._parse_json( - base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'), + compat_b64decode(files_base64).decode('utf-8'), 
video_id) quality = qualities(['flv', 'mobile', 'tablet', '720p']) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 3f6268637..4485bf8c1 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -1,13 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import json from .common import InfoExtractor from ..compat import ( - compat_urlparse, + compat_b64decode, compat_str, + compat_urlparse, ) from ..utils import ( extract_attributes, @@ -36,9 +36,9 @@ class EinthusanIE(InfoExtractor): # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js def _decrypt(self, encrypted_data, video_id): - return self._parse_json(base64.b64decode(( + return self._parse_json(compat_b64decode(( encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1] - ).encode('ascii')).decode('utf-8'), video_id) + )).decode('utf-8'), video_id) def _real_extract(self, url): video_id = self._match_id(url) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 57e74ba62..b442256fe 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -933,6 +933,10 @@ from .servingsys import ServingSysIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE +from .seznamzpravy import ( + SeznamZpravyIE, + SeznamZpravyArticleIE, +) from .shahid import ( ShahidIE, ShahidShowIE, diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 34163725f..4703e1894 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals -import base64 - from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import ( ExtractorError, HEADRequest, @@ -48,7 +47,7 @@ class HotNewHipHopIE(InfoExtractor): if 'mediaKey' not in mkd: raise ExtractorError('Did not get a 
media key') - redirect_url = base64.b64decode(video_url_base64).decode('utf-8') + redirect_url = compat_b64decode(video_url_base64).decode('utf-8') redirect_req = HEADRequest(redirect_url) req = self._request_webpage( redirect_req, video_id, diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index c3e892feb..391c2f5d0 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -2,9 +2,8 @@ from __future__ import unicode_literals -import base64 - from ..compat import ( + compat_b64decode, compat_urllib_parse_unquote, compat_urlparse, ) @@ -61,7 +60,7 @@ class InfoQIE(BokeCCBaseIE): encoded_id = self._search_regex( r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None) - real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) + real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8')) playpath = 'mp4:' + real_id return [{ diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py index 0a07c1320..ffe10154b 100644 --- a/youtube_dl/extractor/leeco.py +++ b/youtube_dl/extractor/leeco.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import datetime import hashlib import re @@ -9,6 +8,7 @@ import time from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_ord, compat_str, compat_urllib_parse_urlencode, @@ -329,7 +329,7 @@ class LetvCloudIE(InfoExtractor): raise ExtractorError('Letv cloud returned an unknwon error') def b64decode(s): - return base64.b64decode(s.encode('utf-8')).decode('utf-8') + return compat_b64decode(s).decode('utf-8') formats = [] for media in play_json['data']['video_info']['media'].values(): diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index dbd761a67..482175a34 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -1,13 +1,12 @@ # coding: utf-8 from __future__ import 
unicode_literals -import base64 - from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote -from ..utils import ( - int_or_none, +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, ) +from ..utils import int_or_none class MangomoloBaseIE(InfoExtractor): @@ -51,4 +50,4 @@ class MangomoloLiveIE(MangomoloBaseIE): _IS_LIVE = True def _get_real_id(self, page_id): - return base64.b64decode(compat_urllib_parse_unquote(page_id).encode()).decode() + return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode() diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 7b2bb6e20..a56b7690f 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -1,12 +1,12 @@ from __future__ import unicode_literals -import base64 import functools import itertools import re from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_chr, compat_ord, compat_str, @@ -79,7 +79,7 @@ class MixcloudIE(InfoExtractor): if encrypted_play_info is not None: # Decode - encrypted_play_info = base64.b64decode(encrypted_play_info) + encrypted_play_info = compat_b64decode(encrypted_play_info) else: # New path full_info_json = self._parse_json(self._html_search_regex( @@ -109,7 +109,7 @@ class MixcloudIE(InfoExtractor): kpa_target = encrypted_play_info else: kps = ['https://', 'http://'] - kpa_target = base64.b64decode(info_json['streamInfo']['url']) + kpa_target = compat_b64decode(info_json['streamInfo']['url']) for kp in kps: partial_key = self._decrypt_xor_cipher(kpa_target, kp) for quote in ["'", '"']: @@ -165,7 +165,7 @@ class MixcloudIE(InfoExtractor): format_url = stream_info.get(url_key) if not format_url: continue - decrypted = self._decrypt_xor_cipher(key, base64.b64decode(format_url)) + decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url)) if not decrypted: continue if url_key == 'hlsUrl': diff --git a/youtube_dl/extractor/ooyala.py 
b/youtube_dl/extractor/ooyala.py index 52580baed..ad8bf03f8 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,9 +1,13 @@ from __future__ import unicode_literals + import re -import base64 from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_b64decode, + compat_str, + compat_urllib_parse_urlencode, +) from ..utils import ( determine_ext, ExtractorError, @@ -12,7 +16,6 @@ from ..utils import ( try_get, unsmuggle_url, ) -from ..compat import compat_urllib_parse_urlencode class OoyalaBaseIE(InfoExtractor): @@ -44,7 +47,7 @@ class OoyalaBaseIE(InfoExtractor): url_data = try_get(stream, lambda x: x['url']['data'], compat_str) if not url_data: continue - s_url = base64.b64decode(url_data.encode('ascii')).decode('utf-8') + s_url = compat_b64decode(url_data).decode('utf-8') if not s_url or s_url in urls: continue urls.append(s_url) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 7e680a728..48757fd4f 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -345,6 +345,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): r'clip[iI]d\s*=\s*["\'](\d+)', r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", r'proMamsId&quot;\s*:\s*&quot;(\d+)', + r'proMamsId"\s*:\s*"(\d+)', ] _TITLE_REGEXES = [ r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py index 666e90e90..18a327d81 100644 --- a/youtube_dl/extractor/rtl2.py +++ b/youtube_dl/extractor/rtl2.py @@ -1,12 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import re from .common import InfoExtractor from ..aes import aes_cbc_decrypt from ..compat import ( + compat_b64decode, compat_ord, compat_str, ) @@ -142,11 +142,11 @@ class RTL2YouIE(RTL2YouBaseIE): stream_data = self._download_json( self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id) - data, iv = base64.b64decode(stream_data['streamUrl']).decode().split(':') + data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':') stream_url = intlist_to_bytes(aes_cbc_decrypt( - bytes_to_intlist(base64.b64decode(data)), + bytes_to_intlist(compat_b64decode(data)), bytes_to_intlist(self._AES_KEY), - bytes_to_intlist(base64.b64decode(iv)) + bytes_to_intlist(compat_b64decode(iv)) )) if b'rtl2_you_video_not_found' in stream_url: raise ExtractorError('video not found', expected=True) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index fa60ffd5e..ce9db0629 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -7,6 +7,7 @@ import time from .common import InfoExtractor from ..compat import ( + compat_b64decode, compat_struct_unpack, ) from ..utils import ( @@ -21,7 +22,7 @@ from ..utils import ( def _decrypt_url(png): - encrypted_data = base64.b64decode(png.encode('utf-8')) + encrypted_data = compat_b64decode(png) text_index = encrypted_data.find(b'tEXt') text_chunk = encrypted_data[text_index - 4:] length = compat_struct_unpack('!I', text_chunk[:4])[0] diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py new file mode 100644 index 000000000..cf32d1e0c --- /dev/null +++ b/youtube_dl/extractor/seznamzpravy.py @@ -0,0 +1,170 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from
.common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, +) +from ..utils import ( + urljoin, + int_or_none, + parse_codecs, + try_get, +) + + +def _raw_id(src_url): + return compat_urllib_parse_urlparse(src_url).path.split('/')[-1] + + +class SeznamZpravyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc=' + _TESTS = [{ + 'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy', + 'info_dict': { + 'id': '170889', + 'ext': 'mp4', + 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'duration': 241, + 'series': 'Svět bez obalu', + }, + 'params': { + 'skip_download': True, + }, + }, { + # with Location key + 'url': 
'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false', + 'info_dict': { + 'id': '185688', + 'ext': 'mp4', + 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'series': 'Výzva', + }, + 'params': { + 'skip_download': True, + }, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1', + webpage)] + + def _extract_sdn_formats(self, sdn_url, video_id): + sdn_data = self._download_json(sdn_url, video_id) + + if sdn_data.get('Location'): + sdn_url = sdn_data['Location'] + sdn_data = self._download_json(sdn_url, video_id) + + 
formats = [] + mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {} + for format_id, format_data in mp4_formats.items(): + relative_url = format_data.get('url') + if not relative_url: + continue + + try: + width, height = format_data.get('resolution') + except (TypeError, ValueError): + width, height = None, None + + f = { + 'url': urljoin(sdn_url, relative_url), + 'format_id': 'http-%s' % format_id, + 'tbr': int_or_none(format_data.get('bandwidth'), scale=1000), + 'width': int_or_none(width), + 'height': int_or_none(height), + } + f.update(parse_codecs(format_data.get('codec'))) + formats.append(f) + + pls = sdn_data.get('pls', {}) + + def get_url(format_id): + return try_get(pls, lambda x: x[format_id]['url'], compat_str) + + dash_rel_url = get_url('dash') + if dash_rel_url: + formats.extend(self._extract_mpd_formats( + urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash', + fatal=False)) + + hls_rel_url = get_url('hls') + if hls_rel_url: + formats.extend(self._extract_m3u8_formats( + urljoin(sdn_url, hls_rel_url), video_id, ext='mp4', + m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + return formats + + def _real_extract(self, url): + params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + + src = params['src'][0] + title = params['title'][0] + video_id = params.get('contentId', [_raw_id(src)])[0] + formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id) + + duration = int_or_none(params.get('duration', [None])[0]) + series = params.get('series', [None])[0] + thumbnail = params.get('poster', [None])[0] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'series': series, + 'formats': formats, + } + + +class SeznamZpravyArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P\d+)' + _API_URL = 'https://apizpravy.seznam.cz/' + + _TESTS = [{ + # two videos on one page, with SDN URL + 'url': 
'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990', + 'info_dict': { + 'id': '35990', + 'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2', + 'description': 'md5:933f7b06fa337a814ba199d3596d27ba', + }, + 'playlist_count': 2, + }, { + # video with live stream URL + 'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489', + 'info_dict': { + 'id': '38489', + 'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60', + 'description': 'md5:428e7926a1a81986ec7eb23078004fb4', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + + webpage = self._download_webpage(url, article_id) + + info = self._search_json_ld(webpage, article_id, default={}) + print(info) + + title = info.get('title') or self._og_search_title(webpage, fatal=False) + description = info.get('description') or self._og_search_description(webpage) + + return self.playlist_result([ + self.url_result(url, ie=SeznamZpravyIE.ie_key()) + for url in SeznamZpravyIE._extract_urls(webpage)], + article_id, title, description) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 89e19e927..b2250afdd 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals -import base64 - from .common import InfoExtractor +from ..compat import compat_b64decode from ..utils import ( ExtractorError, int_or_none, @@ -22,8 +21,8 @@ class SharedBaseIE(InfoExtractor): video_url = self._extract_video_url(webpage, video_id, url) - title = base64.b64decode(self._html_search_meta( - 'full:title', webpage, 'title').encode('utf-8')).decode('utf-8') + title = compat_b64decode(self._html_search_meta( + 'full:title', webpage, 'title')).decode('utf-8') filesize = int_or_none(self._html_search_meta( 'full:size', webpage, 'file size', fatal=False)) @@ -92,5 
+91,4 @@ class VivoIE(SharedBaseIE): r'InitializeStream\s*\(\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'stream', group='url'), video_id, - transform_source=lambda x: base64.b64decode( - x.encode('ascii')).decode('utf-8'))[0] + transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0] diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py index eab22c38f..edc31729d 100644 --- a/youtube_dl/extractor/tbs.py +++ b/youtube_dl/extractor/tbs.py @@ -58,7 +58,7 @@ class TBSIE(TurnerBaseIE): continue if stream_data.get('playlistProtection') == 'spe': m3u8_url = self._add_akamai_spe_token( - 'http://www.%s.com/service/token_spe' % site, + 'http://token.vgtf.net/token/token_spe', m3u8_url, media_id, { 'url': url, 'site_name': site[:3].upper(), diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index f14713a78..1272078c5 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -5,8 +5,9 @@ import re from .common import InfoExtractor from ..utils import ( - qualities, determine_ext, + ExtractorError, + qualities, ) @@ -17,6 +18,7 @@ class TeacherTubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P\d+)' _TESTS = [{ + # flowplayer 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', 'md5': 'f9434ef992fd65936d72999951ee254c', 'info_dict': { @@ -24,19 +26,10 @@ class TeacherTubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Measures of dispersion from a frequency table', 'description': 'Measures of dispersion from a frequency table', - 'thumbnail': r're:http://.*\.jpg', - }, - }, { - 'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064', - 'md5': '0d625ec6bc9bf50f70170942ad580676', - 'info_dict': { - 'id': '340064', - 'ext': 'mp4', - 'title': 'How to Make Paper Dolls _ Paper Art Projects', - 'description': 'Learn how to make paper dolls in this simple', - 
'thumbnail': r're:http://.*\.jpg', + 'thumbnail': r're:https?://.*\.(?:jpg|png)', }, }, { + # jwplayer 'url': 'http://www.teachertube.com/music.php?music_id=8805', 'md5': '01e8352006c65757caf7b961f6050e21', 'info_dict': { @@ -46,20 +39,21 @@ class TeacherTubeIE(InfoExtractor): 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', }, }, { + # unavailable video 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', - 'md5': '9c79fbb2dd7154823996fc28d4a26998', - 'info_dict': { - 'id': '297790', - 'ext': 'mp4', - 'title': 'Intro Video - Schleicher', - 'description': 'Intro Video - Why to flip, how flipping will', - }, + 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + error = self._search_regex( + r']+\bclass=["\']msgBox error[^>]+>([^<]+)', webpage, + 'error', default=None) + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + title = self._html_search_meta('title', webpage, 'title', fatal=True) TITLE_SUFFIX = ' - TeacherTube' if title.endswith(TITLE_SUFFIX): @@ -84,12 +78,16 @@ class TeacherTubeIE(InfoExtractor): self._sort_formats(formats) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'thumbnail', webpage) + return { 'id': video_id, 'title': title, - 'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'), - 'formats': formats, 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, } diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 75346393b..9056c8cbc 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -1,18 +1,20 @@ # coding: utf-8 from __future__ import unicode_literals -import base64 import binascii import re import json from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_ord, +) from ..utils 
import ( ExtractorError, qualities, determine_ext, ) -from ..compat import compat_ord class TeamcocoIE(InfoExtractor): @@ -97,7 +99,7 @@ class TeamcocoIE(InfoExtractor): for i in range(len(cur_fragments)): cur_sequence = (''.join(cur_fragments[i:] + cur_fragments[:i])).encode('ascii') try: - raw_data = base64.b64decode(cur_sequence) + raw_data = compat_b64decode(cur_sequence) if compat_ord(raw_data[0]) == compat_ord('{'): return json.loads(raw_data.decode('utf-8')) except (TypeError, binascii.Error, UnicodeDecodeError, ValueError): diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index 822372ea1..362318b24 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,9 +1,10 @@ from __future__ import unicode_literals -import base64 - from .common import InfoExtractor -from ..compat import compat_parse_qs +from ..compat import ( + compat_b64decode, + compat_parse_qs, +) class TutvIE(InfoExtractor): @@ -26,7 +27,7 @@ class TutvIE(InfoExtractor): data_content = self._download_webpage( 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') - video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0].encode('utf-8')).decode('utf-8') + video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') return { 'id': internal_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f698a5627..43051512b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1596,6 +1596,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'token' not in video_info: video_info = get_video_info break + + def extract_unavailable_message(): + return self._html_search_regex( + r'(?s)]+id="unavailable-message"[^>]*>(.+?)', + video_webpage, 'unavailable message', default=None) + if 'token' not in video_info: if 'reason' in video_info: if 'The uploader has not made this video available in your country.' 
in video_info['reason']: @@ -1604,8 +1610,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): countries = regions_allowed.split(',') if regions_allowed else None self.raise_geo_restricted( msg=video_info['reason'][0], countries=countries) + reason = video_info['reason'][0] + if 'Invalid parameters' in reason: + unavailable_message = extract_unavailable_message() + if unavailable_message: + reason = unavailable_message raise ExtractorError( - 'YouTube said: %s' % video_info['reason'][0], + 'YouTube said: %s' % reason, expected=True, video_id=video_id) else: raise ExtractorError( @@ -1953,9 +1964,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' formats.append(a_format) else: - unavailable_message = self._html_search_regex( - r'(?s)]+id="unavailable-message"[^>]*>(.+?)', - video_webpage, 'unavailable message', default=None) + unavailable_message = extract_unavailable_message() if unavailable_message: raise ExtractorError(unavailable_message, expected=True) raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2fe9cf585..ef44b99a5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -866,8 +866,8 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): # expected HTTP responses to meet HTTP/1.0 or later (see also # https://github.com/rg3/youtube-dl/issues/6727) if sys.version_info < (3, 0): - kwargs[b'strict'] = True - hc = http_class(*args, **kwargs) + kwargs['strict'] = True + hc = http_class(*args, **compat_kwargs(kwargs)) source_address = ydl_handler._params.get('source_address') if source_address is not None: sa = (source_address, 0) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 11e82f433..8a2b57ffb 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 
'2018.01.21' +__version__ = '2018.01.27'