1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-10 06:09:59 +08:00

Merge pull request #309 from ytdl-org/master

[pull] master from ytdl-org:master
This commit is contained in:
pull[bot] 2020-05-07 16:05:01 +00:00 committed by GitHub
commit 047ce5bac3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 344 additions and 116 deletions

View File

@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
assert_cookie_has_value('HTTPONLY_COOKIE') assert_cookie_has_value('HTTPONLY_COOKIE')
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
def test_malformed_cookies(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True)
# Cookies should be empty since all malformed cookie file entries
# will be ignored
self.assertFalse(cookiejar._cookies)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -0,0 +1,9 @@
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.
# Cookie file entry with invalid number of fields - 6 instead of 7
www.foobar.foobar FALSE / FALSE 0 COOKIE
# Cookie file entry with invalid expires at
www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE

View File

@ -57,6 +57,17 @@ try:
except ImportError: # Python 2 except ImportError: # Python 2
import cookielib as compat_cookiejar import cookielib as compat_cookiejar
if sys.version_info[0] == 2:
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
def __init__(self, version, name, value, *args, **kwargs):
if isinstance(name, compat_str):
name = name.encode()
if isinstance(value, compat_str):
value = value.encode()
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
else:
compat_cookiejar_Cookie = compat_cookiejar.Cookie
try: try:
import http.cookies as compat_cookies import http.cookies as compat_cookies
except ImportError: # Python 2 except ImportError: # Python 2
@ -2987,6 +2998,7 @@ __all__ = [
'compat_basestring', 'compat_basestring',
'compat_chr', 'compat_chr',
'compat_cookiejar', 'compat_cookiejar',
'compat_cookiejar_Cookie',
'compat_cookies', 'compat_cookies',
'compat_ctypes_WINFUNCTYPE', 'compat_ctypes_WINFUNCTYPE',
'compat_etree_Element', 'compat_etree_Element',

View File

@ -227,7 +227,7 @@ class HttpFD(FileDownloader):
while True: while True:
try: try:
# Download and write # Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have # socket.timeout is a subclass of socket.error but may not have
# errno set # errno set
except socket.timeout as e: except socket.timeout as e:
@ -299,7 +299,7 @@ class HttpFD(FileDownloader):
'elapsed': now - ctx.start_time, 'elapsed': now - ctx.start_time,
}) })
if is_test and byte_counter == data_len: if data_len is not None and byte_counter == data_len:
break break
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:

View File

@ -15,7 +15,7 @@ import time
import math import math
from ..compat import ( from ..compat import (
compat_cookiejar, compat_cookiejar_Cookie,
compat_cookies, compat_cookies,
compat_etree_Element, compat_etree_Element,
compat_etree_fromstring, compat_etree_fromstring,
@ -2843,7 +2843,7 @@ class InfoExtractor(object):
def _set_cookie(self, domain, name, value, expire_time=None, port=None, def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs): path='/', secure=False, discard=False, rest={}, **kwargs):
cookie = compat_cookiejar.Cookie( cookie = compat_cookiejar_Cookie(
0, name, value, port, port is not None, domain, True, 0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time, domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest) discard, None, None, rest)

View File

@ -32,7 +32,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod @staticmethod
def _get_cookie_value(cookies, name): def _get_cookie_value(cookies, name):
cookie = cookies.get('name') cookie = cookies.get(name)
if cookie: if cookie:
return cookie.value return cookie.value

View File

@ -804,6 +804,16 @@ from .orf import (
ORFFM4IE, ORFFM4IE,
ORFFM4StoryIE, ORFFM4StoryIE,
ORFOE1IE, ORFOE1IE,
ORFOE3IE,
ORFNOEIE,
ORFWIEIE,
ORFBGLIE,
ORFOOEIE,
ORFSTMIE,
ORFKTNIE,
ORFSBGIE,
ORFTIRIE,
ORFVBGIE,
ORFIPTVIE, ORFIPTVIE,
) )
from .outsidetv import OutsideTVIE from .outsidetv import OutsideTVIE

View File

@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor):
_GEO_BYPASS = False _GEO_BYPASS = False
_TESTS = [{ _TESTS = [{
'url': 'http://play.iprima.cz/gondici-s-r-o-33', 'url': 'https://prima.iprima.cz/particka/92-epizoda',
'info_dict': { 'info_dict': {
'id': 'p136534', 'id': 'p51388',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Gondíci s. r. o. (34)', 'title': 'Partička (92)',
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd', 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
},
'params': {
'skip_download': True, # m3u8 download
},
}, {
'url': 'https://cnn.iprima.cz/videa/70-epizoda',
'info_dict': {
'id': 'p681554',
'ext': 'mp4',
'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
}, },
'params': { 'params': {
'skip_download': True, # m3u8 download 'skip_download': True, # m3u8 download
@ -68,9 +78,15 @@ class IPrimaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
r'<h1>([^<]+)', webpage, 'title')
video_id = self._search_regex( video_id = self._search_regex(
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)', (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
r'data-product="([^"]+)">'), r'data-product="([^"]+)">',
r'id=["\']player-(p\d+)"',
r'playerId\s*:\s*["\']player-(p\d+)'),
webpage, 'real id') webpage, 'real id')
playerpage = self._download_webpage( playerpage = self._download_webpage(
@ -125,8 +141,8 @@ class IPrimaIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'title': title,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage, default=None),
'formats': formats, 'formats': formats,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage, default=None),
} }

View File

@ -162,13 +162,12 @@ class ORFTVthekIE(InfoExtractor):
class ORFRadioIE(InfoExtractor): class ORFRadioIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
station = mobj.group('station')
show_date = mobj.group('date') show_date = mobj.group('date')
show_id = mobj.group('show') show_id = mobj.group('show')
data = self._download_json( data = self._download_json(
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
% (station, show_id, show_date), show_id) % (self._API_STATION, show_id, show_date), show_id)
entries = [] entries = []
for info in data['streams']: for info in data['streams']:
@ -183,7 +182,7 @@ class ORFRadioIE(InfoExtractor):
duration = end - start if end and start else None duration = end - start if end and start else None
entries.append({ entries.append({
'id': loop_stream_id.replace('.mp3', ''), 'id': loop_stream_id.replace('.mp3', ''),
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id), 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
'title': title, 'title': title,
'description': clean_html(data.get('subtitle')), 'description': clean_html(data.get('subtitle')),
'duration': duration, 'duration': duration,
@ -205,6 +204,8 @@ class ORFFM4IE(ORFRadioIE):
IE_NAME = 'orf:fm4' IE_NAME = 'orf:fm4'
IE_DESC = 'radio FM4' IE_DESC = 'radio FM4'
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)' _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
_API_STATION = 'fm4'
_LOOP_STATION = 'fm4'
_TEST = { _TEST = {
'url': 'http://fm4.orf.at/player/20170107/4CC', 'url': 'http://fm4.orf.at/player/20170107/4CC',
@ -223,10 +224,142 @@ class ORFFM4IE(ORFRadioIE):
} }
class ORFNOEIE(ORFRadioIE):
IE_NAME = 'orf:noe'
IE_DESC = 'Radio Niederösterreich'
_VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'noe'
_LOOP_STATION = 'oe2n'
_TEST = {
'url': 'https://noe.orf.at/player/20200423/NGM',
'only_matching': True,
}
class ORFWIEIE(ORFRadioIE):
IE_NAME = 'orf:wien'
IE_DESC = 'Radio Wien'
_VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'wie'
_LOOP_STATION = 'oe2w'
_TEST = {
'url': 'https://wien.orf.at/player/20200423/WGUM',
'only_matching': True,
}
class ORFBGLIE(ORFRadioIE):
IE_NAME = 'orf:burgenland'
IE_DESC = 'Radio Burgenland'
_VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'bgl'
_LOOP_STATION = 'oe2b'
_TEST = {
'url': 'https://burgenland.orf.at/player/20200423/BGM',
'only_matching': True,
}
class ORFOOEIE(ORFRadioIE):
IE_NAME = 'orf:oberoesterreich'
IE_DESC = 'Radio Oberösterreich'
_VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'ooe'
_LOOP_STATION = 'oe2o'
_TEST = {
'url': 'https://ooe.orf.at/player/20200423/OGMO',
'only_matching': True,
}
class ORFSTMIE(ORFRadioIE):
IE_NAME = 'orf:steiermark'
IE_DESC = 'Radio Steiermark'
_VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'stm'
_LOOP_STATION = 'oe2st'
_TEST = {
'url': 'https://steiermark.orf.at/player/20200423/STGMS',
'only_matching': True,
}
class ORFKTNIE(ORFRadioIE):
IE_NAME = 'orf:kaernten'
IE_DESC = 'Radio Kärnten'
_VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'ktn'
_LOOP_STATION = 'oe2k'
_TEST = {
'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
'only_matching': True,
}
class ORFSBGIE(ORFRadioIE):
IE_NAME = 'orf:salzburg'
IE_DESC = 'Radio Salzburg'
_VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'sbg'
_LOOP_STATION = 'oe2s'
_TEST = {
'url': 'https://salzburg.orf.at/player/20200423/SGUM',
'only_matching': True,
}
class ORFTIRIE(ORFRadioIE):
IE_NAME = 'orf:tirol'
IE_DESC = 'Radio Tirol'
_VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'tir'
_LOOP_STATION = 'oe2t'
_TEST = {
'url': 'https://tirol.orf.at/player/20200423/TGUMO',
'only_matching': True,
}
class ORFVBGIE(ORFRadioIE):
IE_NAME = 'orf:vorarlberg'
IE_DESC = 'Radio Vorarlberg'
_VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'vbg'
_LOOP_STATION = 'oe2v'
_TEST = {
'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
'only_matching': True,
}
class ORFOE3IE(ORFRadioIE):
IE_NAME = 'orf:oe3'
IE_DESC = 'Radio Österreich 3'
_VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'oe3'
_LOOP_STATION = 'oe3'
_TEST = {
'url': 'https://oe3.orf.at/player/20200424/3WEK',
'only_matching': True,
}
class ORFOE1IE(ORFRadioIE): class ORFOE1IE(ORFRadioIE):
IE_NAME = 'orf:oe1' IE_NAME = 'orf:oe1'
IE_DESC = 'Radio Österreich 1' IE_DESC = 'Radio Österreich 1'
_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)' _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
_API_STATION = 'oe1'
_LOOP_STATION = 'oe1'
_TEST = { _TEST = {
'url': 'http://oe1.orf.at/player/20170108/456544', 'url': 'http://oe1.orf.at/player/20170108/456544',

View File

@ -82,17 +82,6 @@ class PuhuTVIE(InfoExtractor):
urls = [] urls = []
formats = [] formats = []
def add_http_from_hls(m3u8_f):
http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4')
if http_url != m3u8_f['url']:
f = m3u8_f.copy()
f.update({
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
'url': http_url,
})
formats.append(f)
for video in videos['data']['videos']: for video in videos['data']['videos']:
media_url = url_or_none(video.get('url')) media_url = url_or_none(video.get('url'))
if not media_url or media_url in urls: if not media_url or media_url in urls:
@ -101,12 +90,9 @@ class PuhuTVIE(InfoExtractor):
playlist = video.get('is_playlist') playlist = video.get('is_playlist')
if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url: if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
m3u8_formats = self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native', media_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False))
for m3u8_f in m3u8_formats:
formats.append(m3u8_f)
add_http_from_hls(m3u8_f)
continue continue
quality = int_or_none(video.get('quality')) quality = int_or_none(video.get('quality'))
@ -128,8 +114,6 @@ class PuhuTVIE(InfoExtractor):
format_id += '-%sp' % quality format_id += '-%sp' % quality
f['format_id'] = format_id f['format_id'] = format_id
formats.append(f) formats.append(f)
if is_hls:
add_http_from_hls(f)
self._sort_formats(formats) self._sort_formats(formats)
creator = try_get( creator = try_get(

View File

@ -2,12 +2,17 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601,
qualities,
update_url_query, update_url_query,
str_or_none,
) )
@ -16,21 +21,25 @@ class UOLIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)' _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931', 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
'md5': '25291da27dc45e0afb5718a8603d3816', 'md5': '4f1e26683979715ff64e4e29099cf020',
'info_dict': { 'info_dict': {
'id': '15951931', 'id': '15951931',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Miss simpatia é encontrada morta', 'title': 'Miss simpatia é encontrada morta',
'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2', 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
'timestamp': 1470421860,
'upload_date': '20160805',
} }
}, { }, {
'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326', 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9', 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2',
'info_dict': { 'info_dict': {
'id': '15954259', 'id': '15954259',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Incêndio destrói uma das maiores casas noturnas de Londres', 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.', 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
'timestamp': 1470674520,
'upload_date': '20160808',
} }
}, { }, {
'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931', 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
@ -55,91 +64,55 @@ class UOLIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_FORMATS = {
'2': {
'width': 640,
'height': 360,
},
'5': {
'width': 1280,
'height': 720,
},
'6': {
'width': 426,
'height': 240,
},
'7': {
'width': 1920,
'height': 1080,
},
'8': {
'width': 192,
'height': 144,
},
'9': {
'width': 568,
'height': 320,
},
'11': {
'width': 640,
'height': 360,
}
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
media_id = None
if video_id.isdigit():
media_id = video_id
if not media_id:
embed_page = self._download_webpage(
'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
video_id, 'Downloading embed page', fatal=False)
if embed_page:
media_id = self._search_regex(
(r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
embed_page, 'media id', default=None)
if not media_id:
webpage = self._download_webpage(url, video_id)
media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
video_data = self._download_json( video_data = self._download_json(
'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id, # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID]
media_id)['item'] 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id,
video_id)['item']
media_id = compat_str(video_data['mediaId'])
title = video_data['title'] title = video_data['title']
ver = video_data.get('revision', 2)
query = { uol_formats = self._download_json(
'ver': video_data.get('numRevision', 2), 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id,
'r': 'http://mais.uol.com.br', media_id)
} quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p'])
for k in ('token', 'sign'):
v = video_data.get(k)
if v:
query[k] = v
formats = [] formats = []
for f in video_data.get('formats', []): for format_id, f in uol_formats.items():
if not isinstance(f, dict):
continue
f_url = f.get('url') or f.get('secureUrl') f_url = f.get('url') or f.get('secureUrl')
if not f_url: if not f_url:
continue continue
query = {
'ver': ver,
'r': 'http://mais.uol.com.br',
}
for k in ('token', 'sign'):
v = f.get(k)
if v:
query[k] = v
f_url = update_url_query(f_url, query) f_url = update_url_query(f_url, query)
format_id = str_or_none(f.get('id')) format_id = format_id
if format_id == '10': if format_id == 'HLS':
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
f_url, video_id, 'mp4', 'm3u8_native', f_url, media_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False)
encoded_query = compat_urllib_parse_urlencode(query)
for m3u8_f in m3u8_formats:
m3u8_f['extra_param_to_segment_url'] = encoded_query
m3u8_f['url'] = update_url_query(m3u8_f['url'], query)
formats.extend(m3u8_formats)
continue continue
fmt = { formats.append({
'format_id': format_id, 'format_id': format_id,
'url': f_url, 'url': f_url,
'source_preference': 1, 'quality': quality(format_id),
} 'preference': -1,
fmt.update(self._FORMATS.get(format_id, {})) })
formats.append(fmt) self._sort_formats(formats)
self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext'))
tags = [] tags = []
for tag in video_data.get('tags', []): for tag in video_data.get('tags', []):
@ -148,12 +121,24 @@ class UOLIE(InfoExtractor):
continue continue
tags.append(tag_description) tags.append(tag_description)
thumbnails = []
for q in ('Small', 'Medium', 'Wmedium', 'Large', 'Wlarge', 'Xlarge'):
q_url = video_data.get('thumb' + q)
if not q_url:
continue
thumbnails.append({
'id': q,
'url': q_url,
})
return { return {
'id': media_id, 'id': media_id,
'title': title, 'title': title,
'description': clean_html(video_data.get('desMedia')), 'description': clean_html(video_data.get('description')),
'thumbnail': video_data.get('thumbnail'), 'thumbnails': thumbnails,
'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')), 'duration': parse_duration(video_data.get('duration')),
'tags': tags, 'tags': tags,
'formats': formats, 'formats': formats,
'timestamp': parse_iso8601(video_data.get('publishDate'), ' '),
'view_count': int_or_none(video_data.get('viewsQtty')),
} }

View File

@ -7,6 +7,7 @@ import base64
import binascii import binascii
import calendar import calendar
import codecs import codecs
import collections
import contextlib import contextlib
import ctypes import ctypes
import datetime import datetime
@ -30,6 +31,7 @@ import ssl
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
import time
import traceback import traceback
import xml.etree.ElementTree import xml.etree.ElementTree
import zlib import zlib
@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
1. https://curl.haxx.se/docs/http-cookies.html 1. https://curl.haxx.se/docs/http-cookies.html
""" """
_HTTPONLY_PREFIX = '#HttpOnly_' _HTTPONLY_PREFIX = '#HttpOnly_'
_ENTRY_LEN = 7
_HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.
'''
_CookieFileEntry = collections.namedtuple(
'CookieFileEntry',
('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
def save(self, filename=None, ignore_discard=False, ignore_expires=False): def save(self, filename=None, ignore_discard=False, ignore_expires=False):
"""
Save cookies to a file.
Most of the code is taken from CPython 3.8 and slightly adapted
to support cookie files with UTF-8 in both python 2 and 3.
"""
if filename is None:
if self.filename is not None:
filename = self.filename
else:
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
# Store session cookies with `expires` set to 0 instead of an empty # Store session cookies with `expires` set to 0 instead of an empty
# string # string
for cookie in self: for cookie in self:
if cookie.expires is None: if cookie.expires is None:
cookie.expires = 0 cookie.expires = 0
compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
with io.open(filename, 'w', encoding='utf-8') as f:
f.write(self._HEADER)
now = time.time()
for cookie in self:
if not ignore_discard and cookie.discard:
continue
if not ignore_expires and cookie.is_expired(now):
continue
if cookie.secure:
secure = 'TRUE'
else:
secure = 'FALSE'
if cookie.domain.startswith('.'):
initial_dot = 'TRUE'
else:
initial_dot = 'FALSE'
if cookie.expires is not None:
expires = compat_str(cookie.expires)
else:
expires = ''
if cookie.value is None:
# cookies.txt regards 'Set-Cookie: foo' as a cookie
# with no name, whereas http.cookiejar regards it as a
# cookie with no value.
name = ''
value = cookie.name
else:
name = cookie.name
value = cookie.value
f.write(
'\t'.join([cookie.domain, initial_dot, cookie.path,
secure, expires, name, value]) + '\n')
def load(self, filename=None, ignore_discard=False, ignore_expires=False): def load(self, filename=None, ignore_discard=False, ignore_expires=False):
"""Load cookies from a file.""" """Load cookies from a file."""
@ -2752,12 +2806,30 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
else: else:
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
line = line[len(self._HTTPONLY_PREFIX):]
# comments and empty lines are fine
if line.startswith('#') or not line.strip():
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line
cf = io.StringIO() cf = io.StringIO()
with open(filename) as f: with io.open(filename, encoding='utf-8') as f:
for line in f: for line in f:
if line.startswith(self._HTTPONLY_PREFIX): try:
line = line[len(self._HTTPONLY_PREFIX):] cf.write(prepare_line(line))
cf.write(compat_str(line)) except compat_cookiejar.LoadError as e:
write_string(
'WARNING: skipping cookie file entry due to %s: %r\n'
% (e, line), sys.stderr)
continue
cf.seek(0) cf.seek(0)
self._really_load(cf, filename, ignore_discard, ignore_expires) self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to # Session cookies are denoted by either `expires` field set to