Merge pull request #309 from ytdl-org/master

[pull] master from ytdl-org:master
2026-06-05 12:23:32 +08:00 · 2020-05-07 16:05:01 +00:00
parent c92792bd2c 30fa5c6087
commit 047ce5bac3
12 changed files with 344 additions and 116 deletions
@@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
        assert_cookie_has_value('HTTPONLY_COOKIE')
        assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')

+    def test_malformed_cookies(self):
+        # Load a fixture file that consists solely of malformed entries
+        jar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
+        jar.load(ignore_expires=True, ignore_discard=True)
+        # Every malformed entry is expected to be skipped, so the jar
+        # must not hold any cookies afterwards
+        self.assertFalse(jar._cookies)
+

 if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,9 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file!  Do not edit.
+
+# Cookie file entry with invalid number of fields - 6 instead of 7
+www.foobar.foobar	FALSE	/	FALSE	0	COOKIE
+
+# Cookie file entry with invalid expires at
+www.foobar.foobar	FALSE	/	FALSE	1.7976931348623157e+308	COOKIE	VALUE
@@ -57,6 +57,17 @@ try:
 except ImportError:  # Python 2
    import cookielib as compat_cookiejar

+if sys.version_info[0] == 2:
+    class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
+        # Python 2 wrapper around cookielib's Cookie: name and value are
+        # encoded to byte strings when they arrive as compat_str, every
+        # other argument is forwarded untouched.
+        def __init__(self, version, name, value, *args, **kwargs):
+            name, value = (
+                field.encode() if isinstance(field, compat_str) else field
+                for field in (name, value))
+            compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
+else:
+    # Python 3: the stock Cookie class is used as is
+    compat_cookiejar_Cookie = compat_cookiejar.Cookie
+
 try:
    import http.cookies as compat_cookies
 except ImportError:  # Python 2
@@ -2987,6 +2998,7 @@ __all__ = [
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
+    'compat_cookiejar_Cookie',
    'compat_cookies',
    'compat_ctypes_WINFUNCTYPE',
    'compat_etree_Element',
@@ -227,7 +227,7 @@ class HttpFD(FileDownloader):
            while True:
                try:
                    # Download and write
-                    data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+                    data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
                # socket.timeout is a subclass of socket.error but may not have
                # errno set
                except socket.timeout as e:
@@ -299,7 +299,7 @@ class HttpFD(FileDownloader):
                    'elapsed': now - ctx.start_time,
                })

-                if is_test and byte_counter == data_len:
+                if data_len is not None and byte_counter == data_len:
                    break

            if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
@@ -15,7 +15,7 @@ import time
 import math

 from ..compat import (
-    compat_cookiejar,
+    compat_cookiejar_Cookie,
    compat_cookies,
    compat_etree_Element,
    compat_etree_fromstring,
@@ -2843,7 +2843,7 @@ class InfoExtractor(object):

    def _set_cookie(self, domain, name, value, expire_time=None, port=None,
                    path='/', secure=False, discard=False, rest={}, **kwargs):
-        cookie = compat_cookiejar.Cookie(
+        cookie = compat_cookiejar_Cookie(
            0, name, value, port, port is not None, domain, True,
            domain.startswith('.'), path, True, secure, expire_time,
            discard, None, None, rest)
@@ -32,7 +32,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):

    @staticmethod
    def _get_cookie_value(cookies, name):
+        """Return the value of the cookie stored under *name* in *cookies*.
+
+        *cookies* is looked up with ``.get(name)``; when no (truthy) entry is
+        found the function falls through and returns None.
+        """
-        cookie = cookies.get('name')
+        cookie = cookies.get(name)
        if cookie:
            return cookie.value

@@ -804,6 +804,16 @@ from .orf import (
    ORFFM4IE,
    ORFFM4StoryIE,
    ORFOE1IE,
+    ORFOE3IE,
+    ORFNOEIE,
+    ORFWIEIE,
+    ORFBGLIE,
+    ORFOOEIE,
+    ORFSTMIE,
+    ORFKTNIE,
+    ORFSBGIE,
+    ORFTIRIE,
+    ORFVBGIE,
    ORFIPTVIE,
 )
 from .outsidetv import OutsideTVIE
@@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor):
    _GEO_BYPASS = False

    _TESTS = [{
-        'url': 'http://play.iprima.cz/gondici-s-r-o-33',
+        'url': 'https://prima.iprima.cz/particka/92-epizoda',
        'info_dict': {
-            'id': 'p136534',
+            'id': 'p51388',
            'ext': 'mp4',
-            'title': 'Gondíci s. r. o. (34)',
-            'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
+            'title': 'Partička (92)',
+            'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+        },
+        'params': {
+            'skip_download': True,  # m3u8 download
+        },
+    }, {
+        'url': 'https://cnn.iprima.cz/videa/70-epizoda',
+        'info_dict': {
+            'id': 'p681554',
+            'ext': 'mp4',
+            'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
        },
        'params': {
            'skip_download': True,  # m3u8 download
@@ -68,9 +78,15 @@ class IPrimaIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

+        title = self._og_search_title(
+            webpage, default=None) or self._search_regex(
+            r'<h1>([^<]+)', webpage, 'title')
+
        video_id = self._search_regex(
            (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
-             r'data-product="([^"]+)">'),
+             r'data-product="([^"]+)">',
+             r'id=["\']player-(p\d+)"',
+             r'playerId\s*:\s*["\']player-(p\d+)'),
            webpage, 'real id')

        playerpage = self._download_webpage(
@@ -125,8 +141,8 @@ class IPrimaIE(InfoExtractor):

        return {
            'id': video_id,
-            'title': self._og_search_title(webpage),
-            'thumbnail': self._og_search_thumbnail(webpage),
+            'title': title,
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
-            'description': self._og_search_description(webpage),
+            'description': self._og_search_description(webpage, default=None),
        }
@@ -162,13 +162,12 @@ class ORFTVthekIE(InfoExtractor):
 class ORFRadioIE(InfoExtractor):
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        station = mobj.group('station')
        show_date = mobj.group('date')
        show_id = mobj.group('show')

        data = self._download_json(
            'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
-            % (station, show_id, show_date), show_id)
+            % (self._API_STATION, show_id, show_date), show_id)

        entries = []
        for info in data['streams']:
@@ -183,7 +182,7 @@ class ORFRadioIE(InfoExtractor):
            duration = end - start if end and start else None
            entries.append({
                'id': loop_stream_id.replace('.mp3', ''),
-                'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, loop_stream_id),
+                'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
                'title': title,
                'description': clean_html(data.get('subtitle')),
                'duration': duration,
@@ -205,6 +204,8 @@ class ORFFM4IE(ORFRadioIE):
    IE_NAME = 'orf:fm4'
    IE_DESC = 'radio FM4'
    _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
+    _API_STATION = 'fm4'
+    _LOOP_STATION = 'fm4'

    _TEST = {
        'url': 'http://fm4.orf.at/player/20170107/4CC',
@@ -223,10 +224,142 @@ class ORFFM4IE(ORFRadioIE):
    }


+class ORFNOEIE(ORFRadioIE):
+    # Extractor for Radio Niederösterreich player URLs on noe.orf.at
+    IE_NAME = 'orf:noe'
+    IE_DESC = 'Radio Niederösterreich'
+    _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at broadcast URL
+    _API_STATION = 'noe'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2n'
+
+    _TEST = {
+        'url': 'https://noe.orf.at/player/20200423/NGM',
+        'only_matching': True,
+    }
+
+
+class ORFWIEIE(ORFRadioIE):
+    # Extractor for Radio Wien player URLs on wien.orf.at
+    IE_NAME = 'orf:wien'
+    IE_DESC = 'Radio Wien'
+    _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at
+    # broadcast URL; note that it differs from the 'wien' host name
+    _API_STATION = 'wie'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2w'
+
+    _TEST = {
+        'url': 'https://wien.orf.at/player/20200423/WGUM',
+        'only_matching': True,
+    }
+
+
+class ORFBGLIE(ORFRadioIE):
+    # Extractor for Radio Burgenland player URLs on burgenland.orf.at
+    IE_NAME = 'orf:burgenland'
+    IE_DESC = 'Radio Burgenland'
+    _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at
+    # broadcast URL; note that it differs from the 'burgenland' host name
+    _API_STATION = 'bgl'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2b'
+
+    _TEST = {
+        'url': 'https://burgenland.orf.at/player/20200423/BGM',
+        'only_matching': True,
+    }
+
+
+class ORFOOEIE(ORFRadioIE):
+    # Extractor for Radio Oberösterreich player URLs on ooe.orf.at
+    IE_NAME = 'orf:oberoesterreich'
+    IE_DESC = 'Radio Oberösterreich'
+    _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at broadcast URL
+    _API_STATION = 'ooe'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2o'
+
+    _TEST = {
+        'url': 'https://ooe.orf.at/player/20200423/OGMO',
+        'only_matching': True,
+    }
+
+
+class ORFSTMIE(ORFRadioIE):
+    # Extractor for Radio Steiermark player URLs on steiermark.orf.at
+    IE_NAME = 'orf:steiermark'
+    IE_DESC = 'Radio Steiermark'
+    _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at
+    # broadcast URL; note that it differs from the 'steiermark' host name
+    _API_STATION = 'stm'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2st'
+
+    _TEST = {
+        'url': 'https://steiermark.orf.at/player/20200423/STGMS',
+        'only_matching': True,
+    }
+
+
+class ORFKTNIE(ORFRadioIE):
+    # Extractor for Radio Kärnten player URLs on kaernten.orf.at
+    IE_NAME = 'orf:kaernten'
+    IE_DESC = 'Radio Kärnten'
+    _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at
+    # broadcast URL; note that it differs from the 'kaernten' host name
+    _API_STATION = 'ktn'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2k'
+
+    _TEST = {
+        'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
+        'only_matching': True,
+    }
+
+
+class ORFSBGIE(ORFRadioIE):
+    # Extractor for Radio Salzburg player URLs on salzburg.orf.at
+    IE_NAME = 'orf:salzburg'
+    IE_DESC = 'Radio Salzburg'
+    _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at
+    # broadcast URL; note that it differs from the 'salzburg' host name
+    _API_STATION = 'sbg'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2s'
+
+    _TEST = {
+        'url': 'https://salzburg.orf.at/player/20200423/SGUM',
+        'only_matching': True,
+    }
+
+
+class ORFTIRIE(ORFRadioIE):
+    # Extractor for Radio Tirol player URLs on tirol.orf.at
+    IE_NAME = 'orf:tirol'
+    IE_DESC = 'Radio Tirol'
+    _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at
+    # broadcast URL; note that it differs from the 'tirol' host name
+    _API_STATION = 'tir'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2t'
+
+    _TEST = {
+        'url': 'https://tirol.orf.at/player/20200423/TGUMO',
+        'only_matching': True,
+    }
+
+
+class ORFVBGIE(ORFRadioIE):
+    # Extractor for Radio Vorarlberg player URLs on vorarlberg.orf.at
+    IE_NAME = 'orf:vorarlberg'
+    IE_DESC = 'Radio Vorarlberg'
+    _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at
+    # broadcast URL; note that it differs from the 'vorarlberg' host name
+    _API_STATION = 'vbg'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe2v'
+
+    _TEST = {
+        'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
+        'only_matching': True,
+    }
+
+
+class ORFOE3IE(ORFRadioIE):
+    # Extractor for Radio Österreich 3 player URLs on oe3.orf.at
+    IE_NAME = 'orf:oe3'
+    IE_DESC = 'Radio Österreich 3'
+    _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    # Station code that ORFRadioIE._real_extract puts into the audioapi.orf.at broadcast URL
+    _API_STATION = 'oe3'
+    # Value sent as channel= in the loopstream01.apa.at stream URL
+    _LOOP_STATION = 'oe3'
+
+    _TEST = {
+        'url': 'https://oe3.orf.at/player/20200424/3WEK',
+        'only_matching': True,
+    }
+
+
 class ORFOE1IE(ORFRadioIE):
    IE_NAME = 'orf:oe1'
    IE_DESC = 'Radio Österreich 1'
    _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+    _API_STATION = 'oe1'
+    _LOOP_STATION = 'oe1'

    _TEST = {
        'url': 'http://oe1.orf.at/player/20170108/456544',
@@ -82,17 +82,6 @@ class PuhuTVIE(InfoExtractor):
        urls = []
        formats = []

-        def add_http_from_hls(m3u8_f):
-            http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4')
-            if http_url != m3u8_f['url']:
-                f = m3u8_f.copy()
-                f.update({
-                    'format_id': f['format_id'].replace('hls', 'http'),
-                    'protocol': 'http',
-                    'url': http_url,
-                })
-                formats.append(f)
-
        for video in videos['data']['videos']:
            media_url = url_or_none(video.get('url'))
            if not media_url or media_url in urls:
@@ -101,12 +90,9 @@ class PuhuTVIE(InfoExtractor):

            playlist = video.get('is_playlist')
            if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
-                m3u8_formats = self._extract_m3u8_formats(
+                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False)
-                for m3u8_f in m3u8_formats:
-                    formats.append(m3u8_f)
-                    add_http_from_hls(m3u8_f)
+                    m3u8_id='hls', fatal=False))
                continue

            quality = int_or_none(video.get('quality'))
@@ -128,8 +114,6 @@ class PuhuTVIE(InfoExtractor):
                format_id += '-%sp' % quality
            f['format_id'] = format_id
            formats.append(f)
-            if is_hls:
-                add_http_from_hls(f)
        self._sort_formats(formats)

        creator = try_get(
@@ -2,12 +2,17 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..compat import (
+    compat_str,
+    compat_urllib_parse_urlencode,
+)
 from ..utils import (
    clean_html,
    int_or_none,
    parse_duration,
+    parse_iso8601,
+    qualities,
    update_url_query,
-    str_or_none,
 )


@@ -16,21 +21,25 @@ class UOLIE(InfoExtractor):
    _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
    _TESTS = [{
        'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
-        'md5': '25291da27dc45e0afb5718a8603d3816',
+        'md5': '4f1e26683979715ff64e4e29099cf020',
        'info_dict': {
            'id': '15951931',
            'ext': 'mp4',
            'title': 'Miss simpatia é encontrada morta',
            'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
+            'timestamp': 1470421860,
+            'upload_date': '20160805',
        }
    }, {
        'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
-        'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9',
+        'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2',
        'info_dict': {
            'id': '15954259',
            'ext': 'mp4',
            'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
            'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
+            'timestamp': 1470674520,
+            'upload_date': '20160808',
        }
    }, {
        'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
@@ -55,91 +64,55 @@ class UOLIE(InfoExtractor):
        'only_matching': True,
    }]

-    _FORMATS = {
-        '2': {
-            'width': 640,
-            'height': 360,
-        },
-        '5': {
-            'width': 1280,
-            'height': 720,
-        },
-        '6': {
-            'width': 426,
-            'height': 240,
-        },
-        '7': {
-            'width': 1920,
-            'height': 1080,
-        },
-        '8': {
-            'width': 192,
-            'height': 144,
-        },
-        '9': {
-            'width': 568,
-            'height': 320,
-        },
-        '11': {
-            'width': 640,
-            'height': 360,
-        }
-    }
-
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        media_id = None
-
-        if video_id.isdigit():
-            media_id = video_id
-
-        if not media_id:
-            embed_page = self._download_webpage(
-                'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
-                video_id, 'Downloading embed page', fatal=False)
-            if embed_page:
-                media_id = self._search_regex(
-                    (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
-                    embed_page, 'media id', default=None)
-
-        if not media_id:
-            webpage = self._download_webpage(url, video_id)
-            media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')

        video_data = self._download_json(
-            'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id,
-            media_id)['item']
+            # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID]
+            'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id,
+            video_id)['item']
+        media_id = compat_str(video_data['mediaId'])
        title = video_data['title']
+        ver = video_data.get('revision', 2)

-        query = {
-            'ver': video_data.get('numRevision', 2),
-            'r': 'http://mais.uol.com.br',
-        }
-        for k in ('token', 'sign'):
-            v = video_data.get(k)
-            if v:
-                query[k] = v
-
+        uol_formats = self._download_json(
+            'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id,
+            media_id)
+        quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p'])
        formats = []
-        for f in video_data.get('formats', []):
+        for format_id, f in uol_formats.items():
+            if not isinstance(f, dict):
+                continue
            f_url = f.get('url') or f.get('secureUrl')
            if not f_url:
                continue
+            query = {
+                'ver': ver,
+                'r': 'http://mais.uol.com.br',
+            }
+            for k in ('token', 'sign'):
+                v = f.get(k)
+                if v:
+                    query[k] = v
            f_url = update_url_query(f_url, query)
-            format_id = str_or_none(f.get('id'))
-            if format_id == '10':
-                formats.extend(self._extract_m3u8_formats(
-                    f_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
+            format_id = format_id
+            if format_id == 'HLS':
+                m3u8_formats = self._extract_m3u8_formats(
+                    f_url, media_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False)
+                encoded_query = compat_urllib_parse_urlencode(query)
+                for m3u8_f in m3u8_formats:
+                    m3u8_f['extra_param_to_segment_url'] = encoded_query
+                    m3u8_f['url'] = update_url_query(m3u8_f['url'], query)
+                formats.extend(m3u8_formats)
                continue
-            fmt = {
+            formats.append({
                'format_id': format_id,
                'url': f_url,
-                'source_preference': 1,
-            }
-            fmt.update(self._FORMATS.get(format_id, {}))
-            formats.append(fmt)
-        self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext'))
+                'quality': quality(format_id),
+                'preference': -1,
+            })
+        self._sort_formats(formats)

        tags = []
        for tag in video_data.get('tags', []):
@@ -148,12 +121,24 @@ class UOLIE(InfoExtractor):
                continue
            tags.append(tag_description)

+        thumbnails = []
+        for q in ('Small', 'Medium', 'Wmedium', 'Large', 'Wlarge', 'Xlarge'):
+            q_url = video_data.get('thumb' + q)
+            if not q_url:
+                continue
+            thumbnails.append({
+                'id': q,
+                'url': q_url,
+            })
+
        return {
            'id': media_id,
            'title': title,
-            'description': clean_html(video_data.get('desMedia')),
-            'thumbnail': video_data.get('thumbnail'),
-            'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')),
+            'description': clean_html(video_data.get('description')),
+            'thumbnails': thumbnails,
+            'duration': parse_duration(video_data.get('duration')),
            'tags': tags,
            'formats': formats,
+            'timestamp': parse_iso8601(video_data.get('publishDate'), ' '),
+            'view_count': int_or_none(video_data.get('viewsQtty')),
        }
@@ -7,6 +7,7 @@ import base64
 import binascii
 import calendar
 import codecs
+import collections
 import contextlib
 import ctypes
 import datetime
@@ -30,6 +31,7 @@ import ssl
 import subprocess
 import sys
 import tempfile
+import time
 import traceback
 import xml.etree.ElementTree
 import zlib
@@ -2735,14 +2737,66 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
+    _ENTRY_LEN = 7
+    _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by youtube-dl.  Do not edit.
+
+'''
+    _CookieFileEntry = collections.namedtuple(
+        'CookieFileEntry',
+        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+        """
+        Save cookies to a file.
+
+        Most of the code is taken from CPython 3.8 and slightly adapted
+        to support cookie files with UTF-8 in both python 2 and 3.
+
+        filename falls back to self.filename; ValueError is raised when
+        neither is set.  Cookies flagged as discard are skipped unless
+        ignore_discard is true, and cookies that are already expired are
+        skipped unless ignore_expires is true.  Cookies whose expires is
+        None are modified in place (expires becomes 0) before writing.
+        """
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0
-        compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
+
+        # The file is written as UTF-8 text: the fixed header first, then
+        # one tab-separated line per cookie that passes the filters below
+        with io.open(filename, 'w', encoding='utf-8') as f:
+            f.write(self._HEADER)
+            now = time.time()
+            for cookie in self:
+                if not ignore_discard and cookie.discard:
+                    continue
+                if not ignore_expires and cookie.is_expired(now):
+                    continue
+                if cookie.secure:
+                    secure = 'TRUE'
+                else:
+                    secure = 'FALSE'
+                if cookie.domain.startswith('.'):
+                    initial_dot = 'TRUE'
+                else:
+                    initial_dot = 'FALSE'
+                # NOTE(review): expires was already replaced by 0 above for
+                # cookies that had None, so the else branch presumably only
+                # remains as a safeguard from the CPython original
+                if cookie.expires is not None:
+                    expires = compat_str(cookie.expires)
+                else:
+                    expires = ''
+                if cookie.value is None:
+                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
+                    # with no name, whereas http.cookiejar regards it as a
+                    # cookie with no value.
+                    name = ''
+                    value = cookie.name
+                else:
+                    name = cookie.name
+                    value = cookie.value
+                f.write(
+                    '\t'.join([cookie.domain, initial_dot, cookie.path,
+                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
@@ -2752,12 +2806,30 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

+        def prepare_line(line):
+            # Validate one raw line of the cookie file and return it with
+            # the HttpOnly prefix removed; raise LoadError for a malformed
+            # entry.
+            prefix = self._HTTPONLY_PREFIX
+            if line.startswith(prefix):
+                line = line[len(prefix):]
+            # comment lines and blank lines need no validation
+            if line.startswith('#') or not line.strip():
+                return line
+            fields = line.split('\t')
+            field_count = len(fields)
+            if field_count != self._ENTRY_LEN:
+                raise compat_cookiejar.LoadError('invalid length %d' % field_count)
+            expires_at = self._CookieFileEntry(*fields).expires_at
+            # an empty expiry is accepted, anything else must be all digits
+            if expires_at and not expires_at.isdigit():
+                raise compat_cookiejar.LoadError('invalid expires at %s' % expires_at)
+            return line
+
        cf = io.StringIO()
-        with open(filename) as f:
+        with io.open(filename, encoding='utf-8') as f:
            for line in f:
-                if line.startswith(self._HTTPONLY_PREFIX):
-                    line = line[len(self._HTTPONLY_PREFIX):]
-                cf.write(compat_str(line))
+                try:
+                    cf.write(prepare_line(line))
+                except compat_cookiejar.LoadError as e:
+                    write_string(
+                        'WARNING: skipping cookie file entry due to %s: %r\n'
+                        % (e, line), sys.stderr)
+                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to