mirror of
https://github.com/l1ving/youtube-dl
synced 2025-01-23 11:15:35 +08:00
[YoutubeDL:Playlist DownloadLog]: Using file_cache to record the progress of playlist-download-task. Download each videos in order; then re-download the broken videos.
This commit is contained in:
parent
b1e4e389f3
commit
2e6a1babfa
@ -9,18 +9,21 @@ import copy
|
|||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
import fileinput
|
||||||
|
import hashlib
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
import operator
|
import operator
|
||||||
import os
|
import os
|
||||||
|
import pickle
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import tokenize
|
import tokenize
|
||||||
import traceback
|
import traceback
|
||||||
@ -109,6 +112,119 @@ from .version import __version__
|
|||||||
if compat_os_name == 'nt':
|
if compat_os_name == 'nt':
|
||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
|
youtube_dl_url_key = "youtube_dl_url_key"
|
||||||
|
|
||||||
|
|
||||||
|
class PlaylistTaskLog(object):
|
||||||
|
def __init__(self, task_id, tmp_path=None, max_retry_count=5, to_screen=None):
|
||||||
|
self.to_screen = to_screen or print
|
||||||
|
self._task_id = task_id
|
||||||
|
self._log = {}
|
||||||
|
self._tmp_path = tmp_path or tempfile.gettempdir()
|
||||||
|
self._log_file = os.path.join(self._tmp_path, "{}.pkl".format(self._task_id))
|
||||||
|
self._max_retry_count = max_retry_count
|
||||||
|
self._pkl_exists_before = False
|
||||||
|
# init log
|
||||||
|
self._init_log()
|
||||||
|
|
||||||
|
def _init_log(self):
|
||||||
|
if not os.path.exists(self._log_file):
|
||||||
|
with open(self._log_file, "wb"):
|
||||||
|
pass
|
||||||
|
self._pkl_exists_before = False
|
||||||
|
return
|
||||||
|
|
||||||
|
with open(self._log_file, "rb") as f:
|
||||||
|
self._log = pickle.load(f)
|
||||||
|
self._pkl_exists_before = True
|
||||||
|
return
|
||||||
|
|
||||||
|
def _update_config(self):
|
||||||
|
if not self._log:
|
||||||
|
if os.path.exists(self._log_file):
|
||||||
|
os.remove(self._log_file)
|
||||||
|
return
|
||||||
|
|
||||||
|
with open(self._log_file, "wb") as f:
|
||||||
|
pickle.dump(self._log, f)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def create_id_by_ie_result(cls, ie_result):
|
||||||
|
|
||||||
|
def byteify(object_input):
|
||||||
|
if isinstance(object_input, dict):
|
||||||
|
str_dict = {byteify(key): byteify(value) for key, value in object_input.items()}
|
||||||
|
return ";;;".join(["{}:{}".format(key, str_dict[key]) for key in sorted(str_dict.keys())])
|
||||||
|
elif isinstance(object_input, (list, tuple)):
|
||||||
|
return ";;;".join([byteify(element) for element in sorted(object_input)])
|
||||||
|
else:
|
||||||
|
return "{}".format(object_input)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if youtube_dl_url_key in ie_result:
|
||||||
|
try:
|
||||||
|
return hashlib.md5(ie_result[youtube_dl_url_key]).hexdigest()
|
||||||
|
except Exception:
|
||||||
|
return hashlib.md5(ie_result[youtube_dl_url_key].encode("utf-8")).hexdigest()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
return hashlib.md5(json.dumps(ie_result, sort_keys=True)).hexdigest()
|
||||||
|
except TypeError:
|
||||||
|
return hashlib.md5(byteify(ie_result).encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
# python 2
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __next__(self):
|
||||||
|
# Python 3
|
||||||
|
def _pop_task_info():
|
||||||
|
if not self._log:
|
||||||
|
return None
|
||||||
|
|
||||||
|
sorted_list = sorted(self._log.keys(), key=lambda x: self._log[x]["count"])
|
||||||
|
|
||||||
|
return self._log[sorted_list[0]]
|
||||||
|
|
||||||
|
while True:
|
||||||
|
task_info = _pop_task_info()
|
||||||
|
|
||||||
|
if task_info is None:
|
||||||
|
raise StopIteration
|
||||||
|
|
||||||
|
return task_info["task"]
|
||||||
|
|
||||||
|
next = __next__ # python 2
|
||||||
|
|
||||||
|
def commit(self, video_id, success=True):
|
||||||
|
if video_id not in self._log:
|
||||||
|
self.to_screen("WARNING: {} not in playlist log!".format(video_id))
|
||||||
|
return
|
||||||
|
|
||||||
|
if success is True:
|
||||||
|
self._log.pop(video_id)
|
||||||
|
else:
|
||||||
|
self._log[video_id]["count"] += 1
|
||||||
|
if self._log[video_id]["count"] > self._max_retry_count:
|
||||||
|
self._log.pop(video_id)
|
||||||
|
|
||||||
|
self._update_config()
|
||||||
|
# self.to_screen("DEBUG: PlaylistTaskLog[{} left] is {}".format(len(self._log), self._log))
|
||||||
|
self.to_screen("DEBUG: left {} videos".format(len(self._log)))
|
||||||
|
|
||||||
|
def add_task(self, number, n_entries, entry, extra):
|
||||||
|
video_id = "{}".format(number)
|
||||||
|
|
||||||
|
if not self._pkl_exists_before and video_id not in self._log:
|
||||||
|
self._log[video_id] = {
|
||||||
|
"count": 0,
|
||||||
|
"task": {"vid": video_id, "number": number, "n_entries": n_entries, "entry": entry, "extra": extra}
|
||||||
|
}
|
||||||
|
|
||||||
|
self._update_config()
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(object):
|
class YoutubeDL(object):
|
||||||
"""YoutubeDL class.
|
"""YoutubeDL class.
|
||||||
@ -800,6 +916,12 @@ class YoutubeDL(object):
|
|||||||
}
|
}
|
||||||
self.add_default_extra_info(ie_result, ie, url)
|
self.add_default_extra_info(ie_result, ie, url)
|
||||||
if process:
|
if process:
|
||||||
|
try:
|
||||||
|
if youtube_dl_url_key not in ie_result:
|
||||||
|
ie_result[youtube_dl_url_key] = url
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return self.process_ie_result(ie_result, download, extra_info)
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
return ie_result
|
return ie_result
|
||||||
@ -972,8 +1094,10 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||||
|
|
||||||
|
_download_log = PlaylistTaskLog(task_id=PlaylistTaskLog.create_id_by_ie_result(ie_result),
|
||||||
|
to_screen=self.to_screen)
|
||||||
|
|
||||||
for i, entry in enumerate(entries, 1):
|
for i, entry in enumerate(entries, 1):
|
||||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
|
||||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||||
# minimal changes
|
# minimal changes
|
||||||
if x_forwarded_for:
|
if x_forwarded_for:
|
||||||
@ -997,10 +1121,24 @@ class YoutubeDL(object):
|
|||||||
self.to_screen('[download] ' + reason)
|
self.to_screen('[download] ' + reason)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
_download_log.add_task(number=i, n_entries=len(entries), entry=entry, extra=extra)
|
||||||
download=download,
|
|
||||||
extra_info=extra)
|
# try to download all videos
|
||||||
playlist_results.append(entry_result)
|
for task in _download_log:
|
||||||
|
self.to_screen('[download] Downloading No.{} in {} videos'.format(task["number"], task["n_entries"]))
|
||||||
|
|
||||||
|
try:
|
||||||
|
entry_result = self.process_ie_result(task["entry"],
|
||||||
|
download=download,
|
||||||
|
extra_info=task["extra"])
|
||||||
|
except Exception as e:
|
||||||
|
self.to_stderr("ERROR: {}".format(e))
|
||||||
|
_download_log.commit(video_id=task["vid"], success=False)
|
||||||
|
else:
|
||||||
|
playlist_results.append(entry_result)
|
||||||
|
|
||||||
|
_download_log.commit(video_id=task["vid"], success=True)
|
||||||
|
|
||||||
ie_result['entries'] = playlist_results
|
ie_result['entries'] = playlist_results
|
||||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||||
return ie_result
|
return ie_result
|
||||||
|
Loading…
Reference in New Issue
Block a user