1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-09 09:39:57 +08:00

[postprocessor/ffmpeg] Support for DCSubtitle (XML) format

This commit is contained in:
Forthrin 2019-05-08 13:27:37 +02:00
parent 71ebd35d50
commit faa5b2ed54
2 changed files with 37 additions and 2 deletions

View File

@ -19,6 +19,7 @@ from ..utils import (
shell_quote,
subtitles_filename,
dfxp2srt,
dc2srt,
ISO639Utils,
replace_extension,
)
@ -610,7 +611,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
sub_filenames.append(old_file)
new_file = subtitles_filename(filename, lang, new_ext)
if ext in ('dfxp', 'ttml', 'tt'):
if ext in ('dfxp', 'ttml', 'tt', 'xml'):
self._downloader.report_warning(
'You have requested to convert dfxp (TTML) subtitles into another format, '
'which results in style information loss')
@ -619,7 +620,12 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
srt_file = subtitles_filename(filename, lang, 'srt')
with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
file = f.read()
if ext == 'xml':
srt_data = dc2srt(file)
else:
srt_data = dfxp2srt(file)
with io.open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data)

View File

@ -2721,6 +2721,35 @@ def match_filter_func(filter_str):
return _match_func
def dc_time_to_srt_time(dc_time):
    """Convert a DCSubtitle (Interop) timestamp into an SRT timestamp.

    Two input forms are accepted:

    * ``HH:MM:SS:TTT`` - the fourth field counts ticks of 4 ms each
      (250 ticks per second), so it is scaled to milliseconds;
    * ``HH:MM:SS.sss`` - the part after the dot is a decimal fraction of
      a second.

    Returns the time formatted as zero-padded ``HH:MM:SS,mmm``.
    Raises ValueError when the value has neither form or a field is not
    an integer.
    """
    fields = dc_time.strip().split(':')
    if len(fields) == 4:
        hours, minutes, seconds, ticks = fields
        # One tick is 1/250 of a second, i.e. 4 milliseconds.
        millis = int(ticks) * 4
    elif len(fields) == 3:
        hours, minutes, seconds = fields
        seconds, _, fraction = seconds.partition('.')
        # Right-pad / truncate the fraction to exactly three digits.
        millis = int((fraction + '000')[:3])
    else:
        raise ValueError('Invalid DCSubtitle time: %r' % dc_time)

    # Go through a total so that out-of-range fields carry over properly.
    total_ms = ((int(hours) * 60 + int(minutes)) * 60 + int(seconds)) * 1000 + millis
    total_s, millis = divmod(total_ms, 1000)
    total_m, secs = divmod(total_s, 60)
    hrs, mins = divmod(total_m, 60)
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, millis)
def _dc_element_text(element):
    """Return all character data inside *element*, including nested tags."""
    chunks = [element.text or '']
    for child in element:
        chunks.append(_dc_element_text(child))
        # Text following a nested element lives in that element's tail.
        chunks.append(child.tail or '')
    return ''.join(chunks)


def parse_dc_subtitles(dc):
    """Parse a DCSubtitle XML document into a list of subtitle entries.

    *dc* is the XML document as accepted by
    ``xml.etree.ElementTree.fromstring``.

    Returns a list of dicts in document order, each with the keys
    ``number`` (the ``SpotNumber`` attribute, or the 1-based position when
    it is absent), ``start`` / ``end`` (the raw ``TimeIn`` / ``TimeOut``
    attributes) and ``text`` (the ``Text`` children joined with newlines).

    Raises KeyError if a ``Subtitle`` lacks ``TimeIn`` or ``TimeOut``.
    """
    root = xml.etree.ElementTree.fromstring(dc)
    subs = []
    # Subtitle elements may sit directly under the root, under a Font, or
    # under nested Fonts, so search the whole tree rather than one Font.
    for position, subtitle in enumerate(root.findall('.//Subtitle'), 1):
        subs.append({
            'number': subtitle.attrib.get('SpotNumber', str(position)),
            'start': subtitle.attrib['TimeIn'],
            'end': subtitle.attrib['TimeOut'],
            # A Text may wrap its content in inline Font elements, in which
            # case its own .text is None; collect the full character data.
            'text': '\n'.join(
                _dc_element_text(text) for text in subtitle.findall('Text')),
        })
    return subs
def dc2srt(dc):
    """Convert a DCSubtitle XML document into SRT text.

    Each parsed subtitle contributes four lines: its number, the
    ``start --> end`` timing line, the text, and an empty separator line.
    All lines are joined with newlines.
    """
    lines = []
    for entry in parse_dc_subtitles(dc):
        timing = ' --> '.join(
            dc_time_to_srt_time(entry[key]) for key in ('start', 'end'))
        lines.extend((entry['number'], timing, entry['text'], ''))
    return '\n'.join(lines)
def parse_dfxp_time_expr(time_expr):
if not time_expr:
return