1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-15 01:57:16 +08:00

[utils] fix dfxp2srt conversion and apply size and color(fixes #8055)

This commit is contained in:
remitamine 2016-01-28 01:11:09 +01:00
parent ed7cd1e859
commit 59d2c7917b

View File

@ -2015,30 +2015,75 @@ def dfxp2srt(dfxp_data):
_x = functools.partial(xpath_with_ns, ns_map={ _x = functools.partial(xpath_with_ns, ns_map={
'ttml': 'http://www.w3.org/ns/ttml', 'ttml': 'http://www.w3.org/ns/ttml',
'ttaf1': 'http://www.w3.org/2006/10/ttaf1', 'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
'ttmltts': 'http://www.w3.org/ns/ttml#styling',
'ttaf1tts': 'http://www.w3.org/2006/10/ttaf1#styling',
'ttaf1ttsg': 'http://www.w3.org/2006/10/ttaf1#style',
}) })
def parse_node(node): def get_node_style(node, styles):
str_or_empty = functools.partial(str_or_none, default='') style = styles.get(node.get('style'), {}).copy()
for ttml_attr, srt_attr in {'color': 'color', 'fontSize': 'size'}.items():
val = node.get(_x('ttmltts:%s') % ttml_attr) or node.get(_x('ttaf1tts:%s') % ttml_attr) or node.get(_x('ttaf1ttsg:%s') % ttml_attr)
if val:
style[srt_attr] = val
return style
out = str_or_empty(node.text) str_or_empty = functools.partial(str_or_none, default='')
for child in node: class TTMLElementParser:
if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): out = ''
out += '\n' + str_or_empty(child.tail) style = {}
elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
out += str_or_empty(parse_node(child))
else:
out += str_or_empty(xml.etree.ElementTree.tostring(child))
def start(self, tag, attrib):
if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
self.out += '\n'
elif tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
self.style = get_node_style(attrib, styles)
if self.style:
attrib = ''
for key, val in self.style.items():
attrib += '%s="%s" ' % (key, val)
self.out += '<font %s>' % attrib
def end(self, tag):
if tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
if self.style:
self.out += '</font>'
def data(self, data):
self.out += str_or_empty(data)
def close(self):
return self.out.strip()
def parse_node(node, default_style, styles):
style = get_node_style(node, styles) or default_style
target = TTMLElementParser()
parser = xml.etree.ElementTree.XMLParser(target=target)
parser.feed(xml.etree.ElementTree.tostring(node))
out = parser.close()
if out and style:
attrib = ''
for key, val in style.items():
attrib += '%s="%s" ' % (key, val)
out = '<font %s>%s</font>' % (attrib, out)
return out return out
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
out = [] out = []
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
if not paras: if not paras:
raise ValueError('Invalid dfxp/TTML subtitle') raise ValueError('Invalid dfxp/TTML subtitle')
styles = {}
style_nodes = dfxp.findall(_x('.//ttml:style')) or dfxp.findall(_x('.//ttaf1:style'))
if style_nodes:
for i in range(2):
for style_node in style_nodes:
styles[style_node.get('id')] = get_node_style(style_node, styles)
default_style = get_node_style(xpath_element(dfxp, [_x('.//ttml:body'), _x('.//ttaf1:body'), 'body']), styles) or get_node_style(xpath_element(dfxp, [_x('.//ttml:div'), _x('.//ttaf1:div'), 'div']), styles)
for para, index in zip(paras, itertools.count(1)): for para, index in zip(paras, itertools.count(1)):
begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end')) end_time = parse_dfxp_time_expr(para.attrib.get('end'))
@ -2053,7 +2098,7 @@ def dfxp2srt(dfxp_data):
index, index,
srt_subtitles_timecode(begin_time), srt_subtitles_timecode(begin_time),
srt_subtitles_timecode(end_time), srt_subtitles_timecode(end_time),
parse_node(para))) parse_node(para, default_style, styles)))
return ''.join(out) return ''.join(out)