From faa5b2ed5463c815be7ce4b48f0fb197f1978744 Mon Sep 17 00:00:00 2001
From: Forthrin <forthrin@users.noreply.github.com>
Date: Wed, 8 May 2019 13:27:37 +0200
Subject: [PATCH] [postprocessor/ffmpeg] Support for DCSubtitle (XML) format

---
 youtube_dl/postprocessor/ffmpeg.py | 10 ++++++++--
 youtube_dl/utils.py                | 29 +++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 5bcb00ac0..3c29045d6 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -19,6 +19,7 @@ from ..utils import (
     shell_quote,
     subtitles_filename,
     dfxp2srt,
+    dc2srt,
     ISO639Utils,
     replace_extension,
 )
@@ -610,7 +611,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
             sub_filenames.append(old_file)
             new_file = subtitles_filename(filename, lang, new_ext)
 
-            if ext in ('dfxp', 'ttml', 'tt'):
+            if ext in ('dfxp', 'ttml', 'tt', 'xml'):
                 self._downloader.report_warning(
                     'You have requested to convert dfxp (TTML) subtitles into another format, '
                     'which results in style information loss')
@@ -619,7 +620,12 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
                 srt_file = subtitles_filename(filename, lang, 'srt')
 
                 with open(dfxp_file, 'rb') as f:
-                    srt_data = dfxp2srt(f.read())
+                    file = f.read()
+
+                if ext == 'xml':
+                    srt_data = dc2srt(file)
+                else:
+                    srt_data = dfxp2srt(file)
 
                 with io.open(srt_file, 'wt', encoding='utf-8') as f:
                     f.write(srt_data)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 71713f63a..5069e170f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2721,6 +2721,83 @@ def match_filter_func(filter_str):
     return _match_func
 
 
+def dc_time_to_srt_time(dc_time):
+    """Convert a DCSubtitle TimeIn/TimeOut value to an SRT timestamp.
+
+    Accepts 'HH:MM:SS:TTT', where TTT counts ticks of 4 ms each (250 per
+    second), and 'HH:MM:SS.sss', where the last field is decimal seconds.
+    Returns 'HH:MM:SS,mmm' with every field zero-padded; raises ValueError
+    when dc_time has neither shape.
+    """
+    fields = dc_time.strip().split(':')
+    if len(fields) == 4:
+        seconds = int(fields[2])
+        # The fourth field counts 4 ms ticks, not milliseconds
+        millis = int(fields[3]) * 4
+    elif len(fields) == 3:
+        whole, _, fraction = fields[2].partition('.')
+        seconds = int(whole)
+        # Pad/truncate the decimal part to exactly three digits
+        millis = int((fraction + '000')[:3])
+    else:
+        raise ValueError('Invalid DCSubtitle time: %r' % dc_time)
+    # Renormalise so an overflowing fraction carries into seconds
+    total_ms = ((int(fields[0]) * 60 + int(fields[1])) * 60 + seconds) * 1000 + millis
+    total_seconds, millis = divmod(total_ms, 1000)
+    total_minutes, seconds = divmod(total_seconds, 60)
+    hours, minutes = divmod(total_minutes, 60)
+    return '%02d:%02d:%02d,%03d' % (hours, minutes, seconds, millis)
+
+
+def _dc_element_text(element):
+    """Return the text of element and all of its descendants, in document order."""
+    parts = [element.text or '']
+    for child in element:
+        parts.append(_dc_element_text(child))
+        parts.append(child.tail or '')
+    return ''.join(parts)
+
+
+def parse_dc_subtitles(dc):
+    """Parse a DCSubtitle XML document into a list of cue dicts.
+
+    dc is the XML document as passed to ElementTree's fromstring(). Each
+    returned dict has 'number', 'start' and 'end' (the SpotNumber, TimeIn
+    and TimeOut attribute values, unconverted) and 'text' (the cue's Text
+    elements joined with newlines). A missing TimeIn or TimeOut raises
+    KeyError.
+    """
+    root = xml.etree.ElementTree.fromstring(dc)
+    subs = []
+    # Subtitle elements can sit directly under the root or inside nested
+    # Font elements, so search the whole tree rather than one Font child
+    for index, subtitle in enumerate(root.findall('.//Subtitle'), 1):
+        # Text may wrap inline Font elements, which leaves .text as None
+        lines = [_dc_element_text(text).strip() for text in subtitle.findall('.//Text')]
+        subs.append({
+            # SRT needs a counter, so number the cue ourselves if unlabeled
+            'number': subtitle.get('SpotNumber', '%d' % index),
+            'start': subtitle.attrib['TimeIn'],
+            'end': subtitle.attrib['TimeOut'],
+            'text': '\n'.join(lines),
+        })
+    return subs
+
+
+def dc2srt(dc):
+    """Convert a DCSubtitle XML document to SRT-formatted text.
+
+    Each cue becomes its number, a 'start --> end' timing line and its text,
+    followed by a blank separator line.
+    """
+    srt = []
+    for sub in parse_dc_subtitles(dc):
+        timing = '%s --> %s' % (
+            dc_time_to_srt_time(sub['start']), dc_time_to_srt_time(sub['end']))
+        srt.extend((sub['number'], timing, sub['text'], ''))
+    return '\n'.join(srt)
+
+
 def parse_dfxp_time_expr(time_expr):
     if not time_expr:
         return