From 4a27e3da814b486c8221d0e080402476892e403c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9stin=20Reed?= Date: Sat, 20 Aug 2016 15:40:02 +0200 Subject: [PATCH] [utils] Add support for DCSubtitle --- test/test_utils.py | 33 ++++++++++++++++++++++++++++++ youtube_dl/postprocessor/ffmpeg.py | 25 ++++++++++++++++++++++ youtube_dl/utils.py | 26 +++++++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/test/test_utils.py b/test/test_utils.py index d16ea7f77..74dba496b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -79,6 +79,7 @@ from youtube_dl.utils import ( match_str, parse_dfxp_time_expr, dfxp2srt, + dc2srt, cli_option, cli_valueless_option, cli_bool_option, @@ -970,6 +971,38 @@ The first line ''' self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) + def test_dc2srt(self): + dc_data = ''' + + id + title + 1 + English + + + ^_^ + second line + + + single line + + + ''' + srt_data = '''1 +00:00:05,000 --> 00:00:08,356 +^_^ +second line + + +2 +00:00:08,356 --> 00:00:09,000 +single line + + +''' + + self.assertEqual(dc2srt(dc_data), srt_data) + def test_cli_option(self): self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index fa99b0c2a..ced06da86 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -21,6 +21,7 @@ from ..utils import ( shell_quote, subtitles_filename, dfxp2srt, + dc2srt, ISO639Utils, ) @@ -568,6 +569,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): continue else: sub_filenames.append(srt_file) + # TODO: Distinguish between different xml-formats + elif ext == 'xml': + self._downloader.report_warning( + 'You have requested to convert DC (XML) subtitles into another format, ' + 'which results in style information loss') + + dc_file = old_file + srt_file 
= subtitles_filename(filename, lang, 'srt')
+
+                with io.open(dc_file, 'rt', encoding='utf-8') as f:
+                    srt_data = dc2srt(f.read())
+
+                with io.open(srt_file, 'wt', encoding='utf-8') as f:
+                    f.write(srt_data)
+                old_file = srt_file
+
+                subs[lang] = {
+                    'ext': 'srt',
+                    'data': srt_data,
+                }
+
+                if new_ext == 'srt':
+                    continue
+                sub_filenames.append(srt_file)
 
             self.run_ffmpeg(old_file, new_file, ['-f', new_format])
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 41ca562f1..ead2ce471 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2432,6 +2432,62 @@ def dfxp2srt(dfxp_data):
 
     return ''.join(out)
 
+
+def _dc_text_content(element):
+    """Return the text of element and of all its descendants, in document order."""
+    # Hand-rolled rather than Element.itertext(), which old ElementTree
+    # versions (Python 2.6) lack
+    parts = [element.text or '']
+    for child in element:
+        parts.append(_dc_text_content(child))
+        # A child's tail is the text that follows it inside this element
+        parts.append(child.tail or '')
+    return ''.join(parts)
+
+
+# See https://web.archive.org/web/20140924175755/http://www.dlp.com/downloads/pdf_dlp_cinema_CineCanvas_Rev_C.pdf
+def dc2srt(xml_data):
+    """Convert DCSubtitle (CineCanvas) XML subtitles to SRT.
+
+    xml_data is the XML document as a text string; the SRT document is
+    returned as a string. Raises ValueError when the document contains no
+    Subtitle element. A Subtitle is skipped when parse_dfxp_time_expr
+    returns None for its TimeIn or TimeOut attribute.
+    """
+    xml = compat_etree_fromstring(xml_data.encode('utf-8'))
+    # Search the whole tree instead of only the first Font element: a
+    # document without Font then raises the ValueError below rather than
+    # an AttributeError on None, and Subtitle elements located elsewhere
+    # in the document are converted too
+    subtitles = xml.findall('.//Subtitle')
+
+    if not subtitles:
+        raise ValueError('Invalid DC/XML subtitle')
+
+    out = []
+    for subtitle in subtitles:
+        begin_time = parse_dfxp_time_expr(subtitle.attrib.get('TimeIn'))
+        end_time = parse_dfxp_time_expr(subtitle.attrib.get('TimeOut'))
+        # Test against None explicitly: a timestamp of 0 is valid but falsy
+        if begin_time is None or end_time is None:
+            continue
+
+        lines = [
+            _dc_text_content(line).strip()
+            for line in subtitle.findall('.//Text')]
+        # Each line is newline-terminated, which is the layout test_dc2srt
+        # expects; empty lines are dropped since a blank line ends an SRT cue
+        text = ''.join(line + '\n' for line in lines if line)
+
+        out.append('%d\n%s --> %s\n%s\n\n' % (
+            # Number only the emitted cues so skipped ones leave no gaps
+            len(out) + 1,
+            srt_subtitles_timecode(begin_time),
+            srt_subtitles_timecode(end_time),
+            text))
+
+    return ''.join(out)
+
 
 def cli_option(params, command_option, param):
     param = params.get(param)