From cbb5155060f597fd1e2fb54defd76bc97e94267b Mon Sep 17 00:00:00 2001 From: Niklas Date: Tue, 2 Oct 2018 18:21:48 +0200 Subject: [PATCH 1/4] Merge TTML subtitle cues with same timecodes while converting to SRT --- youtube_dl/utils.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e84d35d4d..17ea7bc09 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2831,6 +2831,9 @@ def dfxp2srt(dfxp_data): continue default_style.update(style) + last_begin_time = None + last_end_time = None + for para, index in zip(paras, itertools.count(1)): begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) end_time = parse_dfxp_time_expr(para.attrib.get('end')) @@ -2841,12 +2844,20 @@ def dfxp2srt(dfxp_data): if not dur: continue end_time = begin_time + dur - out.append('%d\n%s --> %s\n%s\n\n' % ( - index, - srt_subtitles_timecode(begin_time), - srt_subtitles_timecode(end_time), - parse_node(para))) + if begin_time == last_begin_time and end_time == last_end_time: + out.append('%s\n' % (parse_node(para))) + else: + out.append('\n%d\n%s --> %s\n%s\n' % ( + index, + srt_subtitles_timecode(begin_time), + srt_subtitles_timecode(end_time), + parse_node(para))) + + last_begin_time = begin_time + last_end_time = end_time + + out.append('\n') return ''.join(out) From 405947c32e6975c854e28e69836393b275e88539 Mon Sep 17 00:00:00 2001 From: Niklas Date: Tue, 2 Oct 2018 19:10:33 +0200 Subject: [PATCH 2/4] Reflect merged cues in indexes --- youtube_dl/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 17ea7bc09..c9586e9b1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2833,6 +2833,7 @@ def dfxp2srt(dfxp_data): last_begin_time = None last_end_time = None + index_offset = 0 for para, index in zip(paras, itertools.count(1)): begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) @@ -2846,10 +2847,10 @@ def dfxp2srt(dfxp_data): end_time = begin_time + dur if begin_time == last_begin_time and end_time == last_end_time: + index_offset += 1 out.append('%s\n' % (parse_node(para))) else: - out.append('\n%d\n%s --> %s\n%s\n' % ( - index, + index - index_offset, srt_subtitles_timecode(begin_time), srt_subtitles_timecode(end_time), parse_node(para))) From 7ecd95aa9164ad47252c6fa134692f3d9e3e8d49 Mon Sep 17 00:00:00 2001 From: Niklas Date: Tue, 2 Oct 2018 19:11:12 +0200 Subject: [PATCH 3/4] Don't add preceding newline when converting TTML to SRT --- youtube_dl/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index c9586e9b1..cec206742 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2850,6 +2850,9 @@ def dfxp2srt(dfxp_data): index_offset += 1 out.append('%s\n' % (parse_node(para))) else: + if out: + out.append('\n') + out.append('%d\n%s --> %s\n%s\n' % ( index - index_offset, srt_subtitles_timecode(begin_time), srt_subtitles_timecode(end_time), From 582e1d10e7ee52bd177fca3298792c329e22340f Mon Sep 17 00:00:00 2001 From: Niklas Date: Tue, 2 Oct 2018 20:33:40 +0200 Subject: [PATCH 4/4] Update TTML->SRT conversion test --- test/test_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9e28e008f..589a1c2bb 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1196,18 +1196,15 @@ The first line srt_data = '''1 00:00:02,080 --> 00:00:05,839 default stylecustom style - -2 -00:00:02,080 --> 00:00:05,839 part 1 part 2 -3 +2 00:00:05,839 --> 00:00:09,560 line 3 part 3 -4 +3 00:00:09,560 --> 00:00:12,359 inner style