From 94c7e182e2182c8cc733d513063d1c8870af8c38 Mon Sep 17 00:00:00 2001 From: Frank Wang Date: Thu, 25 Apr 2019 22:19:39 +0800 Subject: [PATCH 1/3] [linkedin:learning] add support for subtitle --- youtube_dl/extractor/linkedin.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 26fc703d1..25d1e01ad 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +from itertools import zip_longest from .common import InfoExtractor from ..utils import ( @@ -126,15 +127,37 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr')) + duration = int_or_none(video_data.get('durationInSeconds')) + subtitles = self.extract_subtitles(video_data['transcript'], duration) + return { 'id': self._get_video_id(video_data, course_slug, video_slug), 'title': title, 'formats': formats, + 'subtitles': subtitles, 'thumbnail': video_data.get('defaultThumbnail'), 'timestamp': float_or_none(video_data.get('publishedOn'), 1000), - 'duration': int_or_none(video_data.get('durationInSeconds')), + 'duration': duration, } + def _get_subtitles(self, transcript, duration): + counter = 0 + records = [] + if transcript and 'lines' in transcript: + lines = transcript['lines'] + for curr, next in zip_longest(lines, lines[1:], fillvalue={"transcriptStartAt": duration*1000, "caption": "THE END"}): + text = curr["caption"].strip() + if text: + counter += 1 + show = curr["transcriptStartAt"] + hide = next["transcriptStartAt"] + show = "{:02d}:{:02d}:{:02d},{:03d}".format(show//3600000, show%3600000//60000, show%60000//1000, show%1000) + hide = "{:02d}:{:02d}:{:02d},{:03d}".format(hide//3600000, hide%3600000//60000, hide%60000//1000, hide%1000) + records.append('%s\r\n%s --> %s\r\n%s\r\n' % (counter, show, hide, text)) + return {'en': [{'ext': 'srt', 'data': '\r\n'.join(records)}]} + else: + return {} + class LinkedInLearningCourseIE(LinkedInLearningBaseIE): IE_NAME = 'linkedin:learning:course' From 4284081fdae1e599ed62afe8558a0adc39cd9654 Mon Sep 17 00:00:00 2001 From: Frank Wang Date: Thu, 25 Apr 2019 22:30:31 +0800 Subject: [PATCH 2/3] [linkedin:learning] add support for subtitle checked the code with flake8 --- youtube_dl/extractor/linkedin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 25d1e01ad..690efc15c 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -145,14 +145,14 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): records = [] if transcript and 'lines' in transcript: lines = transcript['lines'] - for curr, next in zip_longest(lines, lines[1:], fillvalue={"transcriptStartAt": duration*1000, "caption": "THE END"}): + for curr, next in zip_longest(lines, lines[1:], fillvalue={"transcriptStartAt": duration * 1000, "caption": "THE END"}): text = curr["caption"].strip() if text: counter += 1 show = curr["transcriptStartAt"] hide = next["transcriptStartAt"] - show = "{:02d}:{:02d}:{:02d},{:03d}".format(show//3600000, show%3600000//60000, show%60000//1000, show%1000) - hide = "{:02d}:{:02d}:{:02d},{:03d}".format(hide//3600000, hide%3600000//60000, hide%60000//1000, hide%1000) + show = "{:02d}:{:02d}:{:02d},{:03d}".format(show // 3600000, show % 3600000 // 60000, show % 60000 // 1000, show % 1000) + hide = "{:02d}:{:02d}:{:02d},{:03d}".format(hide // 3600000, hide % 3600000 // 60000, hide % 60000 // 1000, hide % 1000) records.append('%s\r\n%s --> %s\r\n%s\r\n' % (counter, show, hide, text)) return {'en': [{'ext': 'srt', 'data': '\r\n'.join(records)}]} else: From 4157ae7d5c110ccd385e0f8512914c6d54166c0e Mon Sep 17 00:00:00 2001 From: Frank Wang Date: Fri, 26 Apr 2019 09:40:32 +0800 Subject: [PATCH 3/3] [linkedin:learning] add support for subtitle fix import error in Python 2.X --- youtube_dl/extractor/linkedin.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py index 690efc15c..e6b51a73b 100644 --- a/youtube_dl/extractor/linkedin.py +++ b/youtube_dl/extractor/linkedin.py @@ -2,7 +2,12 @@ from __future__ import unicode_literals import re -from itertools import zip_longest + +try: + from itertools import zip_longest +except ImportError: + from itertools import izip_longest as zip_longest + from .common import InfoExtractor from ..utils import (