From c5e61e32dac1a4f0a324b656e0c9ba45bfae9465 Mon Sep 17 00:00:00 2001
From: felix <felix.von.s@posteo.de>
Date: Mon, 7 Nov 2016 15:45:42 +0100
Subject: [PATCH] [common] extract subtitles info from m3u8 media

_extract_m3u8_formats is renamed to _extract_m3u8_formats_and_subtitles
and extended to properly handle subtitle references; a wrapper with the
old name is provided for compatibility.

_parse_m3u8_formats is likewise renamed and extended, but without adding
the compatibility wrapper; the test suite is adjusted to test the enhanced
method instead.
---
 test/test_InfoExtractor.py     | 29 +++++++++++++++++++++--------
 youtube_dl/extractor/common.py | 27 ++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 71f6608fe..d4f12848c 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -438,7 +438,14 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 1467,
                     'width': 1024,
                     'height': 576,
-                }]
+                }],
+                {
+                    'fra': [{
+                        'url': 'http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8',
+                        'ext': 'vtt',
+                        'protocol': 'm3u8_webvtt'
+                    }]
+                },
             ),
             (
                 # https://github.com/ytdl-org/youtube-dl/issues/11995
@@ -512,7 +519,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 2374,
                     'width': 1024,
                     'height': 576,
-                }]
+                }],
+                {},
             ),
             (
                 # https://github.com/ytdl-org/youtube-dl/issues/12211
@@ -571,7 +579,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 1396.736,
                     'width': 854,
                     'height': 480,
-                }]
+                }],
+                {},
             ),
             (
                 # http://www.twitch.tv/riotgames/v/6528877
@@ -641,7 +650,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 3214.134,
                     'width': 1280,
                     'height': 720,
-                }]
+                }],
+                {},
             ),
             (
                 # http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
@@ -676,7 +686,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 1200,
                     'width': 1280,
                     'height': 720,
-                }]
+                }],
+                {}
             ),
             (
                 # https://github.com/ytdl-org/youtube-dl/issues/18923
@@ -733,17 +744,19 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'acodec': 'none',
                     'width': 1280,
                     'height': 720,
-                }]
+                }],
+                {}
             ),
         ]
 
-        for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+        for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
             with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
                          mode='r', encoding='utf-8') as f:
-                formats = self.ie._parse_m3u8_formats(
+                formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
                     f.read(), m3u8_url, ext='mp4')
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
+                expect_value(self, subs, expected_subs, None)
 
     def test_parse_mpd_formats(self):
         _TEST_CASES = [
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index eaae5e484..824773c6b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1334,7 +1334,6 @@ class InfoExtractor(object):
 
         def _formats_key(f):
             # TODO remove the following workaround
-            from ..utils import determine_ext
             if not f.get('ext') and 'url' in f:
                 f['ext'] = determine_ext(f['url'])
 
@@ -1583,7 +1582,11 @@ class InfoExtractor(object):
             'format_note': 'Quality selection URL',
         }
 
-    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+    def _extract_m3u8_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
+        return fmts
+
+    def _extract_m3u8_formats_and_subtitles(self, m3u8_url, video_id, ext=None,
                               entry_protocol='m3u8', preference=None,
                               m3u8_id=None, note=None, errnote=None,
                               fatal=True, live=False, data=None, headers={},
@@ -1595,26 +1598,28 @@ class InfoExtractor(object):
             fatal=fatal, data=data, headers=headers, query=query)
 
         if res is False:
-            return []
+            return [], {}
 
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
-        return self._parse_m3u8_formats(
+        return self._parse_m3u8_formats_and_subtitles(
             m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
             preference=preference, m3u8_id=m3u8_id, live=live)
 
-    def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
+    def _parse_m3u8_formats_and_subtitles(self, m3u8_doc, m3u8_url, ext=None,
                             entry_protocol='m3u8', preference=None,
                             m3u8_id=None, live=False):
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
-            return []
+            return [], {}
 
         if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc):  # Apple FairPlay
-            return []
+            return [], {}
 
         formats = []
 
+        subtitles = {}
+
         format_url = lambda u: (
             u
             if re.match(r'^https?://', u)
@@ -1655,6 +1660,14 @@ class InfoExtractor(object):
             if not (media_type and group_id and name):
                 return
             groups.setdefault(group_id, []).append(media)
+            # <https://tools.ietf.org/html/draft-pantos-http-live-streaming-13#section-3.4.9>
+            if media_type == 'SUBTITLES' and media.get('URI'):  # nothing to fetch without URI
+                sub_ext = determine_ext(media['URI'])
+                sub_info = {'url': media['URI'], 'ext': sub_ext}
+                if sub_ext == 'm3u8':  # a playlist of subtitle segments, reported as WebVTT
+                    sub_info.update({'ext': 'vtt', 'protocol': 'm3u8_webvtt'})
+                # LANGUAGE is an optional attribute; file unlabelled tracks under 'und'
+                subtitles.setdefault(media.get('LANGUAGE') or 'und', []).append(sub_info)
             if media_type not in ('VIDEO', 'AUDIO'):
                 return
             media_url = media.get('URI')
@@ -1780,7 +1793,7 @@ class InfoExtractor(object):
                     formats.append(http_f)
 
                 last_stream_inf = {}
-        return formats
+        return formats, subtitles
 
     @staticmethod
     def _xpath_ns(path, namespace=None):