From 54269370755d7cb2462a0615a4ef5a3f2c050045 Mon Sep 17 00:00:00 2001
From: Niklas Sombert <niklas@ytvwld.de>
Date: Thu, 18 Jan 2018 13:29:18 +0100
Subject: [PATCH 1/6] [hhu] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/hhu.py        | 72 ++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 youtube_dl/extractor/hhu.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 44120cae2..81021d5b5 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -438,6 +438,7 @@ from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hgtv import HGTVComShowIE
+from .hhu import HHUIE
 from .hketv import HKETVIE
 from .hidive import HiDiveIE
 from .historicfilms import HistoricFilmsIE
diff --git a/youtube_dl/extractor/hhu.py b/youtube_dl/extractor/hhu.py
new file mode 100644
index 000000000..5ecf4a9bb
--- /dev/null
+++ b/youtube_dl/extractor/hhu.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class HHUIE(InfoExtractor):
+    _VALID_URL = r'https://mediathek\.hhu\.de/watch/(?P<id>.+)'
+    _TEST = {
+        'url': 'https://mediathek.hhu.de/watch/2dd05982-ea45-4108-9620-0c36e6ed8df5',
+        'md5': 'b99ff77f2148b1e754555abdf53f0e51',
+        'info_dict': {
+            'id': '2dd05982-ea45-4108-9620-0c36e6ed8df5',
+            'ext': 'mp4',
+            'title': 'Das Multimediazentrum',
+            'description': '',
+            'uploader_id': 'clames',
+            'thumbnail': 'https://mediathek.hhu.de/thumbs/2dd05982-ea45-4108-9620-0c36e6ed8df5/thumb_000.jpg',
+        }
+    }
+
+    def _real_extract(self, url):
+        # TODO: Login for some videos.
+        video_id = self._match_id(url)
+        webpage, webpage_url = self._download_webpage_handle(url, video_id)
+        if webpage_url.geturl().startswith("https://sts."):
+            self.raise_login_required()
+        file_id = self._html_search_regex(
+            r"{ file: '\/movies\/(.+?)\/v_100\.mp4', label: '",
+            webpage, 'file_id'
+        )
+        formats = [
+            ({'url': format_url.format(file_id)})
+            for format_url in (
+                'https://mediathek.hhu.de/movies/{}/v_10.webm',
+                'https://mediathek.hhu.de/movies/{}/v_10.mp4',
+                'https://mediathek.hhu.de/movies/{}/v_50.webm',
+                'https://mediathek.hhu.de/movies/{}/v_50.mp4',
+                'https://mediathek.hhu.de/movies/{}/v_100.webm',
+                'https://mediathek.hhu.de/movies/{}/v_100.mp4',
+            )
+        ]
+        try:
+            title = self._og_search_title(webpage)
+        except:
+            title = self._html_search_regex(
+                r'<h1 id="mt_watch-headline-title">\s+(.+?)\s+<\/h1>',
+                webpage, 'title'
+            )
+        try:
+            description = self._og_search_description(webpage)
+        except:
+            description = self._html_search_regex(
+                r'<p id="mt_watch-description" class="watch-description">\s+(.+?)\s+<\/p>',
+                webpage, 'description', fatal=False
+            )
+        thumbnail = self._og_search_property(
+            'image:secure_url', webpage, 'thumbnail'
+        )
+        uploader_id = self._html_search_regex(
+            r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href=".+">(.+?)<\/a>',
+            webpage, 'uploader', fatal=False
+        )
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'uploader_id': uploader_id,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }

From f08371c07ced991c580412e0b1672e52a4e1e5b5 Mon Sep 17 00:00:00 2001
From: Niklas Sombert <niklas@ytvwld.de>
Date: Wed, 2 Oct 2019 21:18:36 +0200
Subject: [PATCH 2/6] [hhu] Parse video player config

---
 youtube_dl/extractor/hhu.py | 98 ++++++++++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 29 deletions(-)

diff --git a/youtube_dl/extractor/hhu.py b/youtube_dl/extractor/hhu.py
index 5ecf4a9bb..c994662e2 100644
--- a/youtube_dl/extractor/hhu.py
+++ b/youtube_dl/extractor/hhu.py
@@ -2,6 +2,10 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..utils import js_to_json, RegexNotFoundError, urljoin
+
+import json
+import re
 
 
 class HHUIE(InfoExtractor):
@@ -20,53 +24,89 @@ class HHUIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        # TODO: Login for some videos.
         video_id = self._match_id(url)
         webpage, webpage_url = self._download_webpage_handle(url, video_id)
         if webpage_url.geturl().startswith("https://sts."):
             self.raise_login_required()
-        file_id = self._html_search_regex(
-            r"{ file: '\/movies\/(.+?)\/v_100\.mp4', label: '",
-            webpage, 'file_id'
-        )
-        formats = [
-            ({'url': format_url.format(file_id)})
-            for format_url in (
-                'https://mediathek.hhu.de/movies/{}/v_10.webm',
-                'https://mediathek.hhu.de/movies/{}/v_10.mp4',
-                'https://mediathek.hhu.de/movies/{}/v_50.webm',
-                'https://mediathek.hhu.de/movies/{}/v_50.mp4',
-                'https://mediathek.hhu.de/movies/{}/v_100.webm',
-                'https://mediathek.hhu.de/movies/{}/v_100.mp4',
-            )
-        ]
+            # Some videos need a login, maybe TODO.
         try:
-            title = self._og_search_title(webpage)
-        except:
+            config_js = self._search_regex(
+                r'playerInstance\.setup\(([^;]+)\);', webpage, 'config_js'
+            )
+            # remove 'link: encodeURI("<our url>"),'
+            if 'link: encodeURI' in config_js:
+                encode_begin = config_js.find('link: encodeURI')
+                encode_end = config_js.find(')', encode_begin)
+                config_js = (
+                    config_js[:encode_begin] + config_js[encode_end + 2:]
+                )
+                del encode_begin, encode_end
+            config = json.loads(js_to_json(config_js))
+            if len(config['playlist']) > 1:
+                self.report_warning(
+                    'more than one video, just taking the first one'
+                )
+            video = config['playlist'][0]
+            formats = [
+                {
+                    'url': urljoin('https://mediathek.hhu.de/', source['file']),
+                    'format_note': source.get('label'),
+                    'format_id': source['file'].split("/")[-1],
+                }
+                for source in video['sources']
+            ]
+            formats.reverse()  # config sorts from highest to lowest quality
+            title = video.get('title')
+            thumbnail = video.get('image')
+            thumbnail = urljoin('https://mediathek.hhu.de/', thumbnail) if thumbnail else None
+
+        except (RegexNotFoundError, ValueError):
+            self.report_warning('failed to get player config, guessing formats')
+            # This will likely work but better warn.
+            file_id = self._html_search_regex(
+                r"{ file: '\/movies\/(.+?)\/v_100\.mp4', label: '",
+                webpage, 'file_id'
+            )
+            formats = [
+                ({'url': format_url.format(file_id)})
+                for format_url in (
+                    'https://mediathek.hhu.de/movies/{}/v_10.webm',
+                    'https://mediathek.hhu.de/movies/{}/v_10.mp4',
+                    'https://mediathek.hhu.de/movies/{}/v_50.webm',
+                    'https://mediathek.hhu.de/movies/{}/v_50.mp4',
+                    'https://mediathek.hhu.de/movies/{}/v_100.webm',
+                    'https://mediathek.hhu.de/movies/{}/v_100.mp4',
+                )
+            ]
+            title = thumbnail = None
+        if not title:
             title = self._html_search_regex(
                 r'<h1 id="mt_watch-headline-title">\s+(.+?)\s+<\/h1>',
                 webpage, 'title'
             )
-        try:
-            description = self._og_search_description(webpage)
-        except:
-            description = self._html_search_regex(
-                r'<p id="mt_watch-description" class="watch-description">\s+(.+?)\s+<\/p>',
-                webpage, 'description', fatal=False
-            )
-        thumbnail = self._og_search_property(
-            'image:secure_url', webpage, 'thumbnail'
+        if not title:
+            title = self._og_search_title(webpage, fatal=False)
+        description = self._html_search_regex(
+            r'<p id="mt_watch-description" class="watch-description">\s+(.+?)\s+<\/p>',
+            webpage, 'description', fatal=False
         )
-        uploader_id = self._html_search_regex(
+        if not description:
+            description = self._og_search_description(webpage, default='')
+        if not thumbnail:
+            thumbnail = self._og_search_property(
+                'image:secure_url', webpage, 'thumbnail', fatal=False
+            )
+        uploader = self._html_search_regex(
             r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href=".+">(.+?)<\/a>',
             webpage, 'uploader', fatal=False
         )
 
+
         return {
             'id': video_id,
             'title': title,
             'description': description,
-            'uploader_id': uploader_id,
+            'uploader': uploader,
             'thumbnail': thumbnail,
             'formats': formats,
         }

From 9f07fb23820cc962e365c4f712c9da7c81e9841d Mon Sep 17 00:00:00 2001
From: Niklas Sombert <niklas@ytvwld.de>
Date: Wed, 2 Oct 2019 21:18:57 +0200
Subject: [PATCH 3/6] [hhu] Add more details

---
 youtube_dl/extractor/hhu.py | 57 +++++++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/hhu.py b/youtube_dl/extractor/hhu.py
index c994662e2..ca4a36e18 100644
--- a/youtube_dl/extractor/hhu.py
+++ b/youtube_dl/extractor/hhu.py
@@ -2,7 +2,9 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import js_to_json, RegexNotFoundError, urljoin
+from ..utils import (
+    js_to_json, RegexNotFoundError, urljoin, get_element_by_id, unified_strdate
+)
 
 import json
 import re
@@ -18,7 +20,18 @@ class HHUIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Das Multimediazentrum',
             'description': '',
+            'categories': ['Imagefilme'],
+            'tags': [
+                'MMZ', 'Multimediazentrum', 'Heinrich-Heine-Universität',
+                'UKD', 'eLearning', 'Abstimmsysteme', 'Portale',
+                'Studierendenportal', 'Lehrfilme', 'Lehrfilm',
+                'Operationsfilme', 'Vorlesungsaufzeichnung', 'Multimedia',
+                'ZIM', 'HHU', 'Ute', 'Clames',  # yes, that's incorrect
+            ],
+            'uploader': 'clames',
             'uploader_id': 'clames',
+            'license': 'CC BY 3.0 DE',
+            'upload_date': '20150126',
             'thumbnail': 'https://mediathek.hhu.de/thumbs/2dd05982-ea45-4108-9620-0c36e6ed8df5/thumb_000.jpg',
         }
     }
@@ -100,13 +113,53 @@ class HHUIE(InfoExtractor):
             r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href=".+">(.+?)<\/a>',
             webpage, 'uploader', fatal=False
         )
-
+        uploader_id = self._html_search_regex(
+            r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href="/user/(.+)">.+?<\/a>',
+            webpage, 'uploader_id', fatal=False
+        )
+        # CC licenses get a image with an appropriate alt text
+        license_img = get_element_by_id('mt_watch_license', webpage)
+        if license_img:
+            license = self._search_regex(
+                r'alt="(.+)"', license_img, 'license_img', fatal=False
+            )
+        if not license_img or not license:
+            # other licenses are just text
+            license = self._html_search_regex(
+                r'<div id="mt_content_placeholder_videotabs_mt_videotabs_formview_video_license" class="video-license">(.+)<\/div>',
+                webpage, 'license_text', fatal=False
+            )
+        upload_date = _date(self._html_search_regex(
+            r'<span class="watch-information-date added">(.+?)<\/span>',
+            webpage, 'upload_date', fatal=False
+        ))
+        category = self._html_search_regex(
+            r'<a href="/category/.+">(.+)</a>', webpage, 'category', fatal=False
+        )
+        tags_html = get_element_by_id('mt_watch_info_tag_list', webpage)
+        tags = _tags(tags_html)
 
         return {
             'id': video_id,
             'title': title,
             'description': description,
+            'license': license,
+            'categories': [category],  # there's just one category per video
+            'tags': tags,
             'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
             'thumbnail': thumbnail,
             'formats': formats,
         }
+
+
+def _date(str_containing_date):
+    """Parse 'at (M)M/(D)D/YYYY' to YYYYMMDD; a missing date (None or '') gives None."""
+    return unified_strdate(str_containing_date.split(' ')[1], day_first=False) if str_containing_date else None
+
+
+def _tags(tags_html):
+    """Parse the tag list markup into tag names; a missing list (None) yields []."""
+    matches = re.findall(r'<a.+>(.+)<\/a>', tags_html or '')
+    return [match.rstrip(',') for match in matches]

From 233400f3d92dd3ac8ef4664f9679eb09c9132d98 Mon Sep 17 00:00:00 2001
From: Niklas Sombert <niklas@ytvwld.de>
Date: Wed, 9 Oct 2019 20:04:51 +0200
Subject: [PATCH 4/6] [hhu] Don't place closing braces/brackets/parentheses on
 their own lines

---
 youtube_dl/extractor/hhu.py | 59 ++++++++++++-------------------------
 1 file changed, 19 insertions(+), 40 deletions(-)

diff --git a/youtube_dl/extractor/hhu.py b/youtube_dl/extractor/hhu.py
index ca4a36e18..a23ae7d96 100644
--- a/youtube_dl/extractor/hhu.py
+++ b/youtube_dl/extractor/hhu.py
@@ -26,15 +26,12 @@ class HHUIE(InfoExtractor):
                 'UKD', 'eLearning', 'Abstimmsysteme', 'Portale',
                 'Studierendenportal', 'Lehrfilme', 'Lehrfilm',
                 'Operationsfilme', 'Vorlesungsaufzeichnung', 'Multimedia',
-                'ZIM', 'HHU', 'Ute', 'Clames',  # yes, that's incorrect
-            ],
+                'ZIM', 'HHU', 'Ute', 'Clames', ],  # yes, that's incorrect
             'uploader': 'clames',
             'uploader_id': 'clames',
             'license': 'CC BY 3.0 DE',
             'upload_date': '20150126',
-            'thumbnail': 'https://mediathek.hhu.de/thumbs/2dd05982-ea45-4108-9620-0c36e6ed8df5/thumb_000.jpg',
-        }
-    }
+            'thumbnail': 'https://mediathek.hhu.de/thumbs/2dd05982-ea45-4108-9620-0c36e6ed8df5/thumb_000.jpg', }}
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -44,30 +41,25 @@ class HHUIE(InfoExtractor):
             # Some videos need a login, maybe TODO.
         try:
             config_js = self._search_regex(
-                r'playerInstance\.setup\(([^;]+)\);', webpage, 'config_js'
-            )
+                r'playerInstance\.setup\(([^;]+)\);', webpage, 'config_js')
             # remove 'link: encodeURI("<our url>"),'
             if 'link: encodeURI' in config_js:
                 encode_begin = config_js.find('link: encodeURI')
                 encode_end = config_js.find(')', encode_begin)
                 config_js = (
-                    config_js[:encode_begin] + config_js[encode_end + 2:]
-                )
+                    config_js[:encode_begin] + config_js[encode_end + 2:])
                 del encode_begin, encode_end
             config = json.loads(js_to_json(config_js))
             if len(config['playlist']) > 1:
                 self.report_warning(
-                    'more than one video, just taking the first one'
-                )
+                    'more than one video, just taking the first one')
             video = config['playlist'][0]
             formats = [
                 {
                     'url': urljoin('https://mediathek.hhu.de/', source['file']),
                     'format_note': source.get('label'),
-                    'format_id': source['file'].split("/")[-1],
-                }
-                for source in video['sources']
-            ]
+                    'format_id': source['file'].split("/")[-1], }
+                for source in video['sources']]
             formats.reverse()  # config sorts from highest to lowest quality
             title = video.get('title')
             thumbnail = video.get('image')
@@ -78,8 +70,7 @@ class HHUIE(InfoExtractor):
             # This will likely work but better warn.
             file_id = self._html_search_regex(
                 r"{ file: '\/movies\/(.+?)\/v_100\.mp4', label: '",
-                webpage, 'file_id'
-            )
+                webpage, 'file_id')
             formats = [
                 ({'url': format_url.format(file_id)})
                 for format_url in (
@@ -88,54 +79,43 @@ class HHUIE(InfoExtractor):
                     'https://mediathek.hhu.de/movies/{}/v_50.webm',
                     'https://mediathek.hhu.de/movies/{}/v_50.mp4',
                     'https://mediathek.hhu.de/movies/{}/v_100.webm',
-                    'https://mediathek.hhu.de/movies/{}/v_100.mp4',
-                )
-            ]
+                    'https://mediathek.hhu.de/movies/{}/v_100.mp4',)]
             title = thumbnail = None
         if not title:
             title = self._html_search_regex(
                 r'<h1 id="mt_watch-headline-title">\s+(.+?)\s+<\/h1>',
-                webpage, 'title'
-            )
+                webpage, 'title')
         if not title:
             title = self._og_search_title(webpage, fatal=False)
         description = self._html_search_regex(
             r'<p id="mt_watch-description" class="watch-description">\s+(.+?)\s+<\/p>',
-            webpage, 'description', fatal=False
-        )
+            webpage, 'description', fatal=False)
         if not description:
             description = self._og_search_description(webpage, default='')
         if not thumbnail:
             thumbnail = self._og_search_property(
-                'image:secure_url', webpage, 'thumbnail', fatal=False
-            )
+                'image:secure_url', webpage, 'thumbnail', fatal=False)
         uploader = self._html_search_regex(
             r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href=".+">(.+?)<\/a>',
-            webpage, 'uploader', fatal=False
-        )
+            webpage, 'uploader', fatal=False)
         uploader_id = self._html_search_regex(
             r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href="/user/(.+)">.+?<\/a>',
-            webpage, 'uploader_id', fatal=False
-        )
+            webpage, 'uploader_id', fatal=False)
         # CC licenses get a image with an appropriate alt text
         license_img = get_element_by_id('mt_watch_license', webpage)
         if license_img:
             license = self._search_regex(
-                r'alt="(.+)"', license_img, 'license_img', fatal=False
-            )
+                r'alt="(.+)"', license_img, 'license_img', fatal=False)
         if not license_img or not license:
             # other licenses are just text
             license = self._html_search_regex(
                 r'<div id="mt_content_placeholder_videotabs_mt_videotabs_formview_video_license" class="video-license">(.+)<\/div>',
-                webpage, 'license_text', fatal=False
-            )
+                webpage, 'license_text', fatal=False)
         upload_date = _date(self._html_search_regex(
             r'<span class="watch-information-date added">(.+?)<\/span>',
-            webpage, 'upload_date', fatal=False
-        ))
+            webpage, 'upload_date', fatal=False))
         category = self._html_search_regex(
-            r'<a href="/category/.+">(.+)</a>', webpage, 'category', fatal=False
-        )
+            r'<a href="/category/.+">(.+)</a>', webpage, 'category', fatal=False)
         tags_html = get_element_by_id('mt_watch_info_tag_list', webpage)
         tags = _tags(tags_html)
 
@@ -150,8 +130,7 @@ class HHUIE(InfoExtractor):
             'uploader_id': uploader_id,
             'upload_date': upload_date,
             'thumbnail': thumbnail,
-            'formats': formats,
-        }
+            'formats': formats, }
 
 
 def _date(str_containing_date):

From dada9f6db9427f0e8738ecf6551a9a0fc865bbb0 Mon Sep 17 00:00:00 2001
From: Niklas Sombert <niklas@ytvwld.de>
Date: Wed, 9 Oct 2019 21:46:05 +0200
Subject: [PATCH 5/6] [common] _parse_jwplayer_formats: Accept more labels as
 formats

Some labels are of the form 'low quality (320p)'.
This commit changes the regex so that the whole label is searched
for the height, not just the beginning of the label.
---
 youtube_dl/extractor/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 859786617..4d9ee81ab 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2750,7 +2750,7 @@ class InfoExtractor(object):
                     # Often no height is provided but there is a label in
                     # format like "1080p", "720p SD", or 1080.
                     height = int_or_none(self._search_regex(
-                        r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+                        r'\b(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
                         'height', default=None))
                 a_format = {
                     'url': source_url,

From 4d52506dbefbd4dafd90689fcce820f522bad983 Mon Sep 17 00:00:00 2001
From: Niklas Sombert <niklas@ytvwld.de>
Date: Wed, 9 Oct 2019 21:39:40 +0200
Subject: [PATCH 6/6] [hhu] Use _parse_jwplayer_data

---
 youtube_dl/extractor/hhu.py | 92 ++++++++++++++-----------------------
 1 file changed, 35 insertions(+), 57 deletions(-)

diff --git a/youtube_dl/extractor/hhu.py b/youtube_dl/extractor/hhu.py
index a23ae7d96..13eaca6fc 100644
--- a/youtube_dl/extractor/hhu.py
+++ b/youtube_dl/extractor/hhu.py
@@ -3,10 +3,9 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    js_to_json, RegexNotFoundError, urljoin, get_element_by_id, unified_strdate
+    js_to_json, RegexNotFoundError, get_element_by_id, unified_strdate
 )
 
-import json
 import re
 
 
@@ -49,88 +48,67 @@ class HHUIE(InfoExtractor):
                 config_js = (
                     config_js[:encode_begin] + config_js[encode_end + 2:])
                 del encode_begin, encode_end
-            config = json.loads(js_to_json(config_js))
-            if len(config['playlist']) > 1:
-                self.report_warning(
-                    'more than one video, just taking the first one')
-            video = config['playlist'][0]
-            formats = [
-                {
-                    'url': urljoin('https://mediathek.hhu.de/', source['file']),
-                    'format_note': source.get('label'),
-                    'format_id': source['file'].split("/")[-1], }
-                for source in video['sources']]
-            formats.reverse()  # config sorts from highest to lowest quality
-            title = video.get('title')
-            thumbnail = video.get('image')
-            thumbnail = urljoin('https://mediathek.hhu.de/', thumbnail) if thumbnail else None
-
+            config = self._parse_json(
+                config_js, video_id, transform_source=js_to_json)
+            info = self._parse_jwplayer_data(
+                config, video_id, require_title=False,
+                base_url='https://mediathek.hhu.de/')
         except (RegexNotFoundError, ValueError):
             self.report_warning('failed to get player config, guessing formats')
             # This will likely work but better warn.
             file_id = self._html_search_regex(
                 r"{ file: '\/movies\/(.+?)\/v_100\.mp4', label: '",
                 webpage, 'file_id')
-            formats = [
-                ({'url': format_url.format(file_id)})
-                for format_url in (
-                    'https://mediathek.hhu.de/movies/{}/v_10.webm',
-                    'https://mediathek.hhu.de/movies/{}/v_10.mp4',
-                    'https://mediathek.hhu.de/movies/{}/v_50.webm',
-                    'https://mediathek.hhu.de/movies/{}/v_50.mp4',
-                    'https://mediathek.hhu.de/movies/{}/v_100.webm',
-                    'https://mediathek.hhu.de/movies/{}/v_100.mp4',)]
-            title = thumbnail = None
-        if not title:
-            title = self._html_search_regex(
+            info = {
+                'id': video_id,  # youtube-dl requires the identifier under the 'id' key
+                'formats': [
+                    {'url': format_url.format(file_id)}
+                    for format_url in (  # '{0}': auto-numbered '{}' fails on Python 2.6
+                        'https://mediathek.hhu.de/movies/{0}/v_10.webm',
+                        'https://mediathek.hhu.de/movies/{0}/v_10.mp4',
+                        'https://mediathek.hhu.de/movies/{0}/v_50.webm',
+                        'https://mediathek.hhu.de/movies/{0}/v_50.mp4',
+                        'https://mediathek.hhu.de/movies/{0}/v_100.webm',
+                        'https://mediathek.hhu.de/movies/{0}/v_100.mp4',)]}
+        if not info.get('title'):
+            info['title'] = self._html_search_regex(
                 r'<h1 id="mt_watch-headline-title">\s+(.+?)\s+<\/h1>',
                 webpage, 'title')
-        if not title:
-            title = self._og_search_title(webpage, fatal=False)
-        description = self._html_search_regex(
+        if not info.get('title'):
+            info['title'] = self._og_search_title(webpage, fatal=False)
+        info['description'] = self._html_search_regex(
             r'<p id="mt_watch-description" class="watch-description">\s+(.+?)\s+<\/p>',
             webpage, 'description', fatal=False)
-        if not description:
-            description = self._og_search_description(webpage, default='')
-        if not thumbnail:
-            thumbnail = self._og_search_property(
+        if not info.get('description'):
+            info['description'] = self._og_search_description(webpage, default='')
+        if not info.get('thumbnail'):
+            info['thumbnail'] = self._og_search_property(
                 'image:secure_url', webpage, 'thumbnail', fatal=False)
-        uploader = self._html_search_regex(
+        info['uploader'] = self._html_search_regex(
             r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href=".+">(.+?)<\/a>',
             webpage, 'uploader', fatal=False)
-        uploader_id = self._html_search_regex(
+        info['uploader_id'] = self._html_search_regex(
             r'<a id="mt_content_placeholder_videoinfo_createdby" class="author" href="/user/(.+)">.+?<\/a>',
             webpage, 'uploader_id', fatal=False)
         # CC licenses get a image with an appropriate alt text
         license_img = get_element_by_id('mt_watch_license', webpage)
         if license_img:
-            license = self._search_regex(
+            info['license'] = self._search_regex(
                 r'alt="(.+)"', license_img, 'license_img', fatal=False)
-        if not license_img or not license:
+        if not license_img or not info.get('license'):
             # other licenses are just text
-            license = self._html_search_regex(
+            info['license'] = self._html_search_regex(
                 r'<div id="mt_content_placeholder_videotabs_mt_videotabs_formview_video_license" class="video-license">(.+)<\/div>',
                 webpage, 'license_text', fatal=False)
-        upload_date = _date(self._html_search_regex(
+        info['upload_date'] = _date(self._html_search_regex(
             r'<span class="watch-information-date added">(.+?)<\/span>',
             webpage, 'upload_date', fatal=False))
         category = self._html_search_regex(
             r'<a href="/category/.+">(.+)</a>', webpage, 'category', fatal=False)
+        info['categories'] = [category]  # there's just one category per video
         tags_html = get_element_by_id('mt_watch_info_tag_list', webpage)
-        tags = _tags(tags_html)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'license': license,
-            'categories': [category],  # there's just one category per video
-            'tags': tags,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'upload_date': upload_date,
-            'thumbnail': thumbnail,
-            'formats': formats, }
+        info['tags'] = _tags(tags_html)
+        return info
 
 
 def _date(str_containing_date):