From a7ca0f930355782f60f0b75fa05c09e90814d1b0 Mon Sep 17 00:00:00 2001
From: rubyist <rubyist.personal@openmailbox.org>
Date: Thu, 27 Feb 2020 18:52:48 -0800
Subject: [PATCH 1/5] Add initial extractor for Matter Online

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/matter.py     | 48 ++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100644 youtube_dl/extractor/matter.py
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 64d1fa251..8fd10bed8 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -601,6 +601,7 @@ from .markiza import (
 )
 from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
+from .matter import MatterIE
 from .mdr import MDRIE
 from .mediaset import MediasetIE
 from .mediasite import (
diff --git a/youtube_dl/extractor/matter.py b/youtube_dl/extractor/matter.py
new file mode 100644
index 000000000..ec427625b
--- /dev/null
+++ b/youtube_dl/extractor/matter.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MatterIE(InfoExtractor):
+    """
+    InfoExtractor for Matter Music
+
+    This class should be used to handle tracks. Another class (TODO) will be
+    used to implement playlists or other content.
+    """
+    _VALID_URL = r'https?://app.matter.online/tracks/(?P<id>\d+)/?'
+    _TESTS = {
+        # TODO: Implement
+    }
+
+    def _real_extract(self, url):
+        track_id = self._match_id(url)
+        webpage = self._download_webpage(
+            "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded" % track_id, track_id
+        )
+
+        author = self._html_search_regex(
+            r'<a href="https://app.matter.online/artists/user_\d+" target="[^"]+">([^<]+)</a>',
+            webpage, "author"
+        )
+        title = self._html_search_regex(
+            r'<a href="https://app.matter.online/tracks/\d+" target="[^"]+">([^<]+)</a>',
+            webpage, "title"
+        )
+        download_url = self._html_search_regex(
+            r'<source src="(https://matter-production.s3.amazonaws.com/audios/[^\.]+\.[^"]+)"/>',
+            webpage, "download_url"
+        )
+        artwork = self._html_search_regex(
+            r'style="background: url\((https://matter-production.s3.amazonaws.com/images/[^\.]+\.[^\)]+)\)',
+            webpage, "artwork"
+        )
+
+        return {
+            'id': track_id,
+            'url': download_url,
+            'title': title,
+            'uploader': author,
+            'thumbnail': artwork,
+        }

From 8c5c97a0d36e37d552c56ff03cda0fea2651e5d7 Mon Sep 17 00:00:00 2001
From: rubyist <rubyist.personal@openmailbox.org>
Date: Thu, 27 Feb 2020 19:06:16 -0800
Subject: [PATCH 2/5] Be a little less specific about what an artist username
 looks like

---
 youtube_dl/extractor/matter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/matter.py b/youtube_dl/extractor/matter.py
index ec427625b..1d2db7451 100644
--- a/youtube_dl/extractor/matter.py
+++ b/youtube_dl/extractor/matter.py
@@ -23,7 +23,7 @@ class MatterIE(InfoExtractor):
         )
 
         author = self._html_search_regex(
-            r'<a href="https://app.matter.online/artists/user_\d+" target="[^"]+">([^<]+)</a>',
+            r'<a href="https://app.matter.online/artists/[^"]+" target="[^"]+">([^<]+)</a>',
             webpage, "author"
         )
         title = self._html_search_regex(

From b5879f6e4466a8573ce58ffc39d3f7b541d9c98a Mon Sep 17 00:00:00 2001
From: rubyist <rubyist.personal@openmailbox.org>
Date: Thu, 27 Feb 2020 19:10:31 -0800
Subject: [PATCH 3/5] Don't use _html_search_regex when there's no HTML to
 filter out

---
 youtube_dl/extractor/matter.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/youtube_dl/extractor/matter.py b/youtube_dl/extractor/matter.py
index 1d2db7451..fb47bbff0 100644
--- a/youtube_dl/extractor/matter.py
+++ b/youtube_dl/extractor/matter.py
@@ -22,19 +22,19 @@ class MatterIE(InfoExtractor):
             "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded" % track_id, track_id
         )
 
-        author = self._html_search_regex(
+        author = self._search_regex(
             r'<a href="https://app.matter.online/artists/[^"]+" target="[^"]+">([^<]+)</a>',
             webpage, "author"
         )
-        title = self._html_search_regex(
+        title = self._search_regex(
             r'<a href="https://app.matter.online/tracks/\d+" target="[^"]+">([^<]+)</a>',
             webpage, "title"
         )
-        download_url = self._html_search_regex(
+        download_url = self._search_regex(
             r'<source src="(https://matter-production.s3.amazonaws.com/audios/[^\.]+\.[^"]+)"/>',
             webpage, "download_url"
         )
-        artwork = self._html_search_regex(
+        artwork = self._search_regex(
             r'style="background: url\((https://matter-production.s3.amazonaws.com/images/[^\.]+\.[^\)]+)\)',
             webpage, "artwork"
         )

From c1020cf113592b26057ef10dae245f2f93b08a59 Mon Sep 17 00:00:00 2001
From: rubyist <rubyist.personal@openmailbox.org>
Date: Thu, 27 Feb 2020 19:42:51 -0800
Subject: [PATCH 4/5] Added tests for Matter extractor

---
 youtube_dl/extractor/matter.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/matter.py b/youtube_dl/extractor/matter.py
index fb47bbff0..a0e96d278 100644
--- a/youtube_dl/extractor/matter.py
+++ b/youtube_dl/extractor/matter.py
@@ -12,9 +12,23 @@ class MatterIE(InfoExtractor):
     used to implement playlists or other content.
     """
     _VALID_URL = r'https?://app.matter.online/tracks/(?P<id>\d+)/?'
-    _TESTS = {
-        # TODO: Implement
-    }
+    _TESTS = [{
+        'url': 'https://app.matter.online/tracks/12866',
+        'info_dict': {
+            'id': '12866',
+            'ext': 'mp3',
+            'title': 'Beautiful type beat',
+            'uploader': 'internet user',
+        },
+    }, {
+        'url': 'https://app.matter.online/tracks/18891',
+        'info_dict': {
+            'id': '18891',
+            'ext': 'mp3',
+            'title': 'starstruck',
+            'uploader': 'iwi.',
+        }
+    }]
 
     def _real_extract(self, url):
         track_id = self._match_id(url)

From 19ba4ec21833fccd3bb6f3b441a6afa24418a6ee Mon Sep 17 00:00:00 2001
From: rubyist <rubyist.personal@openmailbox.org>
Date: Mon, 2 Mar 2020 17:48:22 -0800
Subject: [PATCH 5/5] Remove long lines, relax requirements on optional
 fields, and simplify regexes.

---
 youtube_dl/extractor/matter.py | 41 +++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/matter.py b/youtube_dl/extractor/matter.py
index a0e96d278..2295cafa5 100644
--- a/youtube_dl/extractor/matter.py
+++ b/youtube_dl/extractor/matter.py
@@ -32,31 +32,40 @@ class MatterIE(InfoExtractor):
 
     def _real_extract(self, url):
         track_id = self._match_id(url)
-        webpage = self._download_webpage(
-            "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded" % track_id, track_id
-        )
 
-        author = self._search_regex(
-            r'<a href="https://app.matter.online/artists/[^"]+" target="[^"]+">([^<]+)</a>',
-            webpage, "author"
-        )
+        # Fetch page with metadata and download URLs.
+        api = "https://api.matter.online/api/v1/open-graph/tracks/%s/embedded"
+        webpage = self._download_webpage(api % track_id, track_id)
+
+        # Extract required fields
         title = self._search_regex(
-            r'<a href="https://app.matter.online/tracks/\d+" target="[^"]+">([^<]+)</a>',
+            r'tracks/\d+" target="[^"]+">([^<]+)</a>',
             webpage, "title"
         )
         download_url = self._search_regex(
-            r'<source src="(https://matter-production.s3.amazonaws.com/audios/[^\.]+\.[^"]+)"/>',
+            r'(https://[^/]+/audios/[^\.]+\.[^"]+)"/>',
             webpage, "download_url"
         )
-        artwork = self._search_regex(
-            r'style="background: url\((https://matter-production.s3.amazonaws.com/images/[^\.]+\.[^\)]+)\)',
-            webpage, "artwork"
-        )
 
-        return {
+        extracted = {
             'id': track_id,
             'url': download_url,
             'title': title,
-            'uploader': author,
-            'thumbnail': artwork,
         }
+
+        # Extract optional fields
+        author = self._search_regex(
+            r'artists/[^"]+" target="[^"]+">([^<]+)</a>',
+            webpage, "author", fatal=False
+        )
+        artwork = self._search_regex(
+            r'(https://[^/]+/images/[^\.]+\.[^\)]+)\)',
+            webpage, "artwork", fatal=False
+        )
+
+        if artwork:
+            extracted['thumbnail'] = artwork
+        if author:
+            extracted['uploader'] = author
+
+        return extracted