From 471f398772782ff73542c7fa1490c9ba31eed884 Mon Sep 17 00:00:00 2001
From: rr- <rr-@sakuya.pl>
Date: Wed, 30 Aug 2017 12:01:19 +0200
Subject: [PATCH] [bandcamp] Extract more metadata

---
 test/helper.py                   |  2 +-
 youtube_dl/extractor/bandcamp.py | 80 +++++++++++++++++++++++++-------
 2 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/test/helper.py b/test/helper.py
index dfee217a9..87f4f7612 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -186,7 +186,7 @@ def expect_info_dict(self, got_dict, expected_dict):
     # Are checkable fields missing from the test case definition?
     test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                           for key, value in got_dict.items()
-                          if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit'))
+                          if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit', 'album', 'artist', 'track', 'track_number', 'release_year', 'release_date'))
     missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
     if missing_keys:
         def _repr(v):
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index be41bd5a2..a73cb48f7 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -30,8 +30,11 @@ class BandcampIE(InfoExtractor):
         'info_dict': {
             'id': '1812978515',
             'ext': 'mp3',
-            'title': "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
+            'track': "youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
+            'title': "youtube-dl  \\ - youtube-dl  \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
             'duration': 9.8485,
+            'uploader': 'youtube-dl  \\',
+            'artist': 'youtube-dl  \\',
         },
         '_skip': 'There is a limit of 200 free downloads / month for the test song'
     }, {
@@ -40,8 +43,29 @@ class BandcampIE(InfoExtractor):
         'info_dict': {
             'id': '2650410135',
             'ext': 'aiff',
-            'title': 'Ben Prunty - Lanius (Battle)',
+            'album': 'FTL: Advanced Edition Soundtrack',
+            'artist': 'Ben Prunty',
             'uploader': 'Ben Prunty',
+            'release_date': '20140403',
+            'release_year': 2014,
+            'track_number': 1,
+            'track': 'Lanius (Battle)',
+            'title': 'Ben Prunty - Lanius (Battle)',
+        },
+    }, {
+        'url': 'https://billbaxter.bandcamp.com/track/drone-city-pt-3-3',
+        'md5': 'e8e24365cb38ff841b4e5df014f988ed',
+        'info_dict': {
+            'id': '3755531036',
+            'ext': 'mp3',
+            'album': 'Drone City',
+            'artist': 'The ambient drones of Bill Baxter',
+            'uploader': 'The ambient drones of Bill Baxter',
+            'release_date': '20160326',
+            'release_year': 2016,
+            'track_number': 3,
+            'track': 'Drone City, Pt. 3',
+            'title': 'The ambient drones of Bill Baxter - Drone City, Pt. 3',
         },
     }]
 
@@ -51,11 +75,25 @@ class BandcampIE(InfoExtractor):
         webpage = self._download_webpage(url, title)
         thumbnail = self._html_search_meta('og:image', webpage, default=None)
         m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
+        m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
+        json_code = m_trackinfo.group(1) if m_trackinfo else None
+        data = json.loads(json_code)[0] if json_code else None
+
+        match = re.search(r'album_title\s*:\s*"([^"]+)"', webpage)
+        album_title = match.group(1) if match else None
+
+        match = re.search(r'artist\s*:\s*"([^"]+)"', webpage)
+        artist = match.group(1) if match else None
+
+        match = re.search(r'album_release_date\s*:\s*"([^"]+)"', webpage)
+        release_date = unified_strdate(match.group(1)) if match else None
+        release_year = int(release_date[0:4]) if release_date else None
+
+        track = data['title'] if data else None
+        title = '%s - %s' % (artist, track) if artist else track
+
         if not m_download:
-            m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
-            if m_trackinfo:
-                json_code = m_trackinfo.group(1)
-                data = json.loads(json_code)[0]
+            if data:
                 track_id = compat_str(data['id'])
 
                 if not data.get('file'):
@@ -77,7 +115,15 @@ class BandcampIE(InfoExtractor):
 
                 return {
                     'id': track_id,
-                    'title': data['title'],
+                    'album': album_title,
+                    'uploader': artist,
+                    'artist': artist,
+                    'track_id': track_id,
+                    'track_number': data.get('track_num'),
+                    'release_date': release_date,
+                    'release_year': release_year,
+                    'track': track,
+                    'title': title,
                     'thumbnail': thumbnail,
                     'formats': formats,
                     'duration': float_or_none(data.get('duration')),
@@ -99,13 +145,9 @@ class BandcampIE(InfoExtractor):
                 'blob', group='blob'),
             video_id, transform_source=unescapeHTML)
 
-        info = blob['digital_items'][0]
+        digital_items = blob['digital_items'][0]
 
-        downloads = info['downloads']
-        track = info['title']
-
-        artist = info.get('artist')
-        title = '%s - %s' % (artist, track) if artist else track
+        downloads = digital_items['downloads']
 
         download_formats = {}
         for f in blob['download_formats']:
@@ -146,10 +188,16 @@ class BandcampIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'title': title,
-            'thumbnail': info.get('thumb_url') or thumbnail,
-            'uploader': info.get('artist'),
+            'album': album_title,
+            'uploader': artist,
             'artist': artist,
+            'track_id': video_id,
+            'track_number': data.get('track_num') if data else None,
+            'release_date': release_date,
+            'release_year': release_year,
+            'track': track,
+            'title': title,
+            'thumbnail': digital_items.get('thumb_url') or thumbnail,
             'track': track,
             'formats': formats,
         }