From 977e2988db1e167b83ca3b331ac3588e1f694433 Mon Sep 17 00:00:00 2001 From: Dimitrios Semitsoglou-Tsiapos <kmhzsem@gmx.com> Date: Thu, 20 Apr 2017 00:51:23 +0300 Subject: [PATCH 1/5] [piapro] initial support Closes #5856 --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/piapro.py | 57 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 youtube_dl/extractor/piapro.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 4adcae1e5..e40381b85 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -850,6 +850,7 @@ from .picarto import ( PicartoIE, PicartoVodIE, ) +from .piapro import PiaproIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/piapro.py b/youtube_dl/extractor/piapro.py new file mode 100644 index 000000000..79b5f4d8f --- /dev/null +++ b/youtube_dl/extractor/piapro.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from datetime import datetime + +from ..utils import get_element_by_class +from .common import InfoExtractor + +test_partial = { + 'md5': 'fe63bb94879189bd9ff7420d0b187352', + 'info_dict': { + 'artist': 'mothy_悪ノPさん', + 'description': '悪ノ娘のアレンジバージョンです。', + 'ext': 'mp3', + 'id': 'es7uj48x6bvcbtgy', + 'thumbnail': r're:https?://c1\.piapro\.jp/timg/nogoc3x8d4m0j416_20080819185021_0180_1440\.jpg', + 'timestamp': 1263600322, + 'title': '悪ノ娘~velvet mix~', + 'upload_date': '20100116', + 'uploader': 'mothy_悪ノPさん', + 'uploader_url': r're:https?://piapro\.jp/mothy', + } +} + + +class PiaproIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?piapro\.jp/(conten)?t/(?P<id>[0-9a-zA-Z]+)' + _TESTS = [dict({'url': 'http://piapro.jp/t/KToM'}, **test_partial), + dict({'url': 'http://piapro.jp/content/es7uj48x6bvcbtgy'}, **test_partial)] + + def _real_extract(self, url): + url_id = self._match_id(url) + webpage = self._download_webpage(url, 
url_id) + + if re.search(r'/content/([0-9a-zA-Z]+)', url): + content_id = url_id + else: + content_id = self._search_regex(r'''contentId\s*:\s*['"]([0-9a-zA-Z]+?)['\"]''', webpage, 'content_id') + + create_date = self._search_regex(r'''createDate\s*:\s*['"]([0-9]{14})['"]''', webpage, 'create_date', fatal=False) or \ + self._search_regex(r'''["']https?://songle\.jp/songs/piapro\.jp.*([0-9]{14})['"]''', webpage, 'create_date') + + cls_userbar_name = get_element_by_class("userbar-name", webpage) + + uploader = self._search_regex(r'<a.*?>(.+?)</a>', cls_userbar_name, 'uploader', fatal=False) + return { + 'artist': uploader, + 'description': get_element_by_class("dtl_cap", webpage), + 'id': content_id, + 'thumbnail': self._search_regex(r'(https?://c1\.piapro\.jp/timg/.+?_1440\.jpg)', webpage, 'thumbnail', fatal=False), + 'timestamp': int(datetime.strptime(create_date, '%Y%m%d%H%M%S').strftime("%s")), + 'title': get_element_by_class("works-title", webpage) or self._html_search_regex(r'<title>[^<]*「(.*?)」<', webpage, 'title', fatal=False), + 'uploader': uploader, + 'uploader_url': self._search_regex(r'<a\s+.*?href="(https?://piapro\.jp/.+?)"', cls_userbar_name, 'uploader_url', fatal=False), + 'url': 'http://c1.piapro.jp/amp3/{}_{}_audition.mp3'.format(content_id, create_date) + } From fb603656c9f4d8dd165d61c193a12844ccf9eba2 Mon Sep 17 00:00:00 2001 From: Dimitrios Semitsoglou-Tsiapos <kmhzsem@gmx.com> Date: Fri, 28 Apr 2017 21:16:30 +0300 Subject: [PATCH 2/5] [piapro] remove uploader name honorific --- youtube_dl/extractor/piapro.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/piapro.py b/youtube_dl/extractor/piapro.py index 79b5f4d8f..8b8a3aa5e 100644 --- a/youtube_dl/extractor/piapro.py +++ b/youtube_dl/extractor/piapro.py @@ -10,7 +10,7 @@ from .common import InfoExtractor test_partial = { 'md5': 'fe63bb94879189bd9ff7420d0b187352', 'info_dict': { - 'artist': 'mothy_悪ノPさん', + 'artist': 'mothy_悪ノP', 'description': 
'悪ノ娘のアレンジバージョンです。', 'ext': 'mp3', 'id': 'es7uj48x6bvcbtgy', @@ -18,7 +18,7 @@ test_partial = { 'timestamp': 1263600322, 'title': '悪ノ娘~velvet mix~', 'upload_date': '20100116', - 'uploader': 'mothy_悪ノPさん', + 'uploader': 'mothy_悪ノP', 'uploader_url': r're:https?://piapro\.jp/mothy', } } @@ -44,14 +44,18 @@ class PiaproIE(InfoExtractor): cls_userbar_name = get_element_by_class("userbar-name", webpage) uploader = self._search_regex(r'<a.*?>(.+?)</a>', cls_userbar_name, 'uploader', fatal=False) + try: + uploader_without_honorific = re.match('.+(?=さん)', uploader).group(0) + except IndexError: + uploader_without_honorific = None return { - 'artist': uploader, + 'artist': uploader_without_honorific or uploader, 'description': get_element_by_class("dtl_cap", webpage), 'id': content_id, 'thumbnail': self._search_regex(r'(https?://c1\.piapro\.jp/timg/.+?_1440\.jpg)', webpage, 'thumbnail', fatal=False), 'timestamp': int(datetime.strptime(create_date, '%Y%m%d%H%M%S').strftime("%s")), 'title': get_element_by_class("works-title", webpage) or self._html_search_regex(r'<title>[^<]*「(.*?)」<', webpage, 'title', fatal=False), - 'uploader': uploader, + 'uploader': uploader_without_honorific or uploader, 'uploader_url': self._search_regex(r'<a\s+.*?href="(https?://piapro\.jp/.+?)"', cls_userbar_name, 'uploader_url', fatal=False), 'url': 'http://c1.piapro.jp/amp3/{}_{}_audition.mp3'.format(content_id, create_date) } From 2a9492b5bf88d84d4ab200acbb8690c8909e01f6 Mon Sep 17 00:00:00 2001 From: Dimitrios Semitsoglou-Tsiapos <kmhzsem@gmx.com> Date: Sun, 20 Jan 2019 00:00:13 +0100 Subject: [PATCH 3/5] [piapro] update for website changes Temporarily drops `uploader_url` extraction --- youtube_dl/extractor/piapro.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/piapro.py b/youtube_dl/extractor/piapro.py index 8b8a3aa5e..7ee7253bf 100644 --- a/youtube_dl/extractor/piapro.py +++ b/youtube_dl/extractor/piapro.py @@ -20,6 +20,7 @@ test_partial = { 
'upload_date': '20100116', 'uploader': 'mothy_悪ノP', 'uploader_url': r're:https?://piapro\.jp/mothy', + 'url': 'https://cdn.piapro.jp/mp3_a/es/es7uj48x6bvcbtgy_20100116020522_audition.mp3', } } @@ -41,21 +42,20 @@ class PiaproIE(InfoExtractor): create_date = self._search_regex(r'''createDate\s*:\s*['"]([0-9]{14})['"]''', webpage, 'create_date', fatal=False) or \ self._search_regex(r'''["']https?://songle\.jp/songs/piapro\.jp.*([0-9]{14})['"]''', webpage, 'create_date') - cls_userbar_name = get_element_by_class("userbar-name", webpage) - - uploader = self._search_regex(r'<a.*?>(.+?)</a>', cls_userbar_name, 'uploader', fatal=False) + uploader = get_element_by_class("cd_user-name", webpage) try: uploader_without_honorific = re.match('.+(?=さん)', uploader).group(0) except IndexError: uploader_without_honorific = None + return { 'artist': uploader_without_honorific or uploader, - 'description': get_element_by_class("dtl_cap", webpage), + 'description': get_element_by_class("cd_dtl_cap", webpage), 'id': content_id, 'thumbnail': self._search_regex(r'(https?://c1\.piapro\.jp/timg/.+?_1440\.jpg)', webpage, 'thumbnail', fatal=False), 'timestamp': int(datetime.strptime(create_date, '%Y%m%d%H%M%S').strftime("%s")), 'title': get_element_by_class("works-title", webpage) or self._html_search_regex(r'<title>[^<]*「(.*?)」<', webpage, 'title', fatal=False), 'uploader': uploader_without_honorific or uploader, - 'uploader_url': self._search_regex(r'<a\s+.*?href="(https?://piapro\.jp/.+?)"', cls_userbar_name, 'uploader_url', fatal=False), - 'url': 'http://c1.piapro.jp/amp3/{}_{}_audition.mp3'.format(content_id, create_date) + # 'uploader_url': self._search_regex(r'<a\s+.*?href="(https?://piapro\.jp/.+?)"', cls_userbar_name, 'uploader_url', fatal=False), # FIXME + 'url': 'http://cdn.piapro.jp/mp3_a/{}/{}_{}_audition.mp3'.format(content_id[:2], content_id, create_date) } From cedd5da48c1b61c2bc82ac6837f220ed14c21ebf Mon Sep 17 00:00:00 2001 From: Dimitrios Semitsoglou-Tsiapos 
<kmhzsem@gmx.com> Date: Sun, 27 Jan 2019 17:43:05 +0100 Subject: [PATCH 4/5] piapro: support uploader name without honorific --- youtube_dl/extractor/piapro.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/piapro.py b/youtube_dl/extractor/piapro.py index 7ee7253bf..85f1b4efb 100644 --- a/youtube_dl/extractor/piapro.py +++ b/youtube_dl/extractor/piapro.py @@ -47,6 +47,8 @@ class PiaproIE(InfoExtractor): uploader_without_honorific = re.match('.+(?=さん)', uploader).group(0) except IndexError: uploader_without_honorific = None + except AttributeError: + uploader_without_honorific = uploader return { 'artist': uploader_without_honorific or uploader, From 33b655e19d40e1c17ab3485fa9bd298aac556ef1 Mon Sep 17 00:00:00 2001 From: Dimitrios Semitsoglou-Tsiapos <kmhzsem@gmx.com> Date: Mon, 28 Jan 2019 19:26:26 +0100 Subject: [PATCH 5/5] OS-portable timestamp calculation --- youtube_dl/extractor/piapro.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/piapro.py b/youtube_dl/extractor/piapro.py index 85f1b4efb..e28fcc84c 100644 --- a/youtube_dl/extractor/piapro.py +++ b/youtube_dl/extractor/piapro.py @@ -39,8 +39,10 @@ class PiaproIE(InfoExtractor): else: content_id = self._search_regex(r'''contentId\s*:\s*['"]([0-9a-zA-Z]+?)['\"]''', webpage, 'content_id') - create_date = self._search_regex(r'''createDate\s*:\s*['"]([0-9]{14})['"]''', webpage, 'create_date', fatal=False) or \ + create_date_str = self._search_regex(r'''createDate\s*:\s*['"]([0-9]{14})['"]''', webpage, 'create_date', fatal=False) or \ self._search_regex(r'''["']https?://songle\.jp/songs/piapro\.jp.*([0-9]{14})['"]''', webpage, 'create_date') + create_date = datetime.strptime(create_date_str, '%Y%m%d%H%M%S') + create_date_unix = (create_date - datetime(1970,1,1)).total_seconds() uploader = get_element_by_class("cd_user-name", webpage) try: @@ -55,9 +57,9 @@ class PiaproIE(InfoExtractor): 'description': get_element_by_class("cd_dtl_cap", 
webpage), 'id': content_id, 'thumbnail': self._search_regex(r'(https?://c1\.piapro\.jp/timg/.+?_1440\.jpg)', webpage, 'thumbnail', fatal=False), - 'timestamp': int(datetime.strptime(create_date, '%Y%m%d%H%M%S').strftime("%s")), + 'timestamp': create_date_unix, 'title': get_element_by_class("works-title", webpage) or self._html_search_regex(r'<title>[^<]*「(.*?)」<', webpage, 'title', fatal=False), 'uploader': uploader_without_honorific or uploader, # 'uploader_url': self._search_regex(r'<a\s+.*?href="(https?://piapro\.jp/.+?)"', cls_userbar_name, 'uploader_url', fatal=False), # FIXME - 'url': 'http://cdn.piapro.jp/mp3_a/{}/{}_{}_audition.mp3'.format(content_id[:2], content_id, create_date) + 'url': 'http://cdn.piapro.jp/mp3_a/{}/{}_{}_audition.mp3'.format(content_id[:2], content_id, create_date_str) }