From 034026359e95caf0a54acf4bc6a22037a36a6ff0 Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Sun, 29 Apr 2018 23:19:14 +0530 Subject: [PATCH 01/11] [peertube] Added extractor --- youtube_dl/extractor/extractors.py | 3 ++- youtube_dl/extractor/peertube.py | 35 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/peertube.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6fb65e4fe..0dc0b607a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -805,6 +805,7 @@ from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE from .pearvideo import PearVideoIE +from .peertube import PeertubeIE from .people import PeopleIE from .performgroup import PerformGroupIE from .periscope import ( @@ -1332,7 +1333,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py new file mode 100644 index 000000000..6b137d91b --- /dev/null +++ b/youtube_dl/extractor/peertube.py @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class PeertubeIE(InfoExtractor): + _BASE_VIDEO_URL = 'https://peertube.touhoppai.moe/static/webseed/%s-1080.mp4' + _BASE_THUMBNAIL_URL = 'https://peertube.touhoppai.moe/static/previews/%s.jpg' + IE_DESC = 'Peertube Videos' + IE_NAME = 'Peertube' + _VALID_URL = r'https?:\/\/peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|\-|a-z]+)' + _TEST = { + 'url': 'https://peertube.touhoppai.moe/videos/watch/7f3421ae-6161-4a4a-ae38-d167aec51683', + 'md5': 'a5e1e4a978e6b789553198d1739f5643', + 'info_dict': { + 'id': '7f3421ae-6161-4a4a-ae38-d167aec51683', + 'ext': 'mp4', + 'title': 'David Revoy Live Stream: Speedpainting', + 'description': 'md5:5c09a6e3fdb5f56edce289d69fbe7567', + 'thumbnail': 'https://peertube.touhoppai.moe/static/previews/7f3421ae-6161-4a4a-ae38-d167aec51683.jpg', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'url': self._BASE_VIDEO_URL % video_id, + 'thumbnail': self._BASE_THUMBNAIL_URL % video_id + } From 2d8fc9c5375732a4ee0d761543cc7faa134b6d16 Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Mon, 30 Apr 2018 22:30:34 +0530 Subject: [PATCH 02/11] Integrated peertube api for peertube --- youtube_dl/extractor/peertube.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 6b137d91b..e2da11012 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -1,35 +1,35 @@ # coding: utf-8 from __future__ import unicode_literals +from ..compat import compat_urlparse from .common import InfoExtractor class PeertubeIE(InfoExtractor): - _BASE_VIDEO_URL = 'https://peertube.touhoppai.moe/static/webseed/%s-1080.mp4' - _BASE_THUMBNAIL_URL = 'https://peertube.touhoppai.moe/static/previews/%s.jpg' IE_DESC = 'Peertube Videos' IE_NAME = 'Peertube' _VALID_URL = r'https?:\/\/peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|\-|a-z]+)' _TEST = { 'url': 'https://peertube.touhoppai.moe/videos/watch/7f3421ae-6161-4a4a-ae38-d167aec51683', - 'md5': 'a5e1e4a978e6b789553198d1739f5643', + 'md5': '051ef9823d237416d5a6fc0bd8d67812', 'info_dict': { 'id': '7f3421ae-6161-4a4a-ae38-d167aec51683', 'ext': 'mp4', 'title': 'David Revoy Live Stream: Speedpainting', - 'description': 'md5:5c09a6e3fdb5f56edce289d69fbe7567', - 'thumbnail': 'https://peertube.touhoppai.moe/static/previews/7f3421ae-6161-4a4a-ae38-d167aec51683.jpg', + 'description': 'md5:4e67c2fec55739a2ccb86052505a741e', + 'thumbnail': 'https://peertube.touhoppai.moe/static/thumbnails/7f3421ae-6161-4a4a-ae38-d167aec51683.jpg', } } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + url_data = compat_urlparse.urlparse(url) + api_url = "%s://%s/api/v1/videos/%s" % (url_data.scheme, url_data.hostname, video_id) + details = self._download_json(api_url, video_id) return { 'id': video_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'url': self._BASE_VIDEO_URL % video_id, - 'thumbnail': self._BASE_THUMBNAIL_URL % video_id + 'title': details['name'], + 'description': details['description'], + 'url': details['files'][-1]['fileUrl'], + 'thumbnail': url_data.scheme + '://' + url_data.hostname + details['thumbnailPath'] } From b191967b76109c9cd0064cd489892250873befd9 Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Tue, 1 May 2018 01:15:39 +0530 Subject: [PATCH 03/11] Used urljoin for url comprehension --- youtube_dl/extractor/peertube.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index e2da11012..15f32bb7c 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from ..compat import compat_urlparse +from ..utils import urljoin from .common import InfoExtractor @@ -24,12 +25,13 @@ class PeertubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) url_data = compat_urlparse.urlparse(url) - api_url = "%s://%s/api/v1/videos/%s" % (url_data.scheme, url_data.hostname, video_id) + base_url = "%s://%s" % (url_data.scheme, url_data.hostname) + api_url = urljoin(urljoin(base_url, "/api/v1/videos/"), video_id) details = self._download_json(api_url, video_id) return { 'id': video_id, - 'title': details['name'], - 'description': details['description'], + 'title': details.get('name'), + 'description': details.get('description'), 'url': details['files'][-1]['fileUrl'], - 'thumbnail': url_data.scheme + '://' + url_data.hostname + details['thumbnailPath'] + 'thumbnail': urljoin(base_url, details['thumbnailPath']) } From 45c7e97d6eae5da4cd81b1ef4bb2d466c69a86c5 Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Tue, 1 May 2018 01:23:11 +0530 Subject: [PATCH 04/11] Added multiple formats for videos --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/peertube.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0dc0b607a..b5e349c0e 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1333,7 +1333,7 @@ from .webofstories import ( WebOfStoriesPlaylistIE, ) from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 15f32bb7c..4ce24dd8b 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -12,7 +12,7 @@ class PeertubeIE(InfoExtractor): _VALID_URL = r'https?:\/\/peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|\-|a-z]+)' _TEST = { 'url': 'https://peertube.touhoppai.moe/videos/watch/7f3421ae-6161-4a4a-ae38-d167aec51683', - 'md5': '051ef9823d237416d5a6fc0bd8d67812', + 'md5': 'a5e1e4a978e6b789553198d1739f5643', 'info_dict': { 'id': '7f3421ae-6161-4a4a-ae38-d167aec51683', 'ext': 'mp4', @@ -32,6 +32,6 @@ class PeertubeIE(InfoExtractor): 'id': video_id, 'title': details.get('name'), 'description': details.get('description'), - 'url': details['files'][-1]['fileUrl'], + 'formats': [{'url': file_data['fileUrl'], 'filesize': file_data['size']} for file_data in sorted(details['files'], key=lambda x: x['size'])], 'thumbnail': urljoin(base_url, details['thumbnailPath']) } From 06ad3fee3a66808ab283967efaa5b18a0e7bf8fe Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Tue, 1 May 2018 15:45:16 +0530 Subject: [PATCH 05/11] Fixed regex for valid url capture group --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 4ce24dd8b..995c57526 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class PeertubeIE(InfoExtractor): IE_DESC = 'Peertube Videos' IE_NAME = 'Peertube' - _VALID_URL = r'https?:\/\/peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|\-|a-z]+)' + _VALID_URL = r'https?:\/\/peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|a-z]{8}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{12})' _TEST = { 'url': 'https://peertube.touhoppai.moe/videos/watch/7f3421ae-6161-4a4a-ae38-d167aec51683', 'md5': 'a5e1e4a978e6b789553198d1739f5643', From 6655bbcc13e8b44c2c4cc62030ba421bc98f047f Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Tue, 1 May 2018 15:46:04 +0530 Subject: [PATCH 06/11] made filesize field optional --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 995c57526..382d0c20b 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -32,6 +32,6 @@ class PeertubeIE(InfoExtractor): 'id': video_id, 'title': details.get('name'), 'description': details.get('description'), - 'formats': [{'url': file_data['fileUrl'], 'filesize': file_data['size']} for file_data in sorted(details['files'], key=lambda x: x['size'])], + 'formats': [{'url': file_data['fileUrl'], 'filesize': file_data.get('size')} for file_data in sorted(details['files'], key=lambda x: x['size'])], 'thumbnail': urljoin(base_url, details['thumbnailPath']) } From 115bdf067afad610decb76bad3f05a0e3106f513 Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Thu, 3 May 2018 22:39:40 +0530 Subject: [PATCH 07/11] Made requested changes --- youtube_dl/extractor/peertube.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 382d0c20b..cb72c5aab 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class PeertubeIE(InfoExtractor): IE_DESC = 'Peertube Videos' IE_NAME = 'Peertube' - _VALID_URL = r'https?:\/\/peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|a-z]{8}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{12})' + _VALID_URL = r'(?:https?:)//peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|a-z]{8}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{12})' _TEST = { 'url': 'https://peertube.touhoppai.moe/videos/watch/7f3421ae-6161-4a4a-ae38-d167aec51683', 'md5': 'a5e1e4a978e6b789553198d1739f5643', @@ -26,12 +26,22 @@ class PeertubeIE(InfoExtractor): video_id = self._match_id(url) url_data = compat_urlparse.urlparse(url) base_url = "%s://%s" % (url_data.scheme, url_data.hostname) - api_url = urljoin(urljoin(base_url, "/api/v1/videos/"), video_id) + api_url = urljoin(base_url, "/api/v1/videos/%s" % video_id) details = self._download_json(api_url, video_id) + formats = [{'url': file_data['fileUrl'], 'filesize': file_data.get('size'), 'format': file_data.get('resolution', {}).get('label')} for file_data in details['files']] + self._sort_formats(formats) return { 'id': video_id, - 'title': details.get('name'), + 'title': details['name'], 'description': details.get('description'), - 'formats': [{'url': file_data['fileUrl'], 'filesize': file_data.get('size')} for file_data in sorted(details['files'], key=lambda x: x['size'])], - 'thumbnail': urljoin(base_url, details['thumbnailPath']) + 'formats': formats, + 'thumbnail': urljoin(base_url, details['thumbnailPath']) if 'thumbnailPath' in details else None, + 'uploader': details.get('account', {}).get('name'), + 'uploader_id': details.get('account', {}).get('id'), + 'uploder_url': details.get('account', {}).get('url'), + 'duration': details.get('duration'), + 'view_count': details.get('views'), + 'like_count': details.get('likes'), + 'dislike_count': details.get('dislikes'), + 'tags': details.get('tags') } From cc7f194d7ccbbafa165d7c3e341d54a591ce9df0 Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Thu, 3 May 2018 23:24:42 +0530 Subject: [PATCH 08/11] Added none handling of integers via int_or_none. --- youtube_dl/extractor/peertube.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index cb72c5aab..0e88da8d7 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from ..compat import compat_urlparse -from ..utils import urljoin +from ..utils import urljoin, int_or_none from .common import InfoExtractor @@ -28,7 +28,7 @@ class PeertubeIE(InfoExtractor): base_url = "%s://%s" % (url_data.scheme, url_data.hostname) api_url = urljoin(base_url, "/api/v1/videos/%s" % video_id) details = self._download_json(api_url, video_id) - formats = [{'url': file_data['fileUrl'], 'filesize': file_data.get('size'), 'format': file_data.get('resolution', {}).get('label')} for file_data in details['files']] + formats = [{'url': file_data['fileUrl'], 'filesize': int_or_none(file_data.get('size')), 'format': file_data.get('resolution', {}).get('label')} for file_data in details['files']] self._sort_formats(formats) return { 'id': video_id, @@ -39,9 +39,9 @@ class PeertubeIE(InfoExtractor): 'uploader': details.get('account', {}).get('name'), 'uploader_id': details.get('account', {}).get('id'), 'uploder_url': details.get('account', {}).get('url'), - 'duration': details.get('duration'), - 'view_count': details.get('views'), - 'like_count': details.get('likes'), - 'dislike_count': details.get('dislikes'), + 'duration': int_or_none(details.get('duration')), + 'view_count': int_or_none(details.get('views')), + 'like_count': int_or_none(details.get('likes')), + 'dislike_count': int_or_none(details.get('dislikes')), 'tags': details.get('tags') } From 4cc03978723b7f732fa097e22bb4e5ab0d2179d6 Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Thu, 3 May 2018 23:52:10 +0530 Subject: [PATCH 09/11] Added try_get for multiple getters --- youtube_dl/extractor/peertube.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 0e88da8d7..97dda0a86 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -1,8 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals -from ..compat import compat_urlparse -from ..utils import urljoin, int_or_none +from ..compat import compat_urlparse, compat_str +from ..utils import urljoin, int_or_none, try_get from .common import InfoExtractor @@ -25,7 +25,7 @@ class PeertubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) url_data = compat_urlparse.urlparse(url) - base_url = "%s://%s" % (url_data.scheme, url_data.hostname) + base_url = "%s://%s" % (url_data.scheme or 'http', url_data.hostname) api_url = urljoin(base_url, "/api/v1/videos/%s" % video_id) details = self._download_json(api_url, video_id) formats = [{'url': file_data['fileUrl'], 'filesize': int_or_none(file_data.get('size')), 'format': file_data.get('resolution', {}).get('label')} for file_data in details['files']] @@ -36,9 +36,9 @@ class PeertubeIE(InfoExtractor): 'description': details.get('description'), 'formats': formats, 'thumbnail': urljoin(base_url, details['thumbnailPath']) if 'thumbnailPath' in details else None, - 'uploader': details.get('account', {}).get('name'), - 'uploader_id': details.get('account', {}).get('id'), - 'uploder_url': details.get('account', {}).get('url'), + 'uploader': try_get(details, lambda x: x['account']['displayName'], compat_str), + 'uploader_id': try_get(details, lambda x: x['account']['id'], int), + 'uploder_url': try_get(details, lambda x: x['account']['url'], compat_str), 'duration': int_or_none(details.get('duration')), 'view_count': int_or_none(details.get('views')), 'like_count': int_or_none(details.get('likes')), From 6f227640248b5367fa722fcf7d431ed69c0cfc4d Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Fri, 4 May 2018 17:01:25 +0530 Subject: [PATCH 10/11] Fixed uuid char range --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 97dda0a86..0586b12da 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class PeertubeIE(InfoExtractor): IE_DESC = 'Peertube Videos' IE_NAME = 'Peertube' - _VALID_URL = r'(?:https?:)//peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|a-z]{8}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{4}-[0-9|a-z]{12})' + _VALID_URL = r'(?:https?:)//peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|a-f]{8}-[0-9|a-f]{4}-[0-9|a-f]{4}-[0-9|a-f]{4}-[0-9|a-f]{12})' _TEST = { 'url': 'https://peertube.touhoppai.moe/videos/watch/7f3421ae-6161-4a4a-ae38-d167aec51683', 'md5': 'a5e1e4a978e6b789553198d1739f5643', From 2c08d2756c2b50cf2d26638cb882bb9359871f1a Mon Sep 17 00:00:00 2001 From: Parth Verma Date: Thu, 17 May 2018 12:55:23 +0530 Subject: [PATCH 11/11] Fixed regex --- youtube_dl/extractor/peertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py index 0586b12da..4dc7663b2 100644 --- a/youtube_dl/extractor/peertube.py +++ b/youtube_dl/extractor/peertube.py @@ -9,7 +9,7 @@ from .common import InfoExtractor class PeertubeIE(InfoExtractor): IE_DESC = 'Peertube Videos' IE_NAME = 'Peertube' - _VALID_URL = r'(?:https?:)//peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9|a-f]{8}-[0-9|a-f]{4}-[0-9|a-f]{4}-[0-9|a-f]{4}-[0-9|a-f]{12})' + _VALID_URL = r'(?:https?:)//peertube\.touhoppai\.moe\/videos\/watch\/(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})' _TEST = { 'url': 'https://peertube.touhoppai.moe/videos/watch/7f3421ae-6161-4a4a-ae38-d167aec51683', 'md5': 'a5e1e4a978e6b789553198d1739f5643',