From dd64d69b7b532f58d80438c569e82357e370e12b Mon Sep 17 00:00:00 2001 From: Cloud Chagnon Date: Thu, 11 Jan 2018 23:13:55 -0700 Subject: [PATCH 1/4] [weeklybeats] Add new extractor weeklybeats --- youtube_dl/extractor/extractors.py | 3 ++- youtube_dl/extractor/weeklybeats.py | 35 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/weeklybeats.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 37624d37a..b82ba32c1 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1299,8 +1299,9 @@ from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, ) +from .weeklybeats import WeeklyBeatsIE from .weibo import ( - WeiboIE, + WeiboIE, WeiboMobileIE ) from .weiqitv import WeiqiTVIE diff --git a/youtube_dl/extractor/weeklybeats.py b/youtube_dl/extractor/weeklybeats.py new file mode 100644 index 000000000..7e81572d0 --- /dev/null +++ b/youtube_dl/extractor/weeklybeats.py @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class WeeklyBeatsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?weeklybeats\.com/(.+)/music/(.+)' + _TEST = { + 'url': 'https://weeklybeats.com/pulsn/music/week-1-bass-drop', + 'md5': '03465d0fa355147822d2ba1100a82c7c', + 'info_dict': { + 'id': 'week-1-bass-drop', + 'ext': 'mp3', + 'title': 'Week 1: Bass Drip ', + 'url': 'https://weeklybeats.s3.amazonaws.com/music/2012/pulsn_weeklybeats-2012_1_week-1-bass-drop.mp3', + 'uploader': 'pulsn', + 'description': 'A blend of IDM noises mixed with Berlin styled arps and ambient pads.' + } + } + + def _real_extract(self, url): + video_id = self._search_regex(r'https://weeklybeats.com/[^/]+/music/([^/]*)/?', url, 'video_id') + print(video_id) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + return { + 'id': video_id, + 'title': self._search_regex(r']+property="og:title"[^>]+content="([^\"]+)"[^>]*>', webpage, 'title', fatal=False), + 'description': self._search_regex(r']+property="og:description"[^>]+content="([^\"]*)"[^>]*>', webpage, 'description', fatal=False), + 'uploader': self._search_regex(r']+class="form_popular_tags ?artist"[^>]*>View by:([^<]+)<', webpage, 'uploader', fatal=False), + 'url': self._search_regex(r'mp3: \'([^\']+)\'', webpage, 'url') + # TODO more properties (see youtube_dl/extractor/common.py) + } From 546a694df4dbbfb5cf38d97aa844f05582387f8c Mon Sep 17 00:00:00 2001 From: Cloud Chagnon Date: Thu, 11 Jan 2018 23:20:58 -0700 Subject: [PATCH 2/4] [weeklybeats] Cleaned up a bit --- youtube_dl/extractor/weeklybeats.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/weeklybeats.py b/youtube_dl/extractor/weeklybeats.py index 7e81572d0..7410cd6ef 100644 --- a/youtube_dl/extractor/weeklybeats.py +++ b/youtube_dl/extractor/weeklybeats.py @@ -21,10 +21,8 @@ class WeeklyBeatsIE(InfoExtractor): def _real_extract(self, url): video_id = self._search_regex(r'https://weeklybeats.com/[^/]+/music/([^/]*)/?', url, 'video_id') - print(video_id) webpage = self._download_webpage(url, video_id) - # TODO more code goes here, for example ... return { 'id': video_id, 'title': self._search_regex(r']+property="og:title"[^>]+content="([^\"]+)"[^>]*>', webpage, 'title', fatal=False), From a169b6ccb6b861b4d0edfdbfe169e70ae40e51d8 Mon Sep 17 00:00:00 2001 From: coolsa Date: Sun, 14 Jan 2018 04:02:36 -0700 Subject: [PATCH 3/4] [weeklybeats] Corrected regex with groups --- youtube_dl/extractor/weeklybeats.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/weeklybeats.py b/youtube_dl/extractor/weeklybeats.py index 7410cd6ef..6897e8cae 100644 --- a/youtube_dl/extractor/weeklybeats.py +++ b/youtube_dl/extractor/weeklybeats.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class WeeklyBeatsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?weeklybeats\.com/(.+)/music/(.+)' + _VALID_URL = r'https?://(?:www\.)?weeklybeats\.com/(.+)/music/(?P.+)' _TEST = { 'url': 'https://weeklybeats.com/pulsn/music/week-1-bass-drop', 'md5': '03465d0fa355147822d2ba1100a82c7c', @@ -20,14 +20,14 @@ class WeeklyBeatsIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._search_regex(r'https://weeklybeats.com/[^/]+/music/([^/]*)/?', url, 'video_id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) return { 'id': video_id, - 'title': self._search_regex(r']+property="og:title"[^>]+content="([^\"]+)"[^>]*>', webpage, 'title', fatal=False), - 'description': self._search_regex(r']+property="og:description"[^>]+content="([^\"]*)"[^>]*>', webpage, 'description', fatal=False), - 'uploader': self._search_regex(r']+class="form_popular_tags ?artist"[^>]*>View by:([^<]+)<', webpage, 'uploader', fatal=False), - 'url': self._search_regex(r'mp3: \'([^\']+)\'', webpage, 'url') + 'title': self._search_regex(r']+id=(["\'])item_title\1>[^>]*

(?P[^<]+)', webpage, 'title', group='title'), + 'description': self._og_search_description(webpage), + 'uploader': self._search_regex(r'<a[^>]+class=(["\'])[^"\']+artist\1[^>]*>View by:(?P<uploader>[^<]+)', webpage, 'uploader', group='uploader'), + 'url': self._search_regex(r'<a[^>]+id=(["\'])item_download\1[^>]+href=\1(?P<url>[^"\']+)\?', webpage, 'url', group="url"), # TODO more properties (see youtube_dl/extractor/common.py) } From 9b1870c4163d446efe3a7dfb380404deefd85320 Mon Sep 17 00:00:00 2001 From: coolsa <me@cloudchagnon.xyz> Date: Sun, 14 Jan 2018 04:13:03 -0700 Subject: [PATCH 4/4] [weeklybeats] optional argument is no longer fatal. --- youtube_dl/extractor/weeklybeats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/weeklybeats.py b/youtube_dl/extractor/weeklybeats.py index 6897e8cae..443df0241 100644 --- a/youtube_dl/extractor/weeklybeats.py +++ b/youtube_dl/extractor/weeklybeats.py @@ -27,7 +27,7 @@ class WeeklyBeatsIE(InfoExtractor): 'id': video_id, 'title': self._search_regex(r'<div[^>]+id=(["\'])item_title\1>[^>]*<h3>(?P<title>[^<]+)', webpage, 'title', group='title'), 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r'<a[^>]+class=(["\'])[^"\']+artist\1[^>]*>View by:(?P<uploader>[^<]+)', webpage, 'uploader', group='uploader'), + 'uploader': self._search_regex(r'<a[^>]+class=(["\'])[^"\']+artist\1[^>]*>View by:(?P<uploader>[^<]+)', webpage, 'uploader', group='uploader', fatal=False), 'url': self._search_regex(r'<a[^>]+id=(["\'])item_download\1[^>]+href=\1(?P<url>[^"\']+)\?', webpage, 'url', group="url"), # TODO more properties (see youtube_dl/extractor/common.py) }