From 3fce765491734cc2b26cdada6ef4c2158067fdb2 Mon Sep 17 00:00:00 2001 From: Cian Ruane Date: Fri, 16 Oct 2015 00:35:11 +0100 Subject: [PATCH 1/7] Create clyp.py --- youtube_dl/extractor/clyp.py | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 youtube_dl/extractor/clyp.py diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py new file mode 100644 index 000000000..e4ec98b10 --- /dev/null +++ b/youtube_dl/extractor/clyp.py @@ -0,0 +1,58 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class JeuxVideoIE(InfoExtractor): + _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' + + _TESTS = [{ + 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', + 'md5': '046e491afb32a8aaac1f44dd4ddd54ee', + 'info_dict': { + 'id': '114765', + 'ext': 'mp4', + 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité', + 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.', + }, + }, { + 'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group(1) + webpage = self._download_webpage(url, title) + title = self._html_search_meta('name', webpage) + if title == None: + title = 'untitled' + config_url = self._html_search_regex( + r'data-src="(/contenu/medias/video.php.*?)"', + webpage, 'config URL') + config_url = 'http://www.jeuxvideo.com' + config_url + + video_id = self._search_regex( + r'id=(\d+)', + config_url, 'video ID') + + config = self._download_json( + config_url, title, 'Downloading JSON config') + + formats = [{ + 'url': source['file'], + 'format_id': 
source['label'], + 'resolution': source['label'], + } for source in reversed(config['sources'])] + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': config.get('image'), + } From 8e9234403008f1a15988d4fd67b619895eac4ee7 Mon Sep 17 00:00:00 2001 From: Cian Ruane Date: Fri, 16 Oct 2015 00:50:59 +0100 Subject: [PATCH 2/7] Update clyp.py --- youtube_dl/extractor/clyp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py index e4ec98b10..5843c8aef 100644 --- a/youtube_dl/extractor/clyp.py +++ b/youtube_dl/extractor/clyp.py @@ -7,8 +7,8 @@ import re from .common import InfoExtractor -class JeuxVideoIE(InfoExtractor): - _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' +class ClypIE(InfoExtractor): + _VALID_URL = r'https://clyp\.it/........' _TESTS = [{ 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', @@ -25,9 +25,10 @@ class JeuxVideoIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - title = mobj.group(1) - webpage = self._download_webpage(url, title) + video_id = url.replace("https://clyp.it/", "") + api_url = 'https://api.clyp.it/' + video_id + metadata = self._download_json(api_url, video_id) + print metadata title = self._html_search_meta('name', webpage) if title == None: title = 'untitled' From 67bc41c7d203d0f9e14c8c91b9d633047782f05e Mon Sep 17 00:00:00 2001 From: Cian Ruane Date: Fri, 16 Oct 2015 01:22:23 +0100 Subject: [PATCH 3/7] Delete clyp.py --- youtube_dl/extractor/clyp.py | 59 ------------------------------------ 1 file changed, 59 deletions(-) delete mode 100644 youtube_dl/extractor/clyp.py diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py deleted file mode 100644 index 5843c8aef..000000000 --- 
a/youtube_dl/extractor/clyp.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - - -class ClypIE(InfoExtractor): - _VALID_URL = r'https://clyp\.it/........' - - _TESTS = [{ - 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', - 'md5': '046e491afb32a8aaac1f44dd4ddd54ee', - 'info_dict': { - 'id': '114765', - 'ext': 'mp4', - 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité', - 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.', - }, - }, { - 'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = url.replace("https://clyp.it/", "") - api_url = 'https://api.clyp.it/' + video_id - metadata = self._download_json(api_url, video_id) - print metadata - title = self._html_search_meta('name', webpage) - if title == None: - title = 'untitled' - config_url = self._html_search_regex( - r'data-src="(/contenu/medias/video.php.*?)"', - webpage, 'config URL') - config_url = 'http://www.jeuxvideo.com' + config_url - - video_id = self._search_regex( - r'id=(\d+)', - config_url, 'video ID') - - config = self._download_json( - config_url, title, 'Downloading JSON config') - - formats = [{ - 'url': source['file'], - 'format_id': source['label'], - 'resolution': source['label'], - } for source in reversed(config['sources'])] - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': self._og_search_description(webpage), - 'thumbnail': config.get('image'), - } From f4f31ee6f0f3a7600870d273886a2051495f7ffd Mon Sep 17 00:00:00 2001 From: Cian Ruane Date: Fri, 16 Oct 2015 01:23:09 +0100 Subject: [PATCH 4/7] 
Create clyp.py --- youtube_dl/extractor/clyp.py | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/clyp.py diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py new file mode 100644 index 000000000..19b897826 --- /dev/null +++ b/youtube_dl/extractor/clyp.py @@ -0,0 +1,53 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ClypIE(InfoExtractor): + _VALID_URL = r'https://clyp\.it/........' + + _TESTS = [{ + 'url': 'https://clyp.it/ojz2wfah', + 'md5': '1d4961036c41247ecfdcc439c0cddcbb', + 'info_dict': { + 'id': 'ojz2wfah', + 'ext': 'mp3', + 'title': 'Krisson80 - bits wip wip', + 'description': '#Krisson80BitsWipWip #chiptune\n#wip', + }, + }, { + 'url': 'https://clyp.it/ojz2wfah', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = url.replace("https://clyp.it/", "") + api_url = 'https://api.clyp.it/' + video_id + metadata = self._download_json(api_url, video_id) + + title = metadata['Title'] + description = metadata['Description'] + duration = int(metadata['Duration']) + + formats = [ + { + 'url': metadata['OggUrl'], + 'format_id': 'ogg', + 'preference': -2 + },{ + 'url': metadata['Mp3Url'], + 'format_id': 'mp3', + 'preference': -1 + }] + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'duration': duration + } From e9c1f4f0d81e7d42995f595f10e26d942315117e Mon Sep 17 00:00:00 2001 From: Cian Ruane Date: Fri, 16 Oct 2015 01:24:07 +0100 Subject: [PATCH 5/7] Update __init__.py --- youtube_dl/extractor/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 462717b1e..752a736fe 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -89,6 +89,7 @@ from .cliphunter import CliphunterIE from .clipsyndicate import ClipsyndicateIE from 
.cloudy import CloudyIE from .clubic import ClubicIE +from .clyp import ClypIE from .cmt import CMTIE from .cnet import CNETIE from .cnn import ( From fc82521e628dbdf9594dd10c190c5f7dd4474dbd Mon Sep 17 00:00:00 2001 From: Cian Ruane Date: Fri, 16 Oct 2015 01:26:32 +0100 Subject: [PATCH 6/7] Update jeuxvideo.py --- youtube_dl/extractor/jeuxvideo.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index e4ec98b10..1df084d87 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -29,8 +29,6 @@ class JeuxVideoIE(InfoExtractor): title = mobj.group(1) webpage = self._download_webpage(url, title) title = self._html_search_meta('name', webpage) - if title == None: - title = 'untitled' config_url = self._html_search_regex( r'data-src="(/contenu/medias/video.php.*?)"', webpage, 'config URL') From 05fbdb8a6924cc309f63ab46060eb28dd50d0bb2 Mon Sep 17 00:00:00 2001 From: Cian Ruane Date: Sat, 17 Oct 2015 13:57:10 +0100 Subject: [PATCH 7/7] [clyp.it] Extract ID idiomatically and make duration and description optional --- youtube_dl/extractor/clyp.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py index 19b897826..906729b30 100644 --- a/youtube_dl/extractor/clyp.py +++ b/youtube_dl/extractor/clyp.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class ClypIE(InfoExtractor): - _VALID_URL = r'https://clyp\.it/........' 
+ _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)' _TESTS = [{ 'url': 'https://clyp.it/ojz2wfah', @@ -25,13 +25,17 @@ class ClypIE(InfoExtractor): }] def _real_extract(self, url): - video_id = url.replace("https://clyp.it/", "") - api_url = 'https://api.clyp.it/' + video_id - metadata = self._download_json(api_url, video_id) + audio_id = self._match_id(url) + api_url = 'https://api.clyp.it/' + audio_id + metadata = self._download_json(api_url, audio_id) title = metadata['Title'] - description = metadata['Description'] - duration = int(metadata['Duration']) + + description = None + if metadata['Description']: description = metadata['Description'] + + duration = None + if metadata['Duration']: duration = int(metadata['Duration']) formats = [ { @@ -45,7 +49,7 @@ class ClypIE(InfoExtractor): }] return { - 'id': video_id, + 'id': audio_id, 'title': title, 'formats': formats, 'description': description,