From 528668da72a6ce3506d5bb93d94c7437e4c3ecab Mon Sep 17 00:00:00 2001 From: ping Date: Mon, 18 Jan 2016 18:08:59 +0800 Subject: [PATCH 1/4] [daum.net] Support for playlists, user channels --- youtube_dl/extractor/__init__.py | 2 + youtube_dl/extractor/daum.py | 134 ++++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0ba0d226f..588312f01 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -141,6 +141,8 @@ from .dailymotion import ( from .daum import ( DaumIE, DaumClipIE, + DaumPlaylistIE, + DaumUserIE, ) from .dbtv import DBTVIE from .dcn import ( diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index f08f57157..2a91628a3 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -2,6 +2,9 @@ from __future__ import unicode_literals +import re +import itertools + from .common import InfoExtractor from ..compat import compat_urllib_parse from ..utils import ( @@ -27,6 +30,16 @@ class DaumIE(InfoExtractor): 'view_count': int, 'comment_count': int, }, + }, { + 'url': 'http://m.tvpot.daum.net/v/65139429', + 'info_dict': { + 'id': '65139429', + 'ext': 'mp4', + 'title': 'md5:a100d65d09cec246d8aa9bde7de45aed', + 'description': 'md5:79794514261164ff27e36a21ad229fc5', + 'upload_date': '20150604', + 'duration': 154 + }, }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', 'only_matching': True, @@ -42,6 +55,10 @@ class DaumIE(InfoExtractor): 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' 
+ query, video_id, 'Downloading video formats info') + # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid + if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id): + return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id) + formats = [] for format_el in movie_data['output_list']['output_list']: profile = format_el['profile'] @@ -76,7 +93,7 @@ class DaumIE(InfoExtractor): class DaumClipIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)' + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)' IE_NAME = 'daum.net:clip' _TESTS = [{ @@ -90,6 +107,9 @@ class DaumClipIE(InfoExtractor): 'duration': 3868, 'view_count': int, }, + }, { + 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', + 'only_matching': True, }] def _real_extract(self, url): @@ -110,3 +130,115 @@ class DaumClipIE(InfoExtractor): 'view_count': int_or_none(clip_info.get('play_count')), 'ie_key': 'Daum', } + + +class DaumListIE(InfoExtractor): + def _get_entries(self, list_id, list_id_type): + name = None + entries = [] + for pagenum in itertools.count(start=1): + list_info = self._download_json( + 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % ( + pagenum, list_id_type, list_id), list_id,'Downloading list info - %s' % pagenum) + + entries.extend([ + self.url_result( + 'http://tvpot.daum.net/v/%s' % clip['vid']) + for clip in list_info['clip_list'] + ]) + + if not name: + name = list_info.get('playlist_bean', {}).get('name') or \ + list_info.get('potInfo', {}).get('name') + + if not list_info.get('has_more'): + break + + return name, entries + + +class DaumPlaylistIE(DaumListIE): + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)' + IE_NAME = 'daum.net:playlist'
+ + _TESTS = [{ + 'note': 'Playlist url with clipid', + 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844', + 'info_dict': { + 'id': '6213966', + 'title': 'Woorissica Official', + }, + 'playlist_mincount': 181 + }, { + 'note': 'Playlist url with clipid - noplaylist', + 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844', + 'info_dict': { + 'id': '73806844', + 'ext': 'mp4', + 'title': '151017 Airport', + 'upload_date': '20160117', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + } + }] + + def _real_extract(self, url): + if DaumClipIE.suitable(url) and self._downloader.params.get('noplaylist'): + return self.url_result(url, 'DaumClip') + + list_id = self._match_id(url) + name, entries = self._get_entries(list_id, 'playlistid') + + return self.playlist_result(entries, list_id, name) + + +class DaumUserIE(DaumListIE): + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.do\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)' + IE_NAME = 'daum.net:user' + + _TESTS = [{ + 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0', + 'info_dict': { + 'id': 'o2scDLIVbHc0', + 'title': '마이 리틀 텔레비전', + }, + 'playlist_mincount': 213 + }, { + 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156', + 'info_dict': { + 'id': '73801156', + 'ext': 'mp4', + 'title': '[미공개] 김구라, 오만석이 부릅니다 &#39;오케피&#39; - 마이 리틀 텔레비전 20160116', + 'upload_date': '20160117', + 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36' + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + } + }, { + 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence', + 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631', + 'info_dict': { + 'id': '6196631', + 'title': '마이 리틀 텔레비전 - 20160109', + }, + 'playlist_count': 11 + }, { + 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0', + 'only_matching': True, + }] + + def _real_extract(self, url): +
if DaumClipIE.suitable(url) and self._downloader.params.get('noplaylist'): + return self.url_result(url, 'DaumClip') + + if DaumPlaylistIE.suitable(url): + return self.url_result(url, 'DaumPlaylist') + + list_id = self._match_id(url) + name, entries = self._get_entries(list_id, 'ownerid') + + return self.playlist_result(entries, list_id, name) From 3e7c72bb35ee9e7541b3380b1977cb5922009b7c Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 27 Jan 2016 12:38:34 +0800 Subject: [PATCH 2/4] [daum.net] Remove error fix, and improve code for playlist/user channel support --- youtube_dl/extractor/daum.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 2a91628a3..9b144ed58 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals -import re import itertools from .common import InfoExtractor @@ -11,6 +10,7 @@ from ..utils import ( int_or_none, str_to_int, xpath_text, + unescapeHTML, ) @@ -30,16 +30,6 @@ class DaumIE(InfoExtractor): 'view_count': int, 'comment_count': int, }, - }, { - 'url': 'http://m.tvpot.daum.net/v/65139429', - 'info_dict': { - 'id': '65139429', - 'ext': 'mp4', - 'title': 'md5:a100d65d09cec246d8aa9bde7de45aed', - 'description': 'md5:79794514261164ff27e36a21ad229fc5', - 'upload_date': '20150604', - 'duration': 154 - }, }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', 'only_matching': True, @@ -55,10 +45,6 @@ class DaumIE(InfoExtractor): 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' 
+ query, video_id, 'Downloading video formats info') - # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid - if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id): - return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id) - formats = [] for format_el in movie_data['output_list']['output_list']: profile = format_el['profile'] @@ -93,7 +79,7 @@ class DaumIE(InfoExtractor): class DaumClipIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)' + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)' IE_NAME = 'daum.net:clip' _TESTS = [{ @@ -107,9 +93,6 @@ class DaumClipIE(InfoExtractor): 'duration': 3868, 'view_count': int, }, - }, { - 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', - 'only_matching': True, }] def _real_extract(self, url): @@ -122,7 +105,7 @@ class DaumClipIE(InfoExtractor): '_type': 'url_transparent', 'id': video_id, 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'], - 'title': clip_info['title'], + 'title': unescapeHTML(clip_info['title']), 'thumbnail': clip_info.get('thumb_url'), 'description': clip_info.get('contents'), 'duration': int_or_none(clip_info.get('duration')), @@ -136,7 +119,7 @@ class DaumListIE(InfoExtractor): def _get_entries(self, list_id, list_id_type): name = None entries = [] - for pagenum in itertools.count(start=1): + for pagenum in itertools.count(1): list_info = self._download_json( 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % ( pagenum, list_id_type, list_id), list_id,'Downloading list info - %s' % pagenum) @@ -189,6 +172,8 @@ class DaumPlaylistIE(DaumListIE): return self.url_result(url, 'DaumClip') list_id = self._match_id(url) + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id) +
name, entries = self._get_entries(list_id, 'playlistid') return self.playlist_result(entries, list_id, name) @@ -210,7 +195,7 @@ class DaumUserIE(DaumListIE): 'info_dict': { 'id': '73801156', 'ext': 'mp4', - 'title': '[미공개] 김구라, 오만석이 부릅니다 &#39;오케피&#39; - 마이 리틀 텔레비전 20160116', + 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116', 'upload_date': '20160117', 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36' }, @@ -239,6 +224,7 @@ class DaumUserIE(DaumListIE): return self.url_result(url, 'DaumPlaylist') list_id = self._match_id(url) + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id) name, entries = self._get_entries(list_id, 'ownerid') return self.playlist_result(entries, list_id, name) From 41a8aa6bf043b489c24efdb2722108f057472de2 Mon Sep 17 00:00:00 2001 From: ping Date: Mon, 18 Jan 2016 18:08:59 +0800 Subject: [PATCH 3/4] [daum.net] Support for playlists, user channels --- youtube_dl/extractor/__init__.py | 2 + youtube_dl/extractor/daum.py | 113 +++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5e0d7d3dc..e05d986ef 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -142,6 +142,8 @@ from .dailymotion import ( from .daum import ( DaumIE, DaumClipIE, + DaumPlaylistIE, + DaumUserIE, ) from .dbtv import DBTVIE from .dcn import ( diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index 9bc345f60..d08996633 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re +import itertools from .common import InfoExtractor from ..compat import ( @@ -151,3 +152,115 @@ class DaumClipIE(InfoExtractor): 'view_count': int_or_none(clip_info.get('play_count')), 'ie_key': 'Daum', } + + +class DaumListIE(InfoExtractor): + def _get_entries(self, list_id, list_id_type): + name = None + entries =
[] + for pagenum in itertools.count(start=1): + list_info = self._download_json( + 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % ( + pagenum, list_id_type, list_id), list_id,'Downloading list info - %s' % pagenum) + + entries.extend([ + self.url_result( + 'http://tvpot.daum.net/v/%s' % clip['vid']) + for clip in list_info['clip_list'] + ]) + + if not name: + name = list_info.get('playlist_bean', {}).get('name') or \ + list_info.get('potInfo', {}).get('name') + + if not list_info.get('has_more'): + break + + return name, entries + + +class DaumPlaylistIE(DaumListIE): + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)' + IE_NAME = 'daum.net:playlist' + + _TESTS = [{ + 'note': 'Playlist url with clipid', + 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844', + 'info_dict': { + 'id': '6213966', + 'title': 'Woorissica Official', + }, + 'playlist_mincount': 181 + }, { + 'note': 'Playlist url with clipid - noplaylist', + 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844', + 'info_dict': { + 'id': '73806844', + 'ext': 'mp4', + 'title': '151017 Airport', + 'upload_date': '20160117', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + } + }] + + def _real_extract(self, url): + if DaumClipIE.suitable(url) and self._downloader.params.get('noplaylist'): + return self.url_result(url, 'DaumClip') + + list_id = self._match_id(url) + name, entries = self._get_entries(list_id, 'playlistid') + + return self.playlist_result(entries, list_id, name) + + +class DaumUserIE(DaumListIE): + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.do\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)' + IE_NAME = 'daum.net:user' + + _TESTS = [{ + 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0', + 'info_dict': { + 'id': 'o2scDLIVbHc0', + 'title': '마이 리틀 텔레비전', + }, + 'playlist_mincount': 213 + }, { + 'url':
'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156', + 'info_dict': { + 'id': '73801156', + 'ext': 'mp4', + 'title': '[미공개] 김구라, 오만석이 부릅니다 &#39;오케피&#39; - 마이 리틀 텔레비전 20160116', + 'upload_date': '20160117', + 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36' + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + } + }, { + 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence', + 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631', + 'info_dict': { + 'id': '6196631', + 'title': '마이 리틀 텔레비전 - 20160109', + }, + 'playlist_count': 11 + }, { + 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0', + 'only_matching': True, + }] + + def _real_extract(self, url): + if DaumClipIE.suitable(url) and self._downloader.params.get('noplaylist'): + return self.url_result(url, 'DaumClip') + + if DaumPlaylistIE.suitable(url): + return self.url_result(url, 'DaumPlaylist') + + list_id = self._match_id(url) + name, entries = self._get_entries(list_id, 'ownerid') + + return self.playlist_result(entries, list_id, name) From be74e48d59d68c73adbcdd1b40c8747224225b0a Mon Sep 17 00:00:00 2001 From: ping Date: Wed, 27 Jan 2016 12:38:34 +0800 Subject: [PATCH 4/4] [daum.net] Remove error fix, and improve code for playlist/user channel support --- youtube_dl/extractor/daum.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py index d08996633..a10673108 100644 --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals -import re import itertools from .common import InfoExtractor @@ -14,6 +13,7 @@ from ..utils import ( int_or_none, str_to_int, xpath_text, + unescapeHTML, ) @@ -114,7 +114,7 @@ class DaumIE(InfoExtractor): class DaumClipIE(InfoExtractor): - _VALID_URL =
r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)' + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P<id>\d+)' IE_NAME = 'daum.net:clip' _TESTS = [{ @@ -129,9 +129,6 @@ class DaumClipIE(InfoExtractor): 'duration': 3868, 'view_count': int, }, - }, { - 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425', - 'only_matching': True, }] def _real_extract(self, url): @@ -144,7 +141,7 @@ class DaumClipIE(InfoExtractor): '_type': 'url_transparent', 'id': video_id, 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'], - 'title': clip_info['title'], + 'title': unescapeHTML(clip_info['title']), 'thumbnail': clip_info.get('thumb_url'), 'description': clip_info.get('contents'), 'duration': int_or_none(clip_info.get('duration')), @@ -158,7 +155,7 @@ class DaumListIE(InfoExtractor): def _get_entries(self, list_id, list_id_type): name = None entries = [] - for pagenum in itertools.count(start=1): + for pagenum in itertools.count(1): list_info = self._download_json( 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % ( pagenum, list_id_type, list_id), list_id,'Downloading list info - %s' % pagenum) @@ -211,6 +208,8 @@ class DaumPlaylistIE(DaumListIE): return self.url_result(url, 'DaumClip') list_id = self._match_id(url) + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id) + name, entries = self._get_entries(list_id, 'playlistid') return self.playlist_result(entries, list_id, name) @@ -232,7 +231,7 @@ class DaumUserIE(DaumListIE): 'info_dict': { 'id': '73801156', 'ext': 'mp4', - 'title': '[미공개] 김구라, 오만석이 부릅니다 &#39;오케피&#39; - 마이 리틀 텔레비전 20160116', + 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116', 'upload_date': '20160117', 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36' }, @@ -261,6 +260,7 @@ class DaumUserIE(DaumListIE): return self.url_result(url, 'DaumPlaylist') list_id =
self._match_id(url) + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id) name, entries = self._get_entries(list_id, 'ownerid') return self.playlist_result(entries, list_id, name)