diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ecb33bc9e..22384babd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -484,7 +484,11 @@ from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE -from .kakao import KakaoIE +from .kakao import ( + KakaoIE, + KakaoPlaylistIE, + KakaoChannelIE, +) from .kaltura import KalturaIE from .kamcord import KamcordIE from .kanalplay import KanalPlayIE diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py index 7fa140b0c..ace47e5e6 100644 --- a/youtube_dl/extractor/kakao.py +++ b/youtube_dl/extractor/kakao.py @@ -2,18 +2,21 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( int_or_none, unified_timestamp, update_url_query, + ExtractorError ) class KakaoIE(InfoExtractor): _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P\d+)/cliplink/(?P\d+)' - _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks' + _API_BASE = 'http://tv.kakao.com/api/v1/ft' _TESTS = [{ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083', @@ -45,6 +48,17 @@ class KakaoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + playlist_id = self._search_regex(r'playlistId=(\d+)', url, 'channel_id', default=None) + if playlist_id: + if not self._downloader.params.get('noplaylist'): + chan_id = self._search_regex(r'channel/(\d+)', url, 'playlist_id') + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + 'http://tv.kakao.com/channel/%s/playlist/%s' % (chan_id, playlist_id) + ) + else: + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + player_header = { 'Referer': update_url_query( 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, { @@ -67,7 +81,7 @@ class KakaoIE(InfoExtractor): query = QUERY_COMMON.copy() query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList' impress = self._download_json( - '%s/%s/impress' % (self._API_BASE, video_id), + '%s/cliplinks/%s/impress' % (self._API_BASE, video_id), video_id, 'Downloading video info', query=query, headers=player_header) @@ -84,7 +98,7 @@ class KakaoIE(InfoExtractor): 'profile': 'HIGH', }) raw = self._download_json( - '%s/%s/raw' % (self._API_BASE, video_id), + '%s/cliplinks/%s/raw' % (self._API_BASE, video_id), video_id, 'Downloading video formats info', query=query, headers=player_header) @@ -93,7 +107,7 @@ class KakaoIE(InfoExtractor): try: profile_name = fmt['profile'] fmt_url_json = self._download_json( - '%s/%s/raw/videolocation' % (self._API_BASE, video_id), + '%s/cliplinks/%s/raw/videolocation' % (self._API_BASE, video_id), video_id, 'Downloading video URL for profile %s' % profile_name, query={ @@ -147,3 +161,127 @@ class KakaoIE(InfoExtractor): 'comment_count': int_or_none(clip.get('commentCount')), 'formats': formats, } + + +class KakaoPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P\d+)/playlist/(?P\d+)' + + _TESTS = [{ + 'url': 'http://tv.kakao.com/channel/2653401/playlist/12305', + 'info_dict': { + 'id': '12305', + 'title': '아는 형님 1회', + }, + 'params': { + 'skip_download': True + }, + 'playlist_count': 23 + }, { + 'note': 'Video url with playlist', + 'url': 'http://tv.kakao.com/channel/2657529/cliplink/301795620?playlistId=71340&metaObjectType=Playlist', + 'info_dict': { + 'id': '71340', + 'title': '오버워치 단편', + }, + 'params': { + 'skip_download': True + }, + 'playlist_mincount': 90 + }, { + 'note': 'Video url with playlist, but with --no-playlist ', + 'url': 'http://tv.kakao.com/channel/2657529/cliplink/301795620?playlistId=71340&metaObjectType=Playlist', + 'info_dict': { + 'id': '301795620', + 'ext': 'mp4', + 'title': '신영웅 떡밥 자세히 파헤치기', + 'upload_date': '20170224', + 'uploader_id': 2657529, + 'uploader': '게임친구 롤큐', + 'timestamp': 1487936269 + }, + 'params': { + 'skip_download': True, + 'noplaylist': True + } + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + webpage = self._download_webpage(url, list_id) + + try: + list_name = self._html_search_regex('class="loss_word tit_epiname"\>(.*)', webpage, 'list title') + except ExtractorError: + raise ExtractorError('This playlist is empty', expected=True) + + listelement = self._search_regex('(\
    )', webpage, 'lists', flags=re.DOTALL) + + entries = [] + for entry in re.findall(r'