# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor import re import time from datetime import datetime class TvpleIE(InfoExtractor): _VALID_URL = r'https?://(?P(?:www\.)?tvple\.com/(?P[0-9]+))' _TESTS = [{ 'url': 'http://tvple.com/311090', 'md5': '46329fca94a29b5517a30d7e88f48dbf', 'info_dict': { 'id': '311090', 'ext': 'mp4', 'uploader': '[디지털 드럭] 나비붙이', 'uploader_id': 'jack1609', 'title': '팜플렛으로 yee를 연주하는 김병만', 'description': '자작입니다. 첫 조교..인가..? 조교라긴 애매하지만, 어쨋든 노래로 만드는 건 이번이 처음입니다.\n원본 영상 출처: https://www.youtube.com/watch?v=E4BPHBL35dE\nyee는 유튜브에 치면 원본 영상이 나오는데 다들 아시죠??? 저작권 문제가 될 경우는 지우겠습니다...\n\n병만로이드라고 불러야 하나??', 'duration': 9, 'upload_date': '20150531', 'timestamp': 1433094762 } }, { 'url': 'http://tvple.com/208230', 'md5': '98e4f705fbb77b0ad9afe6e86751d89a', 'info_dict': { 'id': '208230', 'ext': 'mp4', 'uploader': 'mesenghe', 'uploader_id': 'mesenghe', 'title': '소환사 협곡의 개새끼', 'description': 'http://youtu.be/LGABUervp48\n재밌게 봐라\n유튜브나 네이버 동영상으로 퍼가지 말고\n이젠 롤 관련된 건 안 만든다', 'duration': 71, 'upload_date': '20140927', 'timestamp': 1411776051 } }] def _convert_srt_subtitle(self, json, duration): """convert tvple subtitle to srt subtitle""" sec = [] sub = "" timecode = [] text = [] for i in json: if(i != 'status'): sec.append(int(i)) sec.sort() for second in sec: msec = [] for i in json[str(second)]: msec.append(int(i)) msec.sort() for millisecond in msec: timecode.append("%02d:%02d:%02d,%03d" % (second // 60 // 60, second // 60 % 60, second % 60, millisecond)) text.append(json[str(second)][str(millisecond)].replace('
', '\n').replace(' ', '')) timecode.append("%02d:%02d:%02d,%03d" % (duration // 60 // 60, duration // 60 % 60, duration % 60, int(("%0.3f" % duration)[-3:]))) for i in range(1, len(timecode)): sub += str(i) + '\n' + timecode[i - 1] + ' --> ' + timecode[i] + '\n' + text[i - 1] + '\n\n' return sub def _convert_ass_cloud(self, json, videoid, title, width, height): """convert tvple cloud to ass subtitle""" sec = [] asstemp1 = "[Script Info]\nTitle: %s\nScriptType: v4.00+\nWrapStyle: 0\nPlayResX: %d\nPlayResY: %d\nScaledBorderAndShadow: yes\n\n[V4+ Styles]\nFormat: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\nStyle: Default,Arial,25,&H23FFFFFF,&H000000FF,&HC8000000,&HC8000000,-1,0,0,0,100,100,0,0,1,2,2,5,10,10,10,1\n\n" % (title + '-' + videoid, width, height) for i in json: if(i != '_warning'): sec.append(int(i)) sec.sort() asstemp2 = "[Events]\nFormat: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n" for second in sec: for subs in json[str(second)]: timecodea = "%02d:%02d:%02d.00" % (second // 60 // 60, second // 60 % 60, second % 60) timecodeb = "%02d:%02d:%02d.00" % ((second + 2) // 60 // 60, (second + 2) // 60 % 60, (second + 2) % 60) asstemp2 += "Dialogue: 0,%s,%s,Default,,0,0,0,,{\\an4\pos(%d,%d)\\fad(0,50)}%s\n" % (timecodea, timecodeb, subs['x'] * width, subs['y'] * height, subs['text']) return (asstemp1 + asstemp2) def _get_subtitles(self, json, title, videoid, duration, width, height): subs = {} subs['tvple'] = [] if json['cloud']['read_url'][0] != '': subs['tvple'].append({ 'ext': 'ass', 'data': self._convert_ass_cloud(self._download_json(json['cloud']['read_url'], 'cloud_%d' % int(videoid)), videoid, title, width, height) }) if json['subtitle'] != '': subs['tvple'].append({ 'ext': 'srt', 'data': self._convert_srt_subtitle(self._download_json(json['subtitle'], 'subtitle_%d' % int(videoid)), duration) }) return subs def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) playpage = self._download_json(re.search(r'data-meta="(.*?)"', webpage).group(1), "playurl_%d" % int(video_id)) title = re.search("\s*(.*?)\s*<', webpage).group(1) # username uploader_id = re.search(r'"/ch/(.*)/videos"', webpage).group(1) # userid description = re.search(r'collapse-content linkify break-word video-body">\s*(.*)\s*