From 248d456db3c7ba86eb7037433d425fd7d4afa91f Mon Sep 17 00:00:00 2001
From: difeng
Date: Tue, 3 Jan 2017 17:57:44 +0800
Subject: [PATCH 1/2] [ncpaclassic] Add new extractor

---
Summary: registers and adds two playlist extractors for www.ncpa-classic.com.
NcpaClassicVideoIE handles the dated VIDE/VIDA .shtml pages and
NcpaClassicAudioIE handles /clt/more/NNN/index.shtml pages. Both look up
media URLs through vdn.apps.cntv.cn/api/getHttpVideoInfo.do and return
self.playlist_result(entries, playlist_id, playlist_title).

NOTE(review): this copy was re-flowed from a whitespace-collapsed,
tag-stripped export, so indentation is reconstructed (line counts agree with
the hunk headers). The named groups (?P<id>...) and (?P<videoCenterId>...)
were restored because the export showed a bare "(?P", which does not compile;
"id" is what _match_id reads and "videoCenterId" is the group= argument used
in the code. The <title>...</title> delimiters around (?P<title>.*) are
presumed - confirm against the upstream commit. The re.findall(r'(.*)', ...)
pattern for sub_titles and the author e-mail address also look truncated and
are left exactly as found - TODO confirm.

 youtube_dl/extractor/extractors.py  |   4 ++
 youtube_dl/extractor/ncpaclassic.py | 104 ++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)
 create mode 100644 youtube_dl/extractor/ncpaclassic.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 811db6219..9b359b373 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -664,6 +664,10 @@ from .nytimes import (
 )
 from .nuvid import NuvidIE
 from .nzz import NZZIE
+from .ncpaclassic import (
+    NcpaClassicVideoIE,
+    NcpaClassicAudioIE
+)
 from .odatv import OdaTVIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
diff --git a/youtube_dl/extractor/ncpaclassic.py b/youtube_dl/extractor/ncpaclassic.py
new file mode 100644
index 000000000..dc1679360
--- /dev/null
+++ b/youtube_dl/extractor/ncpaclassic.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+class NcpaClassicVideoIE(InfoExtractor):
+    _VALID_URL = r'http://www\.ncpa-classic\.com/[0-9]{4}/[0-9]{2}/[0-9]{2}/VID[E A](?P<id>\w*)\.shtml'
+    _TESTS = [{
+        'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
+        'info_dict': {
+            'id': '1369219508996867',
+            'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
+        },
+        'playlist_count': 8,
+    },{
+        'url': 'http://ncpa-classic.cntv.cn/2013/05/22/VIDE1369219508996867.shtml',
+        'info_dict': {
+            'id': '1369219508996867',
+            'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
+        },
+        'playlist_count': 8,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url,playlist_id)
+        entries = []
+        if 'VIDE' in url:
+            videoCenterId = self._html_search_regex(r'var initMyAray=\s*\'(?P<videoCenterId>\w*)\'',webpage,'videoCenterId', group='videoCenterId')
+            playlist_title = self._html_search_regex(
+                r'<title>(?P<title>.*)</title>', webpage,
+                'title', group='title')
+            api_result = self._download_json(
+                'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=%s&tz=-8&from=000dajuyuan&url=%s&idl=32&idlr=32&modifyed=false' % (
+                    videoCenterId,url),playlist_id, 'Get playlist links')
+            entries = [{'_type': 'video',
+                        'id':'%s' % idx,
+                        'title':playlist_title,
+                        'url': video.get('url')
+                        } for idx,video in enumerate(api_result['video']['chapters2'])]
+
+        elif 'VIDA' in url:
+            playlist_title = self._html_search_regex(
+                r'<title>(?P<title>.*)</title>', webpage,
+                'title', group='title')
+            sub_titles = re.findall(r'(.*)',webpage)
+            vida_ids = re.findall(r'"(\w{32})"',webpage)
+            for idx,vida_id in enumerate(vida_ids):
+                title = sub_titles[idx]
+                api_result = self._download_json(
+                    'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=%s&tz=-8&from=000dajuyuan&url=%s&idl=32&idlr=32&modifyed=false' % (
+                        vida_id,url),playlist_id, 'Get playlist links')
+                video_json = api_result['video']['chapters']
+                real_url = video_json[0]['url']
+                entries.append({'_type': 'video',
+                                'id':'%s' % idx,
+                                'title':title,
+                                'url': real_url})
+        else:
+            raise ExtractorError('Unexpected url %s' % url, expected=True)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title)
+
+
+class NcpaClassicAudioIE(InfoExtractor):
+    _VALID_URL = r'http://www\.ncpa-classic\.com/clt/more/(?P<id>[0-9]*)/index.shtml'
+    _TESTS = [{
+        'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
+        'info_dict': {
+            'id': '416',
+            'title': '来自维也纳的新年贺礼'
+        },
+        'playlist_count': 1,
+    },{
+        'url': 'http://ncpa-classic.cntv.cn/clt/more/416/index.shtml',
+        'info_dict': {
+            'id': '416',
+            'title': '来自维也纳的新年贺礼'
+        },
+        'playlist_count': 1,
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url,playlist_id)
+        videoCenterId = self._html_search_regex(r'\"(?P<videoCenterId>\w{32})\"',webpage,'videoCenterId', group='videoCenterId')
+        playlist_title = self._html_search_regex(
+            r'<title>(?P<title>.*)</title>', webpage,
+            'title', group='title')
+        api_result = self._download_json(
+            'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=%s&tz=-8&tai=dajuyuanaudio' % (
+                videoCenterId),playlist_id, 'Get playlist links')
+        entries = [{'_type': 'video',
+                    'id': '%s' % idx,
+                    'title':playlist_title,
+                    'url': video.get('url')
+                    } for idx,video in enumerate(api_result['video']['chapters'])]
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title)

From af47470b93533680e66cc38510a4bb61aab1f19e Mon Sep 17 00:00:00 2001
From: difeng
Date: Tue, 3 Jan 2017 18:02:33 +0800
Subject: [PATCH 2/2] [ncpaclassic] Add new extractor

---
Summary: removes the second test case (the ncpa-classic.cntv.cn URL) from
both extractors, leaving one test each, and drops the trailing comma after
'playlist_count'. Those cntv.cn URLs do not match either _VALID_URL, which
only accepts http://www.ncpa-classic.com/.

NOTE(review): context-line indentation is reconstructed to agree with the
re-flowed copy of patch 1/2 in this series.

 youtube_dl/extractor/ncpaclassic.py | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/youtube_dl/extractor/ncpaclassic.py b/youtube_dl/extractor/ncpaclassic.py
index dc1679360..c125bcb77 100644
--- a/youtube_dl/extractor/ncpaclassic.py
+++ b/youtube_dl/extractor/ncpaclassic.py
@@ -14,14 +14,7 @@ class NcpaClassicVideoIE(InfoExtractor):
             'id': '1369219508996867',
             'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
         },
-        'playlist_count': 8,
-    },{
-        'url': 'http://ncpa-classic.cntv.cn/2013/05/22/VIDE1369219508996867.shtml',
-        'info_dict': {
-            'id': '1369219508996867',
-            'title': '小泽征尔音乐塾 音乐梦想无国界_古典音乐频道'
-        },
-        'playlist_count': 8,
+        'playlist_count': 8
     }]
 
     def _real_extract(self, url):
@@ -74,14 +67,7 @@ class NcpaClassicAudioIE(InfoExtractor):
             'id': '416',
             'title': '来自维也纳的新年贺礼'
         },
-        'playlist_count': 1,
-    },{
-        'url': 'http://ncpa-classic.cntv.cn/clt/more/416/index.shtml',
-        'info_dict': {
-            'id': '416',
-            'title': '来自维也纳的新年贺礼'
-        },
-        'playlist_count': 1,
+        'playlist_count': 1
     }]
 
     def _real_extract(self, url):