From 54fd7ac5c802e065c6912f0d899d734f4520714b Mon Sep 17 00:00:00 2001
From: qsniyg
Date: Wed, 14 Nov 2018 20:24:57 -0800
Subject: [PATCH] [yizhibo] Add new extractor

---
 youtube_dl/extractor/extractors.py |  3 ++-
 youtube_dl/extractor/yizhibo.py    | 49 ++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 youtube_dl/extractor/yizhibo.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index b2b00c86f..d15f9e40b 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1371,7 +1371,7 @@ from .webofstories import (
     WebOfStoriesPlaylistIE,
 )
 from .weibo import (
-    WeiboIE,
+    WeiboIE,
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
@@ -1423,6 +1423,7 @@ from .yandexdisk import YandexDiskIE
 from .yapfiles import YapFilesIE
 from .yesjapan import YesJapanIE
 from .yinyuetai import YinYueTaiIE
+from .yizhibo import YizhiboIE
 from .ynet import YnetIE
 from .youjizz import YouJizzIE
 from .youku import (
diff --git a/youtube_dl/extractor/yizhibo.py b/youtube_dl/extractor/yizhibo.py
new file mode 100644
index 000000000..03b6b5aaa
--- /dev/null
+++ b/youtube_dl/extractor/yizhibo.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    js_to_json,
+)
+
+
+class YizhiboIE(InfoExtractor):
+    """Extractor for yizhibo.com pages of the form ``/l/<id>.html``."""
+
+    _VALID_URL = r'https?://(?:wb\.)?yizhibo\.com/l/(?P<id>[^/?#&.]+)\.html(?:[?#].*)?'
+
+    _TEST = {
+        'url': 'https://wb.yizhibo.com/l/QcnhER5fkh_drtI3.html',
+        'md5': '4a61687d770de05fd2b67bd7f1b52bc3',
+        'info_dict': {
+            'id': 'QcnhER5fkh_drtI3',
+            'ext': 'm3u8',
+            'title': 'hyominnn00-一直播',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, formats) for the page at *url*."""
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._search_regex(
+            r'<title>(.*?)</title>', webpage, 'video title')
+
+        # The page assigns a JavaScript object literal to window.anchor;
+        # js_to_json rewrites it as JSON so it can be parsed. Its
+        # 'play_url' entry is the HLS playlist for the video.
+        anchor = self._parse_json(
+            self._search_regex(
+                r'window\.anchor *= *({[\s\S]*?});', webpage, 'video data'),
+            video_id, transform_source=js_to_json)
+
+        formats = self._extract_m3u8_formats(anchor['play_url'], video_id)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+        }