From 540db7fffc23dd1dfb8495b069ba4ae0ebe65fb7 Mon Sep 17 00:00:00 2001
From: caiwan
Date: Tue, 16 Jan 2018 16:25:13 +0100
Subject: [PATCH 1/2] [Picarto] Add new extractor for recorded Picarto VODs

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/picarto.py    | 38 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 youtube_dl/extractor/picarto.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c82614bf9..03e1417f2 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -799,6 +799,7 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .picarto import PicartoVodIE
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py
new file mode 100644
index 000000000..d3596893b
--- /dev/null
+++ b/youtube_dl/extractor/picarto.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class PicartoVodIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[a-zA-Z_\-.0-9]+).flv'
+    _TEST = {
+        'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv',
+        'md5': '1ecd32f358fee23d8b3e6954880f78d4',
+        'info_dict': {
+            'id': 'Carrot_2018.01.11.07.55.12',
+            'ext': 'm3u8',
+            'title': 'Carrot_2018.01.11.07.55.12',
+            'thumbnail': r're:^https?://.*\.jpg$'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        vod_regex = "[^<]*riot.mount\(([^<]+)\)[^<]*<\/"
+        vod_script = self._html_search_regex(vod_regex, webpage, "vod_script")
+        vod_url = self._search_regex('vod: \\"([^\\"]*)\\"', vod_script, "vod_url")
+        vod_thumb = self._search_regex('vodThumb: \\"([^\\"]*)\\"', vod_script, "vod_thumb")
+
+        title = video_id
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': "",
+            'uploader': "",
+            'url' : vod_url,
+            'thumbnail' : vod_thumb
+        }

From 8f6bb35c83ac18aa5f02660934894115a3f9e806 Mon Sep 17 00:00:00 2001
From: caiwan
Date: Tue, 30 Jan 2018 16:42:49 +0100
Subject: [PATCH 2/2] [Picarto] Parse riot.mount() parameters as JSON

Extract the object literal passed to riot.mount() and parse it as JSON
instead of matching each field with its own regex. Import `re` after the
`__future__` import, escape the dot before the `flv` suffix in
_VALID_URL, and drop the empty 'description'/'uploader' fields.

---
 youtube_dl/extractor/picarto.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py
index d3596893b..3b58f6cab 100644
--- a/youtube_dl/extractor/picarto.py
+++ b/youtube_dl/extractor/picarto.py
@@ -1,11 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 
 
 class PicartoVodIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[a-zA-Z_\-.0-9]+).flv'
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[a-zA-Z_\-.0-9]+)\.flv'
     _TEST = {
         'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv',
         'md5': '1ecd32f358fee23d8b3e6954880f78d4',
@@ -21,18 +23,20 @@ class PicartoVodIE(InfoExtractor):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        vod_regex = "[^<]*riot.mount\(([^<]+)\)[^<]*<\/"
-        vod_script = self._html_search_regex(vod_regex, webpage, "vod_script")
-        vod_url = self._search_regex('vod: \\"([^\\"]*)\\"', vod_script, "vod_url")
-        vod_thumb = self._search_regex('vodThumb: \\"([^\\"]*)\\"', vod_script, "vod_thumb")
+        vod_regex = r'[^<]*riot\.mount\([^<]+({[^<]+})\)[^<]*<\/script>'
+        script = self._search_regex(vod_regex, webpage, 'vod_script')
+
+        # riot.mount() is passed a JavaScript object literal with bare keys;
+        # quote the keys so that the literal can be parsed as JSON.
+        params = self._parse_json(
+            script, video_id,
+            transform_source=lambda x: re.sub(r'(\w+)(:\s+)', r'"\1"\2', x))
 
         title = video_id
 
         return {
             'id': video_id,
             'title': title,
-            'description': "",
-            'uploader': "",
-            'url' : vod_url,
-            'thumbnail' : vod_thumb
+            'url': params['vod'],
+            'thumbnail': params.get('vodThumb'),
         }