From e489984c1f7e6c11bd40ecc5c3a9f37b118f55de Mon Sep 17 00:00:00 2001 From: sichuan-pepper Date: Sat, 27 Oct 2018 03:40:44 +0900 Subject: [PATCH 1/3] [Twitcasting] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/twitcasting.py | 44 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/twitcasting.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 464c8d690..be70dd162 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1190,6 +1190,7 @@ from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE +from .twitcasting import TwitcastingIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py new file mode 100644 index 000000000..f7b078945 --- /dev/null +++ b/youtube_dl/extractor/twitcasting.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +import re + + +class TwitcastingIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^\/]+)?\.?twitcasting\.tv/(?P<uploader_id>[^\/]+)/movie/(?P<video_id>[0-9]+)$' + _TEST = { + 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', + 'md5': '745243cad58c4681dc752490f7540d7f', + 'info_dict': { + 'id': '2357609', + 'ext': 'mp4', + 'title': 'Recorded Live #2357609', + 'uploader_id': 'ivetesangalo', + 'description': "Moi! 
I'm live on TwitCasting from my iPhone.", + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + uploader_id = mobj.group('uploader_id') + + webpage = self._download_webpage(url, video_id) + + for m in re.finditer(r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage): + formats = self._extract_m3u8_formats(m.group('url'), video_id, ext='mp4') + thumbnail = self._og_search_thumbnail(webpage) + title = self._html_search_meta('twitter:title', webpage) + description = self._og_search_description(webpage) or self._html_search_meta('twitter:description', webpage) + return({ + 'id': video_id, + 'url': url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader_id': uploader_id, + 'formats': formats, + }) From 2417bf0daadca2c90278b4cc3ce2405a0a4f9d7c Mon Sep 17 00:00:00 2001 From: sichuan-pepper Date: Mon, 29 Oct 2018 04:11:32 +0900 Subject: [PATCH 2/3] [Twitcasting] Implemented requested changes. 
--- youtube_dl/extractor/twitcasting.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py index f7b078945..f35ed412f 100644 --- a/youtube_dl/extractor/twitcasting.py +++ b/youtube_dl/extractor/twitcasting.py @@ -7,7 +7,7 @@ import re class TwitcastingIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^\/]+)?\.?twitcasting\.tv/(?P<uploader_id>[^\/]+)/movie/(?P<video_id>[0-9]+)$' + _VALID_URL = r'https?://(?:www|ssl|en|pt|es|ja|ko\.)?twitcasting\.tv/(?P<uploader_id>[^\/]+)/movie/(?P<video_id>[0-9]+)' _TEST = { 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', 'md5': '745243cad58c4681dc752490f7540d7f', @@ -28,12 +28,12 @@ class TwitcastingIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - for m in re.finditer(r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage): - formats = self._extract_m3u8_formats(m.group('url'), video_id, ext='mp4') + playlist_url = self._html_search_regex(r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage, name='playlist url', group='url') + formats = self._extract_m3u8_formats(playlist_url.group('url'), video_id, ext='mp4') thumbnail = self._og_search_thumbnail(webpage) title = self._html_search_meta('twitter:title', webpage) description = self._og_search_description(webpage) or self._html_search_meta('twitter:description', webpage) - return({ + return{ 'id': video_id, 'url': url, 'title': title, @@ -41,4 +41,4 @@ class TwitcastingIE(InfoExtractor): 'thumbnail': thumbnail, 'uploader_id': uploader_id, 'formats': formats, - }) + } From f3c73dd40f757ad9addae1644b2b45266263a2c6 Mon Sep 17 00:00:00 2001 From: sichuan-pepper Date: Mon, 29 Oct 2018 04:43:36 +0900 Subject: [PATCH 3/3] [Twitcasting] fixed regex --- youtube_dl/extractor/twitcasting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py index f35ed412f..856df5c0b 100644 --- a/youtube_dl/extractor/twitcasting.py +++ 
b/youtube_dl/extractor/twitcasting.py @@ -7,7 +7,7 @@ import re class TwitcastingIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|ssl|en|pt|es|ja|ko\.)?twitcasting\.tv/(?P<uploader_id>[^\/]+)/movie/(?P<video_id>[0-9]+)' + _VALID_URL = r'https?://(?:(?:www|ssl|en|pt|es|ja|ko)\.)?twitcasting\.tv/(?P<uploader_id>[^\/]+)/movie/(?P<video_id>[0-9]+)' _TEST = { 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609', 'md5': '745243cad58c4681dc752490f7540d7f', @@ -29,7 +29,7 @@ class TwitcastingIE(InfoExtractor): webpage = self._download_webpage(url, video_id) playlist_url = self._html_search_regex(r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage, name='playlist url', group='url') - formats = self._extract_m3u8_formats(playlist_url.group('url'), video_id, ext='mp4') + formats = self._extract_m3u8_formats(playlist_url, video_id, ext='mp4') thumbnail = self._og_search_thumbnail(webpage) title = self._html_search_meta('twitter:title', webpage) description = self._og_search_description(webpage) or self._html_search_meta('twitter:description', webpage)