From e38b29453fcb34e21f1a93aed3ad77ce7aaab62a Mon Sep 17 00:00:00 2001
From: unknown
Date: Sun, 17 Jul 2016 00:10:39 +0200
Subject: [PATCH] [Oddshot] Add new extractor (Closes #9685)

---
Review note: the blob id on the oddshot.py "index" line below was not
recomputed after the review changes; regenerate the patch with
git format-patch before relying on it for a 3-way merge.

 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/oddshot.py    | 57 +++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 youtube_dl/extractor/oddshot.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 10b2390bf..9cf6bd29c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -583,6 +583,7 @@ from .nytimes import (
     NYTimesArticleIE,
 )
 from .nuvid import NuvidIE
+from .oddshot import OddshotIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
 from .onet import (
diff --git a/youtube_dl/extractor/oddshot.py b/youtube_dl/extractor/oddshot.py
new file mode 100644
index 000000000..3eeb2fa9f
--- /dev/null
+++ b/youtube_dl/extractor/oddshot.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class OddshotIE(InfoExtractor):
+    """Extract clips ("shots") published on oddshot.tv."""
+
+    # Expected path layout: /shot/<channel>/<id>; the id stops at the first
+    # '/', '?', '#' or '&' so query strings never leak into it.
+    _VALID_URL = r'(?:https?://)?(?:www\.)?oddshot\.tv/shot/[^/]+/(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://oddshot.tv/shot/IzakOOO/UzoKaNE4KaNBtJu8zTVvomJd',
+        'md5': '1a927556df771148d20657120f096af5',
+        'info_dict': {
+            'id': 'UzoKaNE4KaNBtJu8zTVvomJd',
+            'ext': 'mp4',
+            'title': 'pozamiatane',
+            'description': 'Twitch - Counter-Strike: Global Offensive - IzakOOO',
+        },
+    }
+
+    # The default User-Agent does not work on this site, a curl one does.
+    _USER_AGENT = (
+        'curl/7.8 (i386-redhat-linux-gnu) libcurl 7.8 '
+        '(OpenSSL 0.9.6b) (ipv6 enabled)')
+
+    def _og_content(self, webpage, prop, name, fatal=True):
+        """Return the content attribute of the og:<prop> meta tag in webpage.
+
+        The tags are matched by hand because, per the original author, the
+        'data-react-helmet="true"' attribute gets in the way of the generic
+        Open Graph helpers.  name labels the field in messages and fatal is
+        forwarded unchanged to _html_search_regex.
+        """
+        return self._html_search_regex(
+            r'property="og:%s" content="(.+?)"' % prop,
+            webpage, name, fatal=fatal)
+
+    def _real_extract(self, url):
+        """Download the shot page and build the info dict from its og: tags."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, video_id, headers={'User-Agent': self._USER_AGENT})
+        self.report_extraction(video_id)
+
+        return {
+            'id': video_id,
+            'url': self._og_content(webpage, 'video:secure_url', 'video URL'),
+            'title': self._og_content(webpage, 'title', 'video Title'),
+            # Optional fields: their absence must not abort the extraction.
+            'description': self._og_content(
+                webpage, 'description', 'video Description', fatal=False),
+            'thumbnail': self._og_content(
+                webpage, 'image', 'video Thumbnail', fatal=False),
+        }