diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 6a7028a4d..c41fe24b6 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -715,6 +715,7 @@ from .onet import (
     OnetMVPIE,
     OnetPlIE,
 )
+from .onetvcrimea import TvCrimeaIE
 from .onionstudios import OnionStudiosIE
 from .ooyala import (
     OoyalaIE,
diff --git a/youtube_dl/extractor/onetvcrimea.py b/youtube_dl/extractor/onetvcrimea.py
new file mode 100644
index 000000000..d6bb514f8
--- /dev/null
+++ b/youtube_dl/extractor/onetvcrimea.py
@@ -0,0 +1,44 @@
+# coding: utf-8
+# Extractor for 1tvcrimea.ru, written for issue #12466:
+# https://github.com/rg3/youtube-dl/issues/12466
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TvCrimeaIE(InfoExtractor):
+    """Information extractor for videos published on 1tvcrimea.ru pages."""
+
+    IE_NAME = '1tvcrimea'
+    IE_DESC = '1 TV Crimea'
+    _VALID_URL = r'https?://(?:www\.)?1tvcrimea\.ru/(?P<id>.*)'
+    # A single test case goes in _TEST; _TESTS is reserved for a list of cases.
+    _TEST = {
+        'url': 'http://1tvcrimea.ru/pages/category/000003-sportivnye/program/000031-moj-sport/video/025239-moj-sport',
+        'md5': 'f79ca8774b6b276715038f26cfbb5c1b',
+        'file': 'a',
+        'info_dict': {
+            'id': '2017-03-09-17-41-23-5769-moy-sport-efir-9-marta',
+            'ext': 'mp4',
+            # Cyrillic title, written as \u escapes to keep this file ASCII-only.
+            'title': '\u041C\u043E\u0439 \u0441\u043F\u043E\u0440\u0442 / \u0422\u0435\u043B\u0435\u0440\u0430\u0434\u0438\u043E\u043A\u043E\u043C\u043F\u0430\u043D\u0438\u044F "\u041A\u0440\u044B\u043C"',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Download the page at *url* and return its info dict (id, title, url, ext)."""
+        # The part of the URL captured by _VALID_URL labels the download in log output.
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        # NOTE(review): this pattern is empty and has no capture group, so it
+        # cannot yield the id interpolated into the media URL below; the real
+        # expression must be restored before this extractor can work.
+        video_id = self._html_search_regex(r'', webpage, 'url')
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': 'http://vid.techbee.pro:8080/vod/%s.mp4' % video_id,
+            'ext': 'mp4',
+        }