From 337a100b9371482871865f1e94377161ebd72fb6 Mon Sep 17 00:00:00 2001 From: Itay Perl Date: Sat, 30 Mar 2019 19:07:51 +0300 Subject: [PATCH 1/2] add extractor for 13tv.co.il --- docs/supportedsites.md | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/reshet.py | 42 ++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 youtube_dl/extractor/reshet.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 02bc088ab..7e88b869f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1,4 +1,5 @@ # Supported sites + - **13tv.co.il** - **1tv**: Первый канал - **1up.com** - **20min** diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 64d1fa251..bb924627a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -916,6 +916,7 @@ from .rentv import ( RENTVIE, RENTVArticleIE, ) +from .reshet import ReshetIE from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE diff --git a/youtube_dl/extractor/reshet.py b/youtube_dl/extractor/reshet.py new file mode 100644 index 000000000..f5cd21183 --- /dev/null +++ b/youtube_dl/extractor/reshet.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + urljoin, +) + + +class ReshetIE(InfoExtractor): + _VALID_URL = r'https?://13tv\.co\.il/item/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/(?P[^/]+)/?' + + _TEST = { + 'url': 'https://13tv.co.il/item/entertainment/gav-hauma/season-10/episodes/jz1a1-1028855', + 'note': 'Test brightcove URL extraction', + 'info_dict': { + 'id': '6015811232001', + 'ext': 'mp4', + 'timestamp': 1553031049, + 'title': 'entertainment-gav-hauma-season-10-episodes-10-full_au8bmF8M', + 'uploader_id': '1551111274001', + 'upload_date': '20190319', + } + } + + def _real_extract(self, url): + reshet_id = self._match_id(url) + + page = self._download_webpage(url, reshet_id) + data = self._parse_json(re.search(r'var initial_data = (.*?);\n', page).group(1), reshet_id) + item = data['items'][str(data['curItem'])] + brightcove_id = item['video']['videoID'] + + main_js_url = urljoin(url, re.search(r'