From f9e54710ad7ee6eaaf2a50ff68633246e4411941 Mon Sep 17 00:00:00 2001
From: Enes
Date: Wed, 19 Dec 2018 18:22:49 +0300
Subject: [PATCH 1/2] [tv8] add support for tv8.com.tr

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/tv8.py        | 41 ++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 youtube_dl/extractor/tv8.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c7a91a986..117525fc9 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1143,6 +1143,7 @@ from .tv2hu import TV2HuIE
 from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tv5mondeplus import TV5MondePlusIE
+from .tv8 import TV8IE
 from .tva import TVAIE
 from .tvanouvelles import (
     TVANouvellesIE,
diff --git a/youtube_dl/extractor/tv8.py b/youtube_dl/extractor/tv8.py
new file mode 100644
index 000000000..0d26e4a01
--- /dev/null
+++ b/youtube_dl/extractor/tv8.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TV8IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tv8\.com\.tr/[^/]+/(?P<id>[^?#&]+)-video\.htm'
+    IE_NAME = 'tv8'
+    _TESTS = [{
+        'url': 'https://www.tv8.com.tr/yemekteyiz/yemekteyiz-281-bolum-13122018-52578-video.htm',
+        'md5': '73be7e69708d37eb77643c12e8598b35',
+        'info_dict': {
+            'id': 'yemekteyiz-281-bolum-13122018-52578',
+            'ext': 'mp4',
+            'title': 'Yemekteyiz 281. bölüm (13/12/2018)',
+            'description': 'md5:01a9cc2115550dfa3b51772239082f6a',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 8667,
+            'timestamp': 1544780098,
+            'upload_date': '20181214',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        webpage = webpage.replace('URL', 'Url')
+
+        info = {
+            'id': video_id,
+        }
+
+        json_ld = self._search_regex(
+            r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>.*(?P<json_ld>{[^<]+VideoObject[^<]+}).*', webpage, 'JSON-LD', group='json_ld')
+
+        ld_info = self._json_ld(json_ld, video_id)
+        info.update(ld_info)
+
+        return info

From 795aea8120640e8acf12f1fc988f662befa26297 Mon Sep 17 00:00:00 2001
From: Enes
Date: Wed, 19 Dec 2018 18:40:49 +0300
Subject: [PATCH 2/2] [tv8] add support for chapters

---
 youtube_dl/extractor/tv8.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/tv8.py b/youtube_dl/extractor/tv8.py
index 0d26e4a01..cd3effb8f 100644
--- a/youtube_dl/extractor/tv8.py
+++ b/youtube_dl/extractor/tv8.py
@@ -1,17 +1,22 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+)
 
 
 class TV8IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tv8\.com\.tr/[^/]+/(?P<id>[^?#&]+)-video\.htm'
+    _VALID_URL = r'https?://(?:www\.)?tv8\.com\.tr/[^/]+/(?P<id>[^?#&]+-video-?(?P<chapter>[0-9]+)?)\.htm'
     IE_NAME = 'tv8'
     _TESTS = [{
         'url': 'https://www.tv8.com.tr/yemekteyiz/yemekteyiz-281-bolum-13122018-52578-video.htm',
         'md5': '73be7e69708d37eb77643c12e8598b35',
         'info_dict': {
-            'id': 'yemekteyiz-281-bolum-13122018-52578',
+            'id': 'yemekteyiz-281-bolum-13122018-52578-video',
             'ext': 'mp4',
             'title': 'Yemekteyiz 281. bölüm (13/12/2018)',
             'description': 'md5:01a9cc2115550dfa3b51772239082f6a',
@@ -20,16 +25,24 @@ class TV8IE(InfoExtractor):
             'timestamp': 1544780098,
             'upload_date': '20181214',
         },
+    },
+    {
+        'url': 'https://www.tv8.com.tr/masterchef-turkiye/masterchef-turkiye-30-bolum-11122018-52440-video-3.htm',
+        'only_matching': True
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        chapter_number = int_or_none(mobj.group('chapter')) or 1
 
         webpage = self._download_webpage(url, video_id)
         webpage = webpage.replace('URL', 'Url')
 
         info = {
             'id': video_id,
+            'chapter_number': chapter_number,
         }
 
         json_ld = self._search_regex(