From f56bb72e7c46def243adcf1cbf6c3141d2c51c09 Mon Sep 17 00:00:00 2001 From: Vukkk Date: Wed, 31 Aug 2016 10:56:11 +0200 Subject: [PATCH 1/7] [tv2hu] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tv2hu.py | 85 ++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 youtube_dl/extractor/tv2hu.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 21efa96b2..7207d0b69 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -899,6 +899,7 @@ from .tv2 import ( TV2IE, TV2ArticleIE, ) +from .tv2hu import TV2HUIE from .tv3 import TV3IE from .tv4 import TV4IE from .tvc import ( diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py new file mode 100644 index 000000000..d9f250ff8 --- /dev/null +++ b/youtube_dl/extractor/tv2hu.py @@ -0,0 +1,85 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class TV2HUIE(InfoExtractor): + IE_NAME = 'tv2.hu' + _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:musoraink/)?(?P[^/]+)/(?:teljes_adasok/)?(?P[0-9]+)_(.+?)\.html' + _JSON_URL = r'(?Phttps?://.+?\.tv2\.hu/vod/(?P\d+)/id_(?P\d+).+?&type=json)' + + _TESTS = [{ + 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html', + 'info_dict': { + 'id': '217679', + 'ext': 'mp4', + 'title': 'Ezek megőrültek! - 1. adás 1. rész', + 'upload_id': '220289', + 'upload_date': '20160826', + 'uploader': 'ezek_megorultek', + 'thumbnail': 're:^https?://.*\.jpg$' + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html', + 'info_dict': { + 'id': '217677', + 'ext': 'mp4', + 'title': 'Ezek megőrültek! - 1. adás 2. rész', + 'upload_id': '220290', + 'upload_date': '20160826', + 'uploader': 'ezek_megorultek', + 'thumbnail': 're:^https?://.*\.jpg$' + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html', + 'info_dict': { + 'id': '217963', + 'ext': 'mp4', + 'title': 'AKTÍV / Aktív teljes adás - 2016.08.30. / tv2.hu', + 'upload_id': '220700', + 'upload_date': '20160830', + 'uploader': 'aktiv', + 'thumbnail': 're:^https?://.*\.jpg$' + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, 'Downloading info page') + + json_url = re.search(self._JSON_URL, webpage) + + json_data = self._download_json( + json_url.group('json_url'), video_id, 'Downloading video info') + + manifest_url = json_data['bitrates']['hls'] + + formats = self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage).strip(), + 'thumbnail': self._og_search_property('image', webpage), + 'uploader': self._search_regex(self._VALID_URL, url, 'uploader'), + 'upload_id': json_url.group('upload_id'), + 'upload_date': json_url.group('upload_date'), + 'formats': formats + } \ No newline at end of file From 537abfbbc5d0a5f7e04032c910bc2a41a7bb33cd Mon Sep 17 00:00:00 2001 From: Vukkk Date: Thu, 1 Sep 2016 02:36:51 +0200 Subject: [PATCH 2/7] [tv2hu] formats extended with http protocol --- youtube_dl/extractor/tv2hu.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py index d9f250ff8..59041248d 100644 --- a/youtube_dl/extractor/tv2hu.py +++ b/youtube_dl/extractor/tv2hu.py @@ -9,7 +9,7 @@ class TV2HUIE(InfoExtractor): IE_NAME = 'tv2.hu' _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:musoraink/)?(?P[^/]+)/(?:teljes_adasok/)?(?P[0-9]+)_(.+?)\.html' _JSON_URL = r'(?Phttps?://.+?\.tv2\.hu/vod/(?P\d+)/id_(?P\d+).+?&type=json)' - + _TESTS = [{ 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html', 'info_dict': { @@ -72,6 +72,21 @@ class TV2HUIE(InfoExtractor): formats = self._extract_m3u8_formats( manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') + # skip first, 'auto' format, same as 3rd (360p) + for i in range(len(json_data['bitrates']['mp4'])-1): + quality = str_to_int(json_data['mp4Labels'][i+1][:-1]) + + formats.append({ + 'protocol': 'http', + 'url': json_data['bitrates']['mp4'][i+1], + 'height': quality, + 'width': quality/9*16, + 'ext': 'mp4', + 'format_id': json_data['mp4Labels'][i+1], + 'format_note': 'HTTP', + 'preference': str_to_int(json_data['mp4Labels'][i+1][:-1]) + }) + self._sort_formats(formats) return { From 8e097d55580ebbd4c13121c6f0b7a2f5e7580819 Mon Sep 17 00:00:00 2001 From: Vukkk Date: Thu, 1 Sep 2016 02:42:29 +0200 Subject: [PATCH 3/7] [tv2hu] fixed iterator --- youtube_dl/extractor/tv2hu.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py index 59041248d..2edbf729b 100644 --- a/youtube_dl/extractor/tv2hu.py +++ b/youtube_dl/extractor/tv2hu.py @@ -73,18 +73,18 @@ class TV2HUIE(InfoExtractor): manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') # skip first, 'auto' format, same as 3rd (360p) - for i in range(len(json_data['bitrates']['mp4'])-1): - quality = str_to_int(json_data['mp4Labels'][i+1][:-1]) + for i in range(1, len(json_data['bitrates']['mp4'])): + quality = str_to_int(json_data['mp4Labels'][i][:-1]) formats.append({ 'protocol': 'http', - 'url': json_data['bitrates']['mp4'][i+1], + 'url': json_data['bitrates']['mp4'][i], 'height': quality, 'width': quality/9*16, 'ext': 'mp4', - 'format_id': json_data['mp4Labels'][i+1], + 'format_id': json_data['mp4Labels'][i], 'format_note': 'HTTP', - 'preference': str_to_int(json_data['mp4Labels'][i+1][:-1]) + 'preference': str_to_int(json_data['mp4Labels'][i][:-1]) }) self._sort_formats(formats) From 27a33de61cb4a37c15982b9077603d9edbbe5a35 Mon Sep 17 00:00:00 2001 From: Vukkk Date: Thu, 1 Sep 2016 02:52:37 +0200 Subject: [PATCH 4/7] [tv2hu] skip 'auto' quality --- youtube_dl/extractor/tv2hu.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py index 2edbf729b..39b8ce0d0 100644 --- a/youtube_dl/extractor/tv2hu.py +++ b/youtube_dl/extractor/tv2hu.py @@ -72,8 +72,10 @@ class TV2HUIE(InfoExtractor): formats = self._extract_m3u8_formats( manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') - # skip first, 'auto' format, same as 3rd (360p) for i in range(1, len(json_data['bitrates']['mp4'])): + if json_data['mp4Labels'][i].lower() == 'auto': + continue + quality = str_to_int(json_data['mp4Labels'][i][:-1]) formats.append({ From df5c8a8865c11191c95706f50384f6202e470af0 Mon Sep 17 00:00:00 2001 From: Vukkk Date: Thu, 1 Sep 2016 02:58:13 +0200 Subject: [PATCH 5/7] [tv2hu] parse every quality --- youtube_dl/extractor/tv2hu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py index 39b8ce0d0..3f996bfab 100644 --- a/youtube_dl/extractor/tv2hu.py +++ b/youtube_dl/extractor/tv2hu.py @@ -72,7 +72,7 @@ class TV2HUIE(InfoExtractor): formats = self._extract_m3u8_formats( manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') - for i in range(1, len(json_data['bitrates']['mp4'])): + for i in range(len(json_data['bitrates']['mp4'])): if json_data['mp4Labels'][i].lower() == 'auto': continue From 6a90314a26fc22c54df908e0532698cf414fb96b Mon Sep 17 00:00:00 2001 From: Vukkk Date: Mon, 26 Sep 2016 13:07:34 +0200 Subject: [PATCH 6/7] [tv2hu] using .get() on json --- youtube_dl/extractor/tv2hu.py | 42 ++++++++--------------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py index 3f996bfab..a3ca1a1c2 100644 --- a/youtube_dl/extractor/tv2hu.py +++ b/youtube_dl/extractor/tv2hu.py @@ -27,34 +27,10 @@ class TV2HUIE(InfoExtractor): } }, { 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html', - 'info_dict': { - 'id': '217677', - 'ext': 'mp4', - 'title': 'Ezek megőrültek! - 1. adás 2. rész', - 'upload_id': '220290', - 'upload_date': '20160826', - 'uploader': 'ezek_megorultek', - 'thumbnail': 're:^https?://.*\.jpg$' - }, - 'params': { - # m3u8 download - 'skip_download': True, - } + 'only_matching': True }, { 'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html', - 'info_dict': { - 'id': '217963', - 'ext': 'mp4', - 'title': 'AKTÍV / Aktív teljes adás - 2016.08.30. / tv2.hu', - 'upload_id': '220700', - 'upload_date': '20160830', - 'uploader': 'aktiv', - 'thumbnail': 're:^https?://.*\.jpg$' - }, - 'params': { - # m3u8 download - 'skip_download': True, - } + 'only_matching': True }] def _real_extract(self, url): @@ -73,20 +49,20 @@ class TV2HUIE(InfoExtractor): manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') for i in range(len(json_data['bitrates']['mp4'])): + quality = json_data.get('mp4Labels')[i] + if json_data['mp4Labels'][i].lower() == 'auto': continue - quality = str_to_int(json_data['mp4Labels'][i][:-1]) - formats.append({ 'protocol': 'http', 'url': json_data['bitrates']['mp4'][i], - 'height': quality, - 'width': quality/9*16, + 'height': int(quality[:-1]), + 'width': int(quality[:-1])/9*16, 'ext': 'mp4', - 'format_id': json_data['mp4Labels'][i], + 'format_id': quality, 'format_note': 'HTTP', - 'preference': str_to_int(json_data['mp4Labels'][i][:-1]) + 'preference': int(quality[:-1]) }) self._sort_formats(formats) @@ -99,4 +75,4 @@ class TV2HUIE(InfoExtractor): 'upload_id': json_url.group('upload_id'), 'upload_date': json_url.group('upload_date'), 'formats': formats - } \ No newline at end of file + } From 2081f98628f3f968b121f301b83fdef96de050a2 Mon Sep 17 00:00:00 2001 From: Vukkk Date: Mon, 26 Sep 2016 13:12:28 +0200 Subject: [PATCH 7/7] [tv2hu] using local variable --- youtube_dl/extractor/tv2hu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dl/extractor/tv2hu.py index a3ca1a1c2..cb1ce822a 100644 --- a/youtube_dl/extractor/tv2hu.py +++ b/youtube_dl/extractor/tv2hu.py @@ -51,7 +51,7 @@ class TV2HUIE(InfoExtractor): for i in range(len(json_data['bitrates']['mp4'])): quality = json_data.get('mp4Labels')[i] - if json_data['mp4Labels'][i].lower() == 'auto': + if quality.lower() == 'auto': continue formats.append({