From d39c087c3275ba9761dfc05aa71e1c589cbfccef Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Sun, 3 Sep 2017 14:37:07 +0200 Subject: [PATCH 1/7] [Heise] Add support for embedded youtube videos --- youtube_dl/extractor/heise.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 382f32771..676012d10 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -9,6 +9,8 @@ from ..utils import ( xpath_text, ) +import re + class HeiseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' @@ -40,6 +42,17 @@ class HeiseIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('fulltitle', webpage, default=None) + if not title or title == "c't": + title = self._search_regex( + r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') + + yt_videos = re.findall( + r'<iframe[^>]+class="yt_video"[^>]+src="//([^"]+)', webpage) + if yt_videos: + return self.playlist_from_matches(yt_videos, title, 'Youtube') + container_id = self._search_regex( r'
<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', webpage, 'container ID') @@ -47,12 +60,6 @@ class HeiseIE(InfoExtractor): r'
<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', webpage, 'sequenz ID') - title = self._html_search_meta('fulltitle', webpage, default=None) - if not title or title == "c't": - title = self._search_regex( - r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title') - doc = self._download_xml( 'http://www.heise.de/videout/feed', video_id, query={ 'container': container_id, From 5fc3b08e536974475d0c67aa669bc483d0f41381 Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Sun, 3 Sep 2017 20:11:37 +0200 Subject: [PATCH 2/7] [Heise] Added test, fixed URL handed to YoutubeIE --- youtube_dl/extractor/heise.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 676012d10..4d9d1c2fd 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -27,6 +27,18 @@ class HeiseIE(InfoExtractor): 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', 'thumbnail': r're:^https?://.*/gallery/$', } + }, { + 'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html', + 'md5': 'e403d2b43fea8e405e88e3f8623909f1', + 'info_dict': { + 'id': '6kmWbXleKW4', + 'ext': 'mp4', + 'title': 'NEU IM SEPTEMBER | Netflix', + 'description': 'md5:2131f3c7525e540d5fd841de938bd452', + 'upload_date': '20170830', + 'uploader': 'Netflix Deutschland, Österreich und Schweiz', + 'uploader_id': 'netflixdach', + } }, { 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', 'only_matching': True, @@ -51,6 +63,8 @@ class HeiseIE(InfoExtractor): yt_videos = re.findall( r'<iframe[^>]+class="yt_video"[^>]+src="//([^"]+)', webpage) if yt_videos: + for i in range(len(yt_videos)): + yt_videos[i] = 'https://' + yt_videos[i] return self.playlist_from_matches(yt_videos, title, 'Youtube') container_id = self._search_regex( From c636e0fd25d91b99ee0351418e8b2f9aab7a9b13 Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Sun, 3 Sep 2017 
14:37:07 +0200 Subject: [PATCH 3/7] [Heise] Add support for embedded youtube videos --- youtube_dl/extractor/heise.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 382f32771..676012d10 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -9,6 +9,8 @@ from ..utils import ( xpath_text, ) +import re + class HeiseIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?heise\.de/(?:[^/]+/)+[^/]+-(?P<id>[0-9]+)\.html' @@ -40,6 +42,17 @@ class HeiseIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('fulltitle', webpage, default=None) + if not title or title == "c't": + title = self._search_regex( + r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', + webpage, 'title') + + yt_videos = re.findall( + r'<iframe[^>]+class="yt_video"[^>]+src="//([^"]+)', webpage) + if yt_videos: + return self.playlist_from_matches(yt_videos, title, 'Youtube') + container_id = self._search_regex( r'
<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', webpage, 'container ID') @@ -47,12 +60,6 @@ class HeiseIE(InfoExtractor): r'
<div class="videoplayerjw"[^>]+data-sequenz="([0-9]+)"', webpage, 'sequenz ID') - title = self._html_search_meta('fulltitle', webpage, default=None) - if not title or title == "c't": - title = self._search_regex( - r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', - webpage, 'title') - doc = self._download_xml( 'http://www.heise.de/videout/feed', video_id, query={ 'container': container_id, From f1f2c4832381b1acac77e5ba9c5ebe1c0e23dcb9 Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Sun, 3 Sep 2017 20:11:37 +0200 Subject: [PATCH 4/7] [Heise] Added test, fixed URL handed to YoutubeIE --- youtube_dl/extractor/heise.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 676012d10..4d9d1c2fd 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -27,6 +27,18 @@ class HeiseIE(InfoExtractor): 'description': 'md5:c934cbfb326c669c2bcabcbe3d3fcd20', 'thumbnail': r're:^https?://.*/gallery/$', } + }, { + 'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html', + 'md5': 'e403d2b43fea8e405e88e3f8623909f1', + 'info_dict': { + 'id': '6kmWbXleKW4', + 'ext': 'mp4', + 'title': 'NEU IM SEPTEMBER | Netflix', + 'description': 'md5:2131f3c7525e540d5fd841de938bd452', + 'upload_date': '20170830', + 'uploader': 'Netflix Deutschland, Österreich und Schweiz', + 'uploader_id': 'netflixdach', + } }, { 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', 'only_matching': True, @@ -51,6 +63,8 @@ class HeiseIE(InfoExtractor): yt_videos = re.findall( r'<iframe[^>]+class="yt_video"[^>]+src="//([^"]+)', webpage) if yt_videos: + for i in range(len(yt_videos)): + yt_videos[i] = 'https://' + yt_videos[i] return self.playlist_from_matches(yt_videos, title, 'Youtube') container_id = self._search_regex( From 6ef7228b3086a39cf9371e6bf1159696d7114c61 Mon Sep 17 00:00:00 2001 From: Kay B <> Date: Tue, 5 Sep 2017 
21:23:52 +0200 Subject: [PATCH 5/7] [Heise] Fix for PR #14109 --- youtube_dl/extractor/heise.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 4d9d1c2fd..a4f1dcd2f 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -8,8 +8,7 @@ from ..utils import ( parse_iso8601, xpath_text, ) - -import re +from .youtube import YoutubeIE class HeiseIE(InfoExtractor): @@ -60,13 +59,10 @@ class HeiseIE(InfoExtractor): r'<div[^>]+class="videoplayerjw"[^>]+data-title="([^"]+)"', webpage, 'title') - yt_videos = re.findall( - r'<iframe[^>]+class="yt_video"[^>]+src="//([^"]+)', webpage) - if yt_videos: - for i in range(len(yt_videos)): - yt_videos[i] = 'https://' + yt_videos[i] - return self.playlist_from_matches(yt_videos, title, 'Youtube') - + yt_urls = YoutubeIE._extract_urls(webpage) + if yt_urls: + return self.playlist_from_matches(yt_urls, video_id, title, ie=YoutubeIE.ie_key()) + container_id = self._search_regex( r'
<div class="videoplayerjw"[^>]+data-container="([0-9]+)"', webpage, 'container ID') From 4713395b1db2025a09d7359267a15840fc3ab850 Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 17 Sep 2017 22:45:40 +0700 Subject: [PATCH 6/7] Update heise.py --- youtube_dl/extractor/heise.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index a4f1dcd2f..8c1ba58d9 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -27,6 +27,7 @@ class HeiseIE(InfoExtractor): 'thumbnail': r're:^https?://.*/gallery/$', } }, { + # YouTube embed 'url': 'http://www.heise.de/newsticker/meldung/Netflix-In-20-Jahren-vom-Videoverleih-zum-TV-Revolutionaer-3814130.html', 'md5': 'e403d2b43fea8e405e88e3f8623909f1', 'info_dict': { @@ -37,7 +38,10 @@ class HeiseIE(InfoExtractor): 'upload_date': '20170830', 'uploader': 'Netflix Deutschland, Österreich und Schweiz', 'uploader_id': 'netflixdach', - } + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', 'only_matching': True, From 09c58a44ee5f6431f21cbb70d4a6170370a5cc4c Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 17 Sep 2017 22:45:58 +0700 Subject: [PATCH 7/7] Update heise.py --- youtube_dl/extractor/heise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py index 8c1ba58d9..495ffb7dc 100644 --- a/youtube_dl/extractor/heise.py +++ b/youtube_dl/extractor/heise.py @@ -2,13 +2,13 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( determine_ext, int_or_none, parse_iso8601, xpath_text, ) -from .youtube import YoutubeIE class HeiseIE(InfoExtractor):