From 587d53ea81758e5c497ed9d3aff73a26ee7f52bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?RADICS=20=C3=81ron?= Date: Thu, 28 Dec 2017 02:21:03 +0100 Subject: [PATCH 1/2] [BrightcoveNew] Fix: find videos embedded in xilinx.com site In xilinx.com sites there are no video tags, the video_id can be found in a div tag. (the brightcove script is before the div (video-id) tag.) --- youtube_dl/extractor/brightcove.py | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index f04505011..b6b290c1a 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -484,6 +484,19 @@ class BrightcoveNewIE(AdobePassIE): # m3u8 download 'skip_download': True, } + }, { + # xilinx.com url embed + 'url': 'https://www.xilinx.com/video/soc/how-to-use-the-zynq-7000-verification-ip-verify-debug-simulation.html', + 'info_dict': { + 'id': '5607699465001', + 'ext': 'mp4', + 'title': 'How to use the Zynq 7000 Verification IP to verify and debug using simulation', + 'description': 'Learn how to efficiently verify designs that use Zynq 7000 Processing System using the Zynq 7000 VIP. 
This video introduces you to how to configure and how to simulate with the example project.', + 'duration': 456.66, + 'timestamp': 1507851806, + 'upload_date': '20171012', + 'uploader_id': '17209957001', + }, }, { # ref: prefixed video id 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', @@ -562,6 +575,29 @@ class BrightcoveNewIE(AdobePassIE): entries.append(bc_url) + for account_id, player_id, embed in re.findall( + r''']+src=["\'](?:https?:)?//players\.brightcove\.net/(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js''', webpage): + for video_id in re.findall(r''']*data-video-id=['"](\d+)['"]''', webpage): + + if not video_id: + continue + + if not account_id: + continue + + player_id = player_id or 'default' + + embed = embed or 'default' + + bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % ( + account_id, player_id, embed, video_id) + + if not ie._is_valid_url( + bc_url, video_id, 'possible brightcove video'): + continue + + entries.append(bc_url) + return entries def _parse_brightcove_metadata(self, json_data, video_id): From a04f6012d2a8f15030ab9a723d4edbf72c169175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ron=20RADICS?= Date: Sat, 30 Dec 2017 00:34:34 +0100 Subject: [PATCH 2/2] [Xilinx] Move xilinx.com specific code into a separate extractor --- youtube_dl/extractor/brightcove.py | 36 ---------------- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/xilinx.py | 67 ++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 36 deletions(-) create mode 100644 youtube_dl/extractor/xilinx.py diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index b6b290c1a..f04505011 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -484,19 +484,6 @@ class BrightcoveNewIE(AdobePassIE): # m3u8 download 'skip_download': True, } - }, { - # xilinx.com url embed - 'url': 
'https://www.xilinx.com/video/soc/how-to-use-the-zynq-7000-verification-ip-verify-debug-simulation.html', - 'info_dict': { - 'id': '5607699465001', - 'ext': 'mp4', - 'title': 'How to use the Zynq 7000 Verification IP to verify and debug using simulation', - 'description': 'Learn how to efficiently verify designs that use Zynq 7000 Processing System using the Zynq 7000 VIP. This video introduces you to how to configure and how to simulate with the example project.', - 'duration': 456.66, - 'timestamp': 1507851806, - 'upload_date': '20171012', - 'uploader_id': '17209957001', - }, }, { # ref: prefixed video id 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', @@ -575,29 +562,6 @@ class BrightcoveNewIE(AdobePassIE): entries.append(bc_url) - for account_id, player_id, embed in re.findall( - r''']+src=["\'](?:https?:)?//players\.brightcove\.net/(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js''', webpage): - for video_id in re.findall(r''']*data-video-id=['"](\d+)['"]''', webpage): - - if not video_id: - continue - - if not account_id: - continue - - player_id = player_id or 'default' - - embed = embed or 'default' - - bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % ( - account_id, player_id, embed, video_id) - - if not ie._is_valid_url( - bc_url, video_id, 'possible brightcove video'): - continue - - entries.append(bc_url) - return entries def _parse_brightcove_metadata(self, json_data, video_id): diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e7b93a699..f33c1b65a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1320,6 +1320,7 @@ from .xiami import ( XiamiArtistIE, XiamiCollectionIE ) +from .xilinx import XilinxIE from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE diff --git a/youtube_dl/extractor/xilinx.py b/youtube_dl/extractor/xilinx.py new file mode 
100644
index 000000000..c63e9a7c1
--- /dev/null
+++ b/youtube_dl/extractor/xilinx.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveNewIE
+from ..utils import (
+    ExtractorError,
+    orderedSet,
+)
+
+
+class XilinxIE(InfoExtractor):
+    """Extract videos from xilinx.com video pages.
+
+    These pages embed a Brightcove player: the player script URL carries the
+    account id, player id and embed name, while the video id sits in a
+    ``data-video-id`` attribute.  The rebuilt players.brightcove.net URL is
+    handed to BrightcoveNewIE as a ``url_transparent`` result together with
+    the category taken from the page URL.
+    """
+
+    _VALID_URL = r'https?://(?:www\.)?xilinx\.com/video/(?P<cat>[^/]+)/(?P<id>[\w-]+)\.html'
+    _TEST = {
+        'url': 'https://www.xilinx.com/video/hardware/model-composer-product-overview.html',
+        'info_dict': {
+            'id': '5678303886001',
+            'ext': 'mp4',
+            'title': 'Model Composer Product Overview',
+            'description': 'md5:806e3831788848342777cdc3947c3d58',
+            'timestamp': 1513121997,
+            'upload_date': '20171212',
+            'uploader_id': '17209957001',
+            'categories': ['hardware'],
+        },
+        'params': {
+            # NOTE(review): 't' is not explained anywhere in this file; confirm
+            # it is intended (e.g. rather than 'skip_download').
+            't': True,
+        },
+    }
+
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
+
+    # Brightcove player <script> tag: captures account id, player id, embed.
+    _PLAYER_SCRIPT_RE = r'''<script[^>]+src=["\'](?:https?:)?//players\.brightcove\.net/(\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js'''
+    # Element holding the numeric Brightcove video id.
+    _VIDEO_ID_RE = r'''<div[^>]*data-video-id=['"](\d+)['"]'''
+
+    def _real_extract(self, url):
+        """Resolve a xilinx.com video page to its Brightcove player URL.
+
+        Returns a ``url_transparent`` result pointing at BrightcoveNewIE with
+        the URL's category attached.  Raises ExtractorError when the page has
+        no Brightcove player script or no ``data-video-id`` attribute.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        page_id = mobj.group('id')
+        category = mobj.group('cat')
+
+        webpage = self._download_webpage(url, page_id)
+
+        player = re.search(self._PLAYER_SCRIPT_RE, webpage)
+        video_ids = orderedSet(re.findall(self._VIDEO_ID_RE, webpage))
+        if not player or not video_ids:
+            # Fail with a clear extractor error instead of an IndexError.
+            raise ExtractorError("Couldn't get any video urls.")
+
+        if len(video_ids) > 1:
+            self.report_warning(
+                'Got more than one video id, using the first one.')
+
+        account_id, player_id, embed = player.groups()
+        bc_url = self.BRIGHTCOVE_URL_TEMPLATE % (
+            account_id, player_id, embed, video_ids[0])
+
+        return {
+            '_type': 'url_transparent',
+            'url': bc_url,
+            'ie_key': BrightcoveNewIE.ie_key(),
+            # 'categories' is given as a list, even for a single category.
+            'categories': [category],
+        }