From 0bfe6b1405c9eda062039910e9c09009cd2129cb Mon Sep 17 00:00:00 2001 From: Bagira Date: Mon, 6 Feb 2017 13:30:27 +0100 Subject: [PATCH 1/3] [indavideo] Bugfix, support embedding Bugfix - Issue #11221 Site structure changed Code simplify Support embedded videos --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/generic.py | 24 ++++++ youtube_dl/extractor/indavideo.py | 117 ++++++++++++++--------------- 3 files changed, 80 insertions(+), 66 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 12cda36cc..419cd3235 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -416,10 +416,7 @@ from .imgur import ( ) from .ina import InaIE from .inc import IncIE -from .indavideo import ( - IndavideoIE, - IndavideoEmbedIE, -) +from .indavideo import IndavideoIE from .infoq import InfoQIE from .instagram import InstagramIE, InstagramUserIE from .internetvideoarchive import InternetVideoArchiveIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4156cf27d..76b026f6a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -82,6 +82,7 @@ from .twentymin import TwentyMinutenIE from .ustream import UstreamIE from .openload import OpenloadIE from .videopress import VideoPressIE +from .indavideo import IndavideoIE class GenericIE(InfoExtractor): @@ -1461,6 +1462,24 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 2, }, + { + # Indavideo embeds + 'url': 'http://streetkitchen.hu/2015/03/15/igy_kell_otthon_hamburgert_sutni', + 'info_dict': { + 'title': 'Így kell otthon hamburgert sütni', + 'id': '1693903', + 'ext': 'mp4', + 'upload_date': '20150314', + 'uploader': 'StreetKitchen', + 'description': 'Hogy készül a tökéletes házi hamburger buci? Mi a titka egy valódi hamburger húsnak? Mitől lesz csodaszép színe a savanyított hagymának? Mi az ultimate hamburger szósz? 
Megannyi kérdés, amelyekre a válasz a videóban érkezik.', + 'uploader_id': '546363', + 'timestamp': 1426330212, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [IndavideoIE.ie_key()], + }, { # 20 minuten embed 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', @@ -2473,6 +2492,11 @@ class GenericIE(InfoExtractor): info_dict.update(json_ld) return info_dict + # Look for Indavideo embeds + indavideo_urls = IndavideoIE._extract_urls(webpage) + if indavideo_urls: + return _playlist_from_matches(indavideo_urls, ie=IndavideoIE.ie_key()) + # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') if entries: diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 11cf3c609..1cd6f1b3d 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -9,8 +11,14 @@ from ..utils import ( ) -class IndavideoEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P[\da-f]+)' +class IndavideoIE(InfoExtractor): + _VALID_URL = r'''(?x) + (?:https?:)// + (?: + (?:.+?\.)?indavideo\.hu/(?:player/)?video/| + assets\.indavideo\.hu/swf/(?:inda)player\.swf\?.*\b(?:v(?:ID|id))= + ) + (?P[a-zA-Z0-9-_]+)''' _TESTS = [{ 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/', 'md5': 'f79b009c66194acacd40712a6778acfa', @@ -27,15 +35,39 @@ class IndavideoEmbedIE(InfoExtractor): 'duration': 72, 'age_limit': 0, 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'], - }, + } + }, { + 'url': 'http://indavideo.hu/video/Vicces_cica_1', + 'only_matching': True, }, { 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1', 'only_matching': True, }, { 'url': 
'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1', 'only_matching': True, + }, { + 'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz', + 'only_matching': True, + }, { + 'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko', + 'only_matching': True, + }, { + 'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci', + 'only_matching': True, + }, { + 'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt', + 'only_matching': True, + }, { + 'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes', + 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return [url for _, url in re.findall( + r'<(?:iframe[^>]+src|object[^>]+data)=(["\'])(?P(?:https?:)?//(?:(?:.+?\.)?indavideo\.hu/(?:player/)?video/|assets\.indavideo\.hu/swf/(?:inda)?player\.swf\?.*\b(?:v(?:ID|id))=)[a-zA-Z0-9-_]+)(?:\?|&|\1)', + webpage)] + def _real_extract(self, url): video_id = self._match_id(url) @@ -43,26 +75,39 @@ class IndavideoEmbedIE(InfoExtractor): 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, video_id)['data'] - title = video['title'] + title = video.get('title') + + filesh = video.get('filesh') video_urls = video.get('video_files', []) video_file = video.get('video_file') - if video: + if video_file: video_urls.append(video_file) video_urls = list(set(video_urls)) video_prefix = video_urls[0].rsplit('/', 1)[0] + ''' + ### flv files has not filesh in every format to get token for flv_file in video.get('flv_files', []): flv_url = '%s/%s' % (video_prefix, flv_file) if flv_url not in video_urls: video_urls.append(flv_url) + ''' - formats = [{ - 'url': video_url, - 'height': int_or_none(self._search_regex( - r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)), - } for video_url in video_urls] + formats = [] + for video_url in video_urls: + _height = self._search_regex( + r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', 
default=None) + _url = video_url + if filesh: + if _height in filesh: + _url += '&' if '?' in _url else '?' + _url += "token=" + filesh.get(_height) + formats.append({ + 'url': _url, + 'height': int_or_none(_height), + }) self._sort_formats(formats) timestamp = video.get('date') @@ -89,55 +134,3 @@ class IndavideoEmbedIE(InfoExtractor): 'tags': tags, 'formats': formats, } - - -class IndavideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P[^/#?]+)' - _TESTS = [{ - 'url': 'http://indavideo.hu/video/Vicces_cica_1', - 'md5': '8c82244ba85d2a2310275b318eb51eac', - 'info_dict': { - 'id': '1335611', - 'display_id': 'Vicces_cica_1', - 'ext': 'mp4', - 'title': 'Vicces cica', - 'description': 'Játszik a tablettel. :D', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Jet_Pack', - 'uploader_id': '491217', - 'timestamp': 1390821212, - 'upload_date': '20140127', - 'duration': 7, - 'age_limit': 0, - 'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'], - }, - }, { - 'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz', - 'only_matching': True, - }, { - 'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko', - 'only_matching': True, - }, { - 'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci', - 'only_matching': True, - }, { - 'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt', - 'only_matching': True, - }, { - 'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - embed_url = self._search_regex( - r']+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url') - - return { - '_type': 'url_transparent', - 'ie_key': 'IndavideoEmbed', - 'url': embed_url, - 'display_id': display_id, - } From 9f5bd79a0f2c3e7f30f5d403f00127f2e2124c30 Mon Sep 17 00:00:00 2001 From: Bagira Date: Thu, 6 Jul 2017 
15:00:36 +0200 Subject: [PATCH 2/3] [indavideo] title mandatory; backward compatibility Fixed mandatory field title Added IndavideoEmbedIE for backward compatibility --- youtube_dl/extractor/indavideo.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index 1cd6f1b3d..c1b07cc51 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -75,7 +75,7 @@ class IndavideoIE(InfoExtractor): 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id, video_id)['data'] - title = video.get('title') + title = video['title'] filesh = video.get('filesh') @@ -134,3 +134,10 @@ class IndavideoIE(InfoExtractor): 'tags': tags, 'formats': formats, } + +class IndavideoEmbedIE(InfoExtractor): + _VALID_URL = IndavideoIE._VALID_URL; + _TESTS = IndavideoIE._TESTS; + + def _real_extract(self, url): + return IndavideoIE._real_extract(self, url); From 337be3801203d996f5d2fb2fcfe519d860bb0fcd Mon Sep 17 00:00:00 2001 From: Bagira Date: Thu, 6 Jul 2017 15:22:03 +0200 Subject: [PATCH 3/3] [indavideo] using update_url_query --- youtube_dl/extractor/indavideo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py index c1b07cc51..d2bd63189 100644 --- a/youtube_dl/extractor/indavideo.py +++ b/youtube_dl/extractor/indavideo.py @@ -8,6 +8,7 @@ from ..utils import ( int_or_none, parse_age_limit, parse_iso8601, + update_url_query ) @@ -102,8 +103,8 @@ class IndavideoIE(InfoExtractor): _url = video_url if filesh: if _height in filesh: - _url += '&' if '?' in _url else '?' - _url += "token=" + filesh.get(_height) + _url = update_url_query(_url, {'token': filesh.get(_height)}); + formats.append({ 'url': _url, 'height': int_or_none(_height),