From e8f2857fa1727484cbe34d5213fbe952f44f3e0b Mon Sep 17 00:00:00 2001 From: dabiboo Date: Mon, 24 Aug 2015 20:28:54 +0200 Subject: [PATCH 001/215] [universalmusicfrance] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/universalmusicfrance.py | 62 ++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/universalmusicfrance.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d59882598..88757a382 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -687,6 +687,7 @@ from .udemy import ( ) from .udn import UDNEmbedIE from .ultimedia import UltimediaIE +from .universalmusicfrance import UniversalMusicFranceIE from .unistra import UnistraIE from .urort import UrortIE from .ustream import UstreamIE, UstreamChannelIE diff --git a/youtube_dl/extractor/universalmusicfrance.py b/youtube_dl/extractor/universalmusicfrance.py new file mode 100644 index 000000000..a2b4fc722 --- /dev/null +++ b/youtube_dl/extractor/universalmusicfrance.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..compat import ( + compat_urllib_request, +) + +from ..utils import ( + urlencode_postdata, +) + + +class UniversalMusicFranceIE(InfoExtractor): + _VALID_URL = r'https?://www\.universalmusic\.fr/artiste/.*/videos/(?P.*)#' + _TESTS = [ + { + 'url': 'https://www.universalmusic.fr/artiste/4428-avicii/videos/1881-waiting-for-love-lyric-video#contentPart', + 'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc)', + 'info_dict': { + 'id': '1881-waiting-for-love-lyric-video', + 'ext': 'mp4', + 'title': '1881-waiting-for-love-lyric-video' + } + } + , + { + # from http://www.wat.tv/video/anna-bergendahl-for-you-2015-7dvjn_76lkz_.html + 'url': 'http://www.universalmusic.fr/artiste/7415-anna-bergendahl/videos/4555-for-you-remix-lyric-video', + 'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc)', + 'info_dict': { + 'id': '4555-for-you-remix-lyric-video', + 'ext': 'mp4', + 'title': 'anna-bergendahl - for-you' + } + } + ] + GET_TOKEN_URL = 'http://www.universalmusic.fr/_/artiste/video/token' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + urlVideo = self._html_search_regex(r'var urlVideo = \'(.*)\';', webpage, 'urlVideo') + title = self._html_search_regex(r'', webpage, 'title') + + request = compat_urllib_request.Request(self.GET_TOKEN_URL, urlencode_postdata({'videoUrl': urlVideo})) + request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') + request.add_header('X-Requested-With', 'XMLHttpRequest') + manifest_json = self._download_webpage(request, None, note='Getting token', errnote='unable to get token') + + manifestUrl = self._parse_json(manifest_json, video_id).get("video") + print(manifestUrl); + return { + 'id': video_id, + 'title': title, + 'description': title, + 'formats': + self._extract_m3u8_formats( + manifestUrl, video_id, 'mp4') + } From ed11f1461e010ddb6f6b1f8584481f6315cb2841 Mon Sep 17 00:00:00 2001 From: dabiboo Date: Tue, 15 Sep 2015 23:34:13 +0200 Subject: [PATCH 002/215] wat : ultimedia and universalmusic redirect add redirection from wat extractor to ultimedia and universalmusicfrance extractors when ultimedia or universalmusicfrance videos are played in wat. --- youtube_dl/extractor/universalmusicfrance.py | 14 +- youtube_dl/extractor/wat.py | 171 +++++++++++-------- 2 files changed, 116 insertions(+), 69 deletions(-) diff --git a/youtube_dl/extractor/universalmusicfrance.py b/youtube_dl/extractor/universalmusicfrance.py index a2b4fc722..a9551d752 100644 --- a/youtube_dl/extractor/universalmusicfrance.py +++ b/youtube_dl/extractor/universalmusicfrance.py @@ -13,8 +13,18 @@ from ..utils import ( class UniversalMusicFranceIE(InfoExtractor): - _VALID_URL = r'https?://www\.universalmusic\.fr/artiste/.*/videos/(?P.*)#' + _VALID_URL = r'https?://www\.universalmusic\.fr/artiste/.*/videos/(?P.*)#?' _TESTS = [ + { + 'url': 'http://www.universalmusic.fr/artiste/7415-anna-bergendahl/videos/4555-for-you-remix-lyric-video.iframe', + 'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc)', + 'info_dict': { + 'id': '1881-waiting-for-love-lyric-video', + 'ext': 'mp4', + 'title': '1881-waiting-for-love-lyric-video' + } + } + , { 'url': 'https://www.universalmusic.fr/artiste/4428-avicii/videos/1881-waiting-for-love-lyric-video#contentPart', 'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc)', @@ -43,7 +53,7 @@ class UniversalMusicFranceIE(InfoExtractor): webpage = self._download_webpage(url, video_id) urlVideo = self._html_search_regex(r'var urlVideo = \'(.*)\';', webpage, 'urlVideo') - title = self._html_search_regex(r'', webpage, 'title') + title = self._html_search_regex(r'', webpage, 'title') request = compat_urllib_request.Request(self.GET_TOKEN_URL, urlencode_postdata({'videoUrl': urlVideo})) request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8') diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index affcc52f6..6465ae885 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals import re import hashlib +import universalmusicfrance +import ultimedia from .common import InfoExtractor from ..utils import ( @@ -15,6 +17,29 @@ class WatIE(InfoExtractor): _VALID_URL = r'http://www\.wat\.tv/video/(?P.*)-(?P.*?)_.*?\.html' IE_NAME = 'wat.tv' _TESTS = [ + { + 'url': 'http://www.wat.tv/video/anna-bergendahl-for-you-2015-7dvjn_76lkz_.html', + 'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc', + 'info_dict': { + 'id': '4555-for-you-remix-lyric-video', + 'display_id': '4555-for-you-remix-lyric-video', + 'ext': 'mp4', + 'title': 'For You - Anna Bergendahl - Universal Music France', + 'description': 'md5:1bbdde8d44751f43367ba68e8b9966a6' + }, + }, + { + 'url': 'http://www.wat.tv/video/david-guetta-titanium-feat-sia-4v6p5_4v69t_.html', + 'md5': '5c31a70358cd5019595297a26390cd46', + 'info_dict': { + 'id': 'qzkfx3', + 'display_id': 'qzkfx3', + 'ext': 'mp4', + 'title': 'David Guetta - Titanium feat. Sia (Clip)', + 'description': 'md5:bb28f8c4a84586e2eb1c3d092ab94f4b', + 'upload_date': '20111220' + }, + }, { 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', 'md5': 'ce70e9223945ed26a8056d413ca55dc9', @@ -57,81 +82,93 @@ class WatIE(InfoExtractor): short_id = mobj.group('short_id') display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id or short_id) - real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id') + srcIFrame = self._html_search_regex(r'