# coding: utf-8 from __future__ import unicode_literals import re import hashlib from .common import InfoExtractor from ..utils import ( ExtractorError, unified_strdate, ) class WatIE(InfoExtractor): _VALID_URL = r'http://www\.wat\.tv/video/(?P.*)-(?P.*?)_.*?\.html' IE_NAME = 'wat.tv' _TESTS = [ { 'url': 'http://www.wat.tv/video/lady-gaga-but-beautiful-2014-72611_2ey39_.html', 'md5': 'b7a3a5d2af2c7f16551265ef8929d8e5', 'info_dict': { 'id': '18879-but-beautiful', 'display_id': '18879-but-beautiful', 'ext': 'mp4', 'title': 'But Beautiful - Lady Gaga - Universal Music France', 'description': 'md5:7e0bff92535f1e5912d61dffe9845aa7' }, }, { 'url': 'http://www.wat.tv/video/anna-bergendahl-for-you-2015-7dvjn_76lkz_.html', 'md5': '159cda7568b9fc1e5e3de6aeca5d4bfc', 'info_dict': { 'id': '4555-for-you-remix-lyric-video', 'display_id': '4555-for-you-remix-lyric-video', 'ext': 'mp4', 'title': 'For You - Anna Bergendahl - Universal Music France', 'description': 'md5:1bbdde8d44751f43367ba68e8b9966a6' }, }, { 'url': 'http://www.wat.tv/video/david-guetta-titanium-feat-sia-4v6p5_4v69t_.html', 'md5': '5c31a70358cd5019595297a26390cd46', 'info_dict': { 'id': 'qzkfx3', 'display_id': 'qzkfx3', 'ext': 'mp4', 'title': 'David Guetta - Titanium feat. Sia (Clip)', 'description': 'md5:bb28f8c4a84586e2eb1c3d092ab94f4b', 'upload_date': '20111220' }, }, { 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', 'md5': 'ce70e9223945ed26a8056d413ca55dc9', 'info_dict': { 'id': '11713067', 'display_id': 'soupe-figues-l-orange-aux-epices', 'ext': 'mp4', 'title': 'Soupe de figues à l\'orange et aux épices', 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.', 'upload_date': '20140819', 'duration': 120, }, }, { 'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html', 'md5': 'fbc84e4378165278e743956d9c1bf16b', 'info_dict': { 'id': '11713075', 'display_id': 'gregory-lemarchal-voix-ange', 'ext': 'mp4', 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)', 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3', 'upload_date': '20140816', 'duration': 2910, }, 'skip': "Ce contenu n'est pas disponible pour l'instant.", }, ] def download_video_info(self, real_id): # 'contentv4' is used in the website, but it also returns the related # videos, we don't need them info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id) return info['media'] def _real_extract(self, url): def real_id_for_chapter(chapter): return chapter['tc_start'].split('-')[0] mobj = re.match(self._VALID_URL, url) short_id = mobj.group('short_id') display_id = mobj.group('display_id') webpage = self._download_webpage(url, display_id or short_id) srcIFrame = self._html_search_regex(r'