1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-18 06:57:24 +08:00

[Filmweb] Add extractor

This commit is contained in:
Déstin Reed 2016-08-17 16:02:59 +02:00
parent 7273e5849b
commit 10d7458601
3 changed files with 92 additions and 0 deletions

View File

@ -258,6 +258,7 @@ from .facebook import FacebookIE
from .faz import FazIE
from .fc2 import FC2IE
from .fczenit import FczenitIE
from .filmweb import FilmwebIE
from .firstpost import FirstpostIE
from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
@ -905,6 +906,7 @@ from .tvplay import (
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE
from .twentytwotracks import (
TwentyTwoTracksIE,
TwentyTwoTracksGenreIE

View File

@ -0,0 +1,45 @@
from __future__ import unicode_literals
from .twentythreevideo import TwentyThreeVideoIE
class FilmwebIE(TwentyThreeVideoIE):
    """Extractor for trailer article pages on filmweb.no.

    The article page supplies the title and Open Graph metadata. The media
    location comes from a separate JSON endpoint whose ``embedCode`` field
    contains a ``filmweb.23video.com`` URL; that URL is handed to the
    inherited ``_extract_formats`` helper to build the format list.
    """

    IE_NAME = 'Filmweb'
    # The dot before "ece" is escaped so that only a literal ".ece" suffix
    # matches (an unescaped dot would accept any character there).
    _VALID_URL = r'https?://(?:www\.)?filmweb\.no/trailere/article(?P<id>\d+)\.ece'
    _TEST = {
        'url': 'http://www.filmweb.no/trailere/article1264921.ece',
        'md5': 'e353f47df98e557d67edaceda9dece89',
        'info_dict': {
            'id': '1264921',
            'title': 'Det som en gang var',
            'ext': 'mp4',
            'description': 'Trailer: Scener fra et vennskap',
        }
    }

    # NOTE(review): _CLIENT_NAME is not read anywhere in this class; kept
    # because other code may rely on the attribute.
    _CLIENT_NAME = 'filmweb'
    # Passed to _extract_formats as the client id of the download URL.
    _CLIENT_ID = '12732917'
    # JSON endpoint; %s is replaced by the article id taken from the URL.
    _EMBED_BASE_URL = 'http://www.filmweb.no/template/ajax/json_trailerEmbed.jsp?articleId=%s&autoplay=true'

    def _real_extract(self, url):
        """Build the info dict for a single filmweb.no trailer article.

        :param url: article URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``title``, ``alt_title``, ``formats``
            and ``description``.
        """
        article_id = self._match_id(url)
        webpage = self._download_webpage(url, article_id)

        # The title is mandatory, so the default (fatal) lookup is used.
        title = self._search_regex(
            r'var\s+jsTitle\s*=\s*escape\("([^"]+)"\);',
            webpage, 'title')

        # The embed snippet is delivered inside a JSON document; the
        # protocol-relative 23video URL is pulled out of that snippet.
        embed_code = self._download_json(
            self._EMBED_BASE_URL % article_id, article_id)['embedCode']
        format_url = self._proto_relative_url(self._html_search_regex(
            r'"(//filmweb\.23video\.com/[^"]+)"', embed_code, 'format url'))

        formats = self._extract_formats(format_url, self._CLIENT_ID)
        self._sort_formats(formats)

        return {
            'id': article_id,
            'title': title,
            # alt_title is optional metadata: default=None keeps a page
            # without og:title from aborting an otherwise good extraction.
            'alt_title': self._og_search_title(webpage, default=None),
            'formats': formats,
            'description': self._og_search_description(webpage),
        }

View File

@ -0,0 +1,45 @@
from __future__ import unicode_literals
from .common import InfoExtractor
class TwentyThreeVideoIE(InfoExtractor):
    """Base extractor for videos hosted on ``<client>.23video.com``.

    Direct extraction is not implemented (see ``_real_extract``); the class
    currently serves subclasses through ``_extract_formats``, which turns a
    23video player URL plus a client id into a list of download formats.
    """

    IE_NAME = '23video'
    # Dots in "v.ihtml" and "player.html" are escaped so they match literally.
    _VALID_URL = r'https?://(?:www\.)?(?P<client>[\w-]+)\.23video\.com/v\.ihtml/player\.html.*photo_id=(?P<id>\d+)'
    _TEST = {}

    # Placeholders, in order: client name, client id, video id, token,
    # format key.
    _URL_TEMPLATE = 'https://%s.23video.com/%s/%s/%s/%s/download-video.mp4'

    # Format keys usable as the last placeholder of _URL_TEMPLATE, with the
    # frame size reported for each.
    _FORMATS = {
        'video_hd': {
            'width': 1280,
            'height': 720,
        },
        'video_medium': {
            'width': 640,
            'height': 360,
        },
        'video_mobile_high': {
            'width': 320,
            'height': 180,
        }
    }

    def _extract_formats(self, url, client_id):
        """Build one format entry per known format key for a 23video URL.

        :param url: URL on ``<client>.23video.com`` carrying the video id
            (``photo_id=`` or its percent-encoded spelling ``photo%5fid=``)
            and a ``token=`` parameter.
        :param client_id: client id inserted into the download URL.
        :returns: list of dicts with ``format_id``, ``url``, ``width`` and
            ``height``.

        NOTE(review): the three lookups use ``_search_regex`` without a
        default, so a URL lacking any of the parts presumably aborts the
        extraction -- confirm against InfoExtractor.
        """
        # Same character class as the ``client`` group of _VALID_URL, so
        # client names with digits or hyphens are captured in full.
        client_name = self._search_regex(
            r'([\w-]+)\.23video\.com', url, 'client name')
        # Accept both the literal underscore and its percent-encoded form.
        video_id = self._search_regex(
            r'photo(?:_|%5[fF])id=([^?&]+)', url, 'video id')
        token = self._search_regex(r'token=([^?&]+)', url, 'token')

        return [{
            'format_id': format_key,
            'url': self._URL_TEMPLATE % (
                client_name, client_id, video_id, token, format_key),
            'width': dimensions.get('width'),
            'height': dimensions.get('height'),
        } for format_key, dimensions in self._FORMATS.items()]

    def _real_extract(self, url):
        """Direct extraction is unsupported; always raises NotImplementedError."""
        # TODO: Find out how to extract client_id
        raise NotImplementedError('Not able to extract the `client_id`')