1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 05:27:17 +08:00

[telebasel] [simplex] Handle Telebasel articles in the generic information extractor.
This commit is contained in:
Alex Seiler 2017-02-06 18:13:05 +01:00
parent 91d21e0a84
commit 36144cfe1b
4 changed files with 29 additions and 55 deletions

View File

@ -935,10 +935,7 @@ from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele13 import Tele13IE
from .telebasel import (
TelebaselMediathekIE,
TelebaselArticleIE,
)
from .telebasel import TelebaselIE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE

View File

@ -83,6 +83,7 @@ from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
from .openload import OpenloadIE
from .videopress import VideoPressIE
from .simplex import SimplexIE
class GenericIE(InfoExtractor):
@ -1499,10 +1500,19 @@ class GenericIE(InfoExtractor):
'timestamp': 1435711927,
'upload_date': '20150701',
},
'add_ie': [VideoPressIE.ie_key()],
},
{
# Simplex embed
'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
'info_dict': {
'id': '?channel=105100',
'title': 'Report: USR III einfach erklärt - Telebasel',
},
'params': {
'skip_download': True,
},
'add_ie': [VideoPressIE.ie_key()],
'playlist_count': 3,
}
# {
# # TODO: find another test
@ -2474,6 +2484,12 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches(
videopress_urls, ie=VideoPressIE.ie_key())
# Look for Simplex embeds
simplex_urls = SimplexIE._extract_urls(webpage)
if simplex_urls:
return _playlist_from_matches(
simplex_urls, ie=SimplexIE.ie_key())
# Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject')

View File

@ -29,6 +29,13 @@ class SimplexIE(InfoExtractor):
'only_matching': True,
}
@staticmethod
def _extract_urls(webpage):
    """Collect internal ``simplex:`` URLs for the Simplex iframes in *webpage*.

    Every ``<iframe>`` whose ``src`` matches ``SimplexHostsIE._VALID_URL`` is
    turned into ``simplex:<server_url>:<customer_id>:<author_id>:<project_id>``,
    using the named groups captured by that pattern.

    :param webpage: HTML source of the page to scan.
    :return: list of ``simplex:`` URL strings in the order the iframes
        appear; empty when no Simplex iframe is found.
    """
    # The rest of the ``src`` value is matched lazily. A greedy ``.+`` would
    # run to the last quote on the line and hide any further iframes on that
    # line (e.g. in minified HTML) from ``re.finditer``.
    embed_re = r'<iframe[^>]+src=["\']%s.+?["\']' % SimplexHostsIE._VALID_URL
    return [
        'simplex:%s:%s:%s:%s' % (
            m.group('server_url'), m.group('customer_id'),
            m.group('author_id'), m.group('project_id'))
        for m in re.finditer(embed_re, webpage)]
@staticmethod
def _extract_width_height(resolution):
try:

View File

@ -8,20 +8,12 @@ from .simplex import SimplexIE
from ..utils import (
ExtractorError,
str_or_none,
strip_or_none,
remove_end,
try_get,
urljoin,
)
# Common base for the Telebasel extractors. It only carries the constants
# that the subclasses substitute into 'simplex:<server>:<customer>:<author>:<id>'
# URLs before handing them over to SimplexIE.
class TelebaselBaseIE(InfoExtractor):
# Base URL of the Telebasel video server (first field of the simplex: URL).
_SERVER_URL = 'https://video.telebasel.ch/'
# Customer id used in the simplex: URL and in the player path
# 'content/<customer>/<author>/<id>'.
_CUSTOMER_ID = '4062'
# Author id used in the simplex: URL and in the same player path.
_AUTHOR_ID = '4063'
class TelebaselMediathekIE(TelebaselBaseIE):
class TelebaselIE(InfoExtractor):
IE_DESC = 'telebasel.ch Mediathek'
_VALID_URL = r'''(?x)
https?://
@ -34,6 +26,9 @@ class TelebaselMediathekIE(TelebaselBaseIE):
/.*pid=(?P<pid>\d+).*
)?
'''
_SERVER_URL = 'https://video.telebasel.ch/'
_CUSTOMER_ID = '4062'
_AUTHOR_ID = '4063'
_TESTS = [{
'url': 'https://telebasel.ch/telebasel-gastro-tipp/?aid=4063&pid=75290&channel=15881',
@ -82,44 +77,3 @@ class TelebaselMediathekIE(TelebaselBaseIE):
self._SERVER_URL, self._CUSTOMER_ID,
self._AUTHOR_ID, video_id),
ie=SimplexIE.ie_key())
class TelebaselArticleIE(TelebaselBaseIE):
    """Extractor for telebasel.ch articles.

    An article page may embed several Simplex players; all of them are
    returned together as one playlist.
    """

    IE_DESC = 'telebasel.ch articles'
    _VALID_URL = r'https?://(?:www\.)?telebasel\.ch/(?P<id>\d{4}/\d{2}/\d{2}/[^/]+)/?'

    _TEST = {
        'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
        'info_dict': {
            'id': '2017/02/01/report-usr-iii-einfach-erklaert',
            'title': 'Report: USR III einfach erklärt',
            'description': 'md5:2cb2b94ac023a6a9517cffc58d500c7e',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Download the article and build a playlist of its Simplex embeds.

        :param url: article URL matching ``_VALID_URL``.
        :return: playlist result whose id is the ``yyyy/mm/dd/slug`` part of
            the URL, titled with the page's og:title (trailing
            '- Telebasel' removed) and described by its og:description.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Join a plain URL first and escape it afterwards, so that the dots
        # of the host name match literally and urljoin never has to parse
        # regex syntax; the capture group is appended last.
        player_url_re = re.escape(urljoin(
            self._SERVER_URL,
            'content/%s/%s/' % (self._CUSTOMER_ID, self._AUTHOR_ID))) + r'(?P<pid>\d+)'
        # Lazy tail: a greedy ``.+`` would swallow every following iframe on
        # the same line, leaving only one entry per line of HTML.
        embed_regex = r'<iframe[^>]+src=["\']%s.+?["\']' % player_url_re

        entries = [
            self.url_result(
                'simplex:%s:%s:%s:%s' % (
                    self._SERVER_URL, self._CUSTOMER_ID,
                    self._AUTHOR_ID, m.group('pid')),
                ie=SimplexIE.ie_key())
            for m in re.finditer(embed_regex, webpage)]

        title = strip_or_none(
            remove_end(self._og_search_title(webpage), '- Telebasel'))
        description = self._og_search_description(webpage)

        return self.playlist_result(
            entries,
            playlist_id=display_id,
            playlist_title=title,
            playlist_description=description)