1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-13 11:27:30 +08:00

[telebasel] [simplex] Handle Telebasel articles in the generic information extractor.
This commit is contained in:
Alex Seiler 2017-02-06 18:13:05 +01:00
parent 91d21e0a84
commit 36144cfe1b
4 changed files with 29 additions and 55 deletions

View File

@ -935,10 +935,7 @@ from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tele13 import Tele13IE from .tele13 import Tele13IE
from .telebasel import ( from .telebasel import TelebaselIE
TelebaselMediathekIE,
TelebaselArticleIE,
)
from .telebruxelles import TeleBruxellesIE from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE from .telegraaf import TelegraafIE

View File

@ -83,6 +83,7 @@ from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .openload import OpenloadIE from .openload import OpenloadIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .simplex import SimplexIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1499,10 +1500,19 @@ class GenericIE(InfoExtractor):
'timestamp': 1435711927, 'timestamp': 1435711927,
'upload_date': '20150701', 'upload_date': '20150701',
}, },
'add_ie': [VideoPressIE.ie_key()],
},
{
# Simplex embed
'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
'info_dict': {
'id': '?channel=105100',
'title': 'Report: USR III einfach erklärt - Telebasel',
},
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [VideoPressIE.ie_key()], 'playlist_count': 3,
} }
# { # {
# # TODO: find another test # # TODO: find another test
@ -2474,6 +2484,12 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches( return _playlist_from_matches(
videopress_urls, ie=VideoPressIE.ie_key()) videopress_urls, ie=VideoPressIE.ie_key())
# Look for Simplex embeds
simplex_urls = SimplexIE._extract_urls(webpage)
if simplex_urls:
return _playlist_from_matches(
simplex_urls, ie=SimplexIE.ie_key())
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld( json_ld = self._search_json_ld(
webpage, video_id, default={}, expected_type='VideoObject') webpage, video_id, default={}, expected_type='VideoObject')

View File

@ -29,6 +29,13 @@ class SimplexIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
} }
@staticmethod
def _extract_urls(webpage):
    """Return a 'simplex:' pseudo-URL for every Simplex iframe in *webpage*.

    Each result has the form
    'simplex:<server_url>:<customer_id>:<author_id>:<project_id>', built
    from the named groups of the same names that SimplexHostsIE._VALID_URL
    is expected to define (they are read from every match below).
    An empty list is returned when no matching iframe is found.
    """
    # The tail after the player URL is restricted to non-quote characters.
    # A greedy '.+' would run on to the last quote of the line, merging
    # several iframes into one match, and would force at least one extra
    # character, letting the regex steal it from the last captured group.
    embed_re = r'<iframe[^>]+src=["\']%s[^"\']*["\']' % SimplexHostsIE._VALID_URL
    return [
        'simplex:%s:%s:%s:%s' % (
            m.group('server_url'), m.group('customer_id'),
            m.group('author_id'), m.group('project_id'))
        for m in re.finditer(embed_re, webpage)]
@staticmethod @staticmethod
def _extract_width_height(resolution): def _extract_width_height(resolution):
try: try:

View File

@ -8,20 +8,12 @@ from .simplex import SimplexIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
str_or_none, str_or_none,
strip_or_none,
remove_end,
try_get, try_get,
urljoin, urljoin,
) )
class TelebaselBaseIE(InfoExtractor): class TelebaselIE(InfoExtractor):
_SERVER_URL = 'https://video.telebasel.ch/'
_CUSTOMER_ID = '4062'
_AUTHOR_ID = '4063'
class TelebaselMediathekIE(TelebaselBaseIE):
IE_DESC = 'telebasel.ch Mediathek' IE_DESC = 'telebasel.ch Mediathek'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
@ -34,6 +26,9 @@ class TelebaselMediathekIE(TelebaselBaseIE):
/.*pid=(?P<pid>\d+).* /.*pid=(?P<pid>\d+).*
)? )?
''' '''
_SERVER_URL = 'https://video.telebasel.ch/'
_CUSTOMER_ID = '4062'
_AUTHOR_ID = '4063'
_TESTS = [{ _TESTS = [{
'url': 'https://telebasel.ch/telebasel-gastro-tipp/?aid=4063&pid=75290&channel=15881', 'url': 'https://telebasel.ch/telebasel-gastro-tipp/?aid=4063&pid=75290&channel=15881',
@ -82,44 +77,3 @@ class TelebaselMediathekIE(TelebaselBaseIE):
self._SERVER_URL, self._CUSTOMER_ID, self._SERVER_URL, self._CUSTOMER_ID,
self._AUTHOR_ID, video_id), self._AUTHOR_ID, video_id),
ie=SimplexIE.ie_key()) ie=SimplexIE.ie_key())
class TelebaselArticleIE(TelebaselBaseIE):
    # Extracts every Simplex player embedded in a telebasel.ch article page
    # and returns them as one playlist.
    IE_DESC = 'telebasel.ch articles'
    _VALID_URL = r'https?://(?:www\.)?telebasel\.ch/(?P<id>\d{4}/\d{2}/\d{2}/[^/]+)/?'

    _TEST = {
        'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
        'info_dict': {
            'id': '2017/02/01/report-usr-iii-einfach-erklaert',
            'title': 'Report: USR III einfach erklärt',
            'description': 'md5:2cb2b94ac023a6a9517cffc58d500c7e',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Build a playlist from the Simplex iframes found in the article.

        The article path matched by _VALID_URL (the 'id' group) is used as
        the playlist id. Title and description come from the page's
        Open Graph metadata; a trailing '- Telebasel' is removed from the
        title.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The server URL and ids are literal text, so they are escaped
        # before being placed in the pattern (an unescaped '.' would match
        # any character). The pattern is assembled directly rather than via
        # urljoin, which would treat the '?' of '(?P<pid>' as a query
        # separator.
        content_re = r'%scontent/%s/%s/(?P<pid>\d+)' % (
            re.escape(self._SERVER_URL),
            re.escape(self._CUSTOMER_ID),
            re.escape(self._AUTHOR_ID))
        # '[^"\']*' keeps the match inside one src attribute. A greedy '.+'
        # could span several iframes on one line and, when the src ends
        # right after the digits, would take the last digit away from pid.
        embed_regex = r'<iframe[^>]+src=["\']%s[^"\']*["\']' % content_re

        entries = [
            self.url_result(
                'simplex:%s:%s:%s:%s' % (
                    self._SERVER_URL, self._CUSTOMER_ID,
                    self._AUTHOR_ID, m.group('pid')),
                ie=SimplexIE.ie_key())
            for m in re.finditer(embed_regex, webpage)]

        title = strip_or_none(
            remove_end(self._og_search_title(webpage), '- Telebasel'))
        description = self._og_search_description(webpage)

        return self.playlist_result(
            entries,
            playlist_id=display_id,
            playlist_title=title,
            playlist_description=description)