mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-13 11:27:30 +08:00
[telebasel] [simplex] Handle Telebasel articles in the generic
information extractor.
This commit is contained in:
parent
91d21e0a84
commit
36144cfe1b
@ -935,10 +935,7 @@ from .teamfourstar import TeamFourStarIE
|
|||||||
from .techtalks import TechTalksIE
|
from .techtalks import TechTalksIE
|
||||||
from .ted import TEDIE
|
from .ted import TEDIE
|
||||||
from .tele13 import Tele13IE
|
from .tele13 import Tele13IE
|
||||||
from .telebasel import (
|
from .telebasel import TelebaselIE
|
||||||
TelebaselMediathekIE,
|
|
||||||
TelebaselArticleIE,
|
|
||||||
)
|
|
||||||
from .telebruxelles import TeleBruxellesIE
|
from .telebruxelles import TeleBruxellesIE
|
||||||
from .telecinco import TelecincoIE
|
from .telecinco import TelecincoIE
|
||||||
from .telegraaf import TelegraafIE
|
from .telegraaf import TelegraafIE
|
||||||
|
@ -83,6 +83,7 @@ from .twentymin import TwentyMinutenIE
|
|||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
from .openload import OpenloadIE
|
from .openload import OpenloadIE
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
|
from .simplex import SimplexIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -1499,10 +1500,19 @@ class GenericIE(InfoExtractor):
|
|||||||
'timestamp': 1435711927,
|
'timestamp': 1435711927,
|
||||||
'upload_date': '20150701',
|
'upload_date': '20150701',
|
||||||
},
|
},
|
||||||
|
'add_ie': [VideoPressIE.ie_key()],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Simplex embed
|
||||||
|
'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '?channel=105100',
|
||||||
|
'title': 'Report: USR III einfach erklärt - Telebasel',
|
||||||
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [VideoPressIE.ie_key()],
|
'playlist_count': 3,
|
||||||
}
|
}
|
||||||
# {
|
# {
|
||||||
# # TODO: find another test
|
# # TODO: find another test
|
||||||
@ -2474,6 +2484,12 @@ class GenericIE(InfoExtractor):
|
|||||||
return _playlist_from_matches(
|
return _playlist_from_matches(
|
||||||
videopress_urls, ie=VideoPressIE.ie_key())
|
videopress_urls, ie=VideoPressIE.ie_key())
|
||||||
|
|
||||||
|
# Look for Simplex embeds
|
||||||
|
simplex_urls = SimplexIE._extract_urls(webpage)
|
||||||
|
if simplex_urls:
|
||||||
|
return _playlist_from_matches(
|
||||||
|
simplex_urls, ie=SimplexIE.ie_key())
|
||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default={}, expected_type='VideoObject')
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
|
@ -29,6 +29,13 @@ class SimplexIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage):
|
||||||
|
return ['simplex:%s:%s:%s:%s' % (
|
||||||
|
m.group('server_url'), m.group('customer_id'),
|
||||||
|
m.group('author_id'), m.group('project_id'))
|
||||||
|
for m in re.finditer(r'<iframe[^>]+src=["\']%s.+["\']' % SimplexHostsIE._VALID_URL, webpage)]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_width_height(resolution):
|
def _extract_width_height(resolution):
|
||||||
try:
|
try:
|
||||||
|
@ -8,20 +8,12 @@ from .simplex import SimplexIE
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_or_none,
|
|
||||||
remove_end,
|
|
||||||
try_get,
|
try_get,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelebaselBaseIE(InfoExtractor):
|
class TelebaselIE(InfoExtractor):
|
||||||
_SERVER_URL = 'https://video.telebasel.ch/'
|
|
||||||
_CUSTOMER_ID = '4062'
|
|
||||||
_AUTHOR_ID = '4063'
|
|
||||||
|
|
||||||
|
|
||||||
class TelebaselMediathekIE(TelebaselBaseIE):
|
|
||||||
IE_DESC = 'telebasel.ch Mediathek'
|
IE_DESC = 'telebasel.ch Mediathek'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
@ -34,6 +26,9 @@ class TelebaselMediathekIE(TelebaselBaseIE):
|
|||||||
/.*pid=(?P<pid>\d+).*
|
/.*pid=(?P<pid>\d+).*
|
||||||
)?
|
)?
|
||||||
'''
|
'''
|
||||||
|
_SERVER_URL = 'https://video.telebasel.ch/'
|
||||||
|
_CUSTOMER_ID = '4062'
|
||||||
|
_AUTHOR_ID = '4063'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://telebasel.ch/telebasel-gastro-tipp/?aid=4063&pid=75290&channel=15881',
|
'url': 'https://telebasel.ch/telebasel-gastro-tipp/?aid=4063&pid=75290&channel=15881',
|
||||||
@ -82,44 +77,3 @@ class TelebaselMediathekIE(TelebaselBaseIE):
|
|||||||
self._SERVER_URL, self._CUSTOMER_ID,
|
self._SERVER_URL, self._CUSTOMER_ID,
|
||||||
self._AUTHOR_ID, video_id),
|
self._AUTHOR_ID, video_id),
|
||||||
ie=SimplexIE.ie_key())
|
ie=SimplexIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
class TelebaselArticleIE(TelebaselBaseIE):
|
|
||||||
IE_DESC = 'telebasel.ch articles'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?telebasel\.ch/(?P<id>\d{4}/\d{2}/\d{2}/[^/]+)/?'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://telebasel.ch/2017/02/01/report-usr-iii-einfach-erklaert/?channel=105100',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2017/02/01/report-usr-iii-einfach-erklaert',
|
|
||||||
'title': 'Report: USR III einfach erklärt',
|
|
||||||
'description': 'md5:2cb2b94ac023a6a9517cffc58d500c7e',
|
|
||||||
},
|
|
||||||
'playlist_count': 3,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
search_url = urljoin(
|
|
||||||
self._SERVER_URL,
|
|
||||||
r'content/%s/%s/(?P<pid>\d+)' % (self._CUSTOMER_ID, self._AUTHOR_ID))
|
|
||||||
embed_regex = r'<iframe[^>]+src=["\']%s.+["\']' % search_url
|
|
||||||
entries = [
|
|
||||||
self.url_result(
|
|
||||||
'simplex:%s:%s:%s:%s' % (
|
|
||||||
self._SERVER_URL, self._CUSTOMER_ID,
|
|
||||||
self._AUTHOR_ID, m.group('pid')),
|
|
||||||
ie=SimplexIE.ie_key())
|
|
||||||
for m in re.finditer(embed_regex, webpage)]
|
|
||||||
|
|
||||||
title = strip_or_none(
|
|
||||||
remove_end(self._og_search_title(webpage), '- Telebasel'))
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
|
|
||||||
return self.playlist_result(
|
|
||||||
entries,
|
|
||||||
playlist_id=display_id,
|
|
||||||
playlist_title=title,
|
|
||||||
playlist_description=description)
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user