1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-01-23 13:32:58 +08:00

scientology.py Add new extractor

This commit is contained in:
Sam 2018-04-14 03:10:00 -04:00
parent 68ddba20ae
commit b0434adccc
2 changed files with 43 additions and 0 deletions

View File

@ -941,6 +941,7 @@ from .safari import (
from .sapo import SapoIE from .sapo import SapoIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .scientology import ScientologyIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .scrippsnetworks import ScrippsNetworksWatchIE from .scrippsnetworks import ScrippsNetworksWatchIE

View File

@ -0,0 +1,42 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
class ScientologyIE(InfoExtractor):
    """Extractor for episode pages under ``scientology.tv/series/``."""

    # The dot before "html" is escaped so only a literal ".html" suffix
    # matches (an unescaped "." would accept any character there).
    _VALID_URL = r'https?://(?:www\.)?scientology\.tv/series/(?P<series_path>[^/?#]+)/(?P<id>[^/?#]+)\.html'
    _TEST = {
        'url': 'https://www.scientology.tv/series/l-ron-hubbard-in-his-own-voice/life-as-an-author.html',
        'info_dict': {
            'id': 'life-as-an-author',
            'ext': 'm3u8',
            'title': 'Life as an Author | L. Ron Hubbard: In his Own Voice',
            'description': 'The author on his real life adventures that thrilled millions, to his discoveries behind Dianetics.'
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single episode page.

        Downloads the page at ``url``, reads the title and (optional)
        description from its HTML, and combines the site root with the
        path found inside the ``<episode-video>`` element to form the
        media URL.

        Returns a dict with the keys ``id``, ``title``, ``description``
        and ``url``. A missing title or ``<episode-video>`` element is
        reported through the extractor's regex-search helpers instead of
        surfacing as an ``AttributeError`` on a failed ``re.search``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'title').strip()

        # The description is optional metadata: do not abort extraction
        # when the meta tag is absent.
        description = self._html_search_regex(
            r'<meta name="description" content="(.+?)" />',
            webpage, 'description', default=None)
        if description:
            # Collapse every run of characters outside the whitelist into
            # a single space. Raw string so "\s" reaches the regex engine
            # without tripping Python's invalid-escape warning.
            description = re.sub(
                r'[^a-zA-Z0-9.,_\s-]+', ' ', description.strip())

        # The page carries the site-relative media path inside an
        # <episode-video> element; take the captured content directly.
        video_path = self._search_regex(
            r'<episode-video>(.*?)</episode-video>', webpage, 'video path')

        # Scheme + host of the page URL (always present, since the URL
        # already matched _VALID_URL).
        site_root = re.match(r'https?://[^/?#]+', url).group(0)

        # NOTE(review): the raw playlist URL is returned as-is, which is
        # why the test expects ext "m3u8"; consider expanding it into
        # formats if per-quality selection is wanted.
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': site_root + video_path,
        }