1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-02-13 21:42:50 +08:00

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Ryan Hayward 2018-05-25 14:20:13 -05:00
commit 5c84fbb3d4
14 changed files with 576 additions and 179 deletions

View File

@ -519,6 +519,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_age_limit('PG-13'), 13)
self.assertEqual(parse_age_limit('TV-14'), 14)
self.assertEqual(parse_age_limit('TV-MA'), 17)
self.assertEqual(parse_age_limit('TV14'), 14)
self.assertEqual(parse_age_limit('TV_G'), 0)
def test_parse_duration(self):
self.assertEqual(parse_duration(None), None)

View File

@ -91,17 +91,6 @@ class DVTVIE(InfoExtractor):
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
}, {
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
'md5': '87defe16681b1429c91f7a74809823c6',
'info_dict': {
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
'ext': 'mp4',
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
},
'params': {
'skip_download': True,
},
}]
def _parse_video_metadata(self, js, video_id, live_js=None):

View File

@ -469,10 +469,7 @@ from .imgur import (
)
from .ina import InaIE
from .inc import IncIE
from .indavideo import (
IndavideoIE,
IndavideoEmbedIE,
)
from .indavideo import IndavideoEmbedIE
from .infoq import InfoQIE
from .instagram import InstagramIE, InstagramUserIE
from .internazionale import InternazionaleIE
@ -666,6 +663,7 @@ from .nbc import (
NBCOlympicsIE,
NBCOlympicsStreamIE,
NBCSportsIE,
NBCSportsStreamIE,
NBCSportsVPlayerIE,
)
from .ndr import (
@ -810,6 +808,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .peertube import PeerTubeIE
from .people import PeopleIE
from .performgroup import PerformGroupIE
from .periscope import (

View File

@ -108,6 +108,8 @@ from .yapfiles import YapFilesIE
from .vice import ViceIE
from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE
class GenericIE(InfoExtractor):
@ -2012,6 +2014,33 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# PeerTube embed
'url': 'https://joinpeertube.org/fr/home/',
'info_dict': {
'id': 'home',
'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
},
'playlist_count': 2,
},
{
# Indavideo embed
'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
'info_dict': {
'id': '1693903',
'ext': 'mp4',
'title': 'Így kell otthon hamburgert sütni',
'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
'timestamp': 1426330212,
'upload_date': '20150314',
'uploader': 'StreetKitchen',
'uploader_id': '546363',
},
'add_ie': [IndavideoEmbedIE.ie_key()],
'params': {
'skip_download': True,
},
},
{
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
'md5': 'b68d276de422ab07ee1d49388103f457',
@ -3029,6 +3058,16 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
peertube_urls = PeerTubeIE._extract_urls(webpage)
if peertube_urls:
return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
if indavideo_urls:
return self.playlist_from_matches(
indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]

View File

@ -126,16 +126,16 @@ class GloboIE(InfoExtractor):
continue
hash_code = security_hash[:2]
received_time = int(security_hash[2:12])
received_time = security_hash[2:12]
received_random = security_hash[12:22]
received_md5 = security_hash[22:]
sign_time = received_time + 86400
sign_time = compat_str(int(received_time) + 86400)
padding = '%010d' % random.randint(1, 10000000000)
md5_data = (received_md5 + str(sign_time) + padding + '0xFF01DD').encode()
md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5
signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):

View File

@ -6,7 +6,9 @@ import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
)
@ -23,6 +25,7 @@ class Go90IE(InfoExtractor):
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
'timestamp': 1491868800,
'upload_date': '20170411',
'age_limit': 14,
}
}
@ -33,6 +36,8 @@ class Go90IE(InfoExtractor):
video_id, headers={
'Content-Type': 'application/json; charset=utf-8',
}, data=b'{"client":"web","device_type":"pc"}')
if video_data.get('requires_drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
main_video_asset = video_data['main_video_asset']
episode_number = int_or_none(video_data.get('episode_number'))
@ -123,4 +128,5 @@ class Go90IE(InfoExtractor):
'season_number': season_number,
'episode_number': episode_number,
'subtitles': subtitles,
'age_limit': parse_age_limit(video_data.get('rating')),
}

View File

@ -17,6 +17,9 @@ class HiDiveIE(InfoExtractor):
# Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
# so disabling geo bypass completely
_GEO_BYPASS = False
_NETRC_MACHINE = 'hidive'
_LOGGED_IN = False
_LOGIN_URL = 'https://www.hidive.com/account/login'
_TESTS = [{
'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
@ -31,8 +34,30 @@ class HiDiveIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'Requires Authentication',
}]
def _real_initialize(self):
if self._LOGGED_IN:
return
(email, password) = self._get_login_info()
if email is None:
return
webpage = self._download_webpage(self._LOGIN_URL, None)
form = self._search_regex(
r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
webpage, 'login form')
data = self._hidden_inputs(form)
data.update({
'Email': email,
'Password': password,
})
self._download_webpage(
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
self._LOGGED_IN = True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
title, key = mobj.group('title', 'key')
@ -43,6 +68,7 @@ class HiDiveIE(InfoExtractor):
data=urlencode_postdata({
'Title': title,
'Key': key,
'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
}))
restriction = settings.get('restrictionReason')
@ -79,6 +105,7 @@ class HiDiveIE(InfoExtractor):
subtitles.setdefault(cc_lang, []).append({
'url': cc_url,
})
self._sort_formats(formats)
season_number = int_or_none(self._search_regex(
r's(\d+)', key, 'season number', default=None))

View File

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
@ -21,7 +20,7 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
'description': 'Imgur: The most awesome images on the Internet.',
'description': 'Imgur: The magic of the Internet',
},
}, {
'url': 'https://imgur.com/A61SaA1',
@ -29,7 +28,7 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
'description': 'Imgur: The most awesome images on the Internet.',
'description': 'Imgur: The magic of the Internet',
},
}, {
'url': 'https://imgur.com/gallery/YcAQlkx',
@ -37,8 +36,6 @@ class ImgurIE(InfoExtractor):
'id': 'YcAQlkx',
'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
'description': 'Imgur: The most awesome images on the Internet.'
}
}, {
'url': 'http://imgur.com/topic/Funny/N8rOudd',
@ -50,8 +47,8 @@ class ImgurIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
compat_urlparse.urljoin(url, video_id), video_id)
gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
webpage = self._download_webpage(gifv_url, video_id)
width = int_or_none(self._og_search_property(
'video:width', webpage, default=None))
@ -107,7 +104,7 @@ class ImgurIE(InfoExtractor):
return {
'id': video_id,
'formats': formats,
'description': self._og_search_description(webpage),
'description': self._og_search_description(webpage, default=None),
'title': self._og_search_title(webpage),
}

View File

@ -1,11 +1,15 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_age_limit,
parse_iso8601,
update_url_query,
)
@ -13,7 +17,7 @@ class IndavideoEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
_TESTS = [{
'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
'md5': 'f79b009c66194acacd40712a6778acfa',
'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
'info_dict': {
'id': '1837039',
'ext': 'mp4',
@ -36,6 +40,20 @@ class IndavideoEmbedIE(InfoExtractor):
'only_matching': True,
}]
# Some example URLs covered by generic extractor:
# http://indavideo.hu/video/Vicces_cica_1
# http://index.indavideo.hu/video/2015_0728_beregszasz
# http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
# http://erotika.indavideo.hu/video/Amator_tini_punci
# http://film.indavideo.hu/video/f_hrom_nagymamm_volt
# http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
@ -45,7 +63,14 @@ class IndavideoEmbedIE(InfoExtractor):
title = video['title']
video_urls = video.get('video_files', [])
video_urls = []
video_files = video.get('video_files')
if isinstance(video_files, list):
video_urls.extend(video_files)
elif isinstance(video_files, dict):
video_urls.extend(video_files.values())
video_file = video.get('video_file')
if video:
video_urls.append(video_file)
@ -58,11 +83,23 @@ class IndavideoEmbedIE(InfoExtractor):
if flv_url not in video_urls:
video_urls.append(flv_url)
formats = [{
'url': video_url,
'height': int_or_none(self._search_regex(
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)),
} for video_url in video_urls]
filesh = video.get('filesh')
formats = []
for video_url in video_urls:
height = int_or_none(self._search_regex(
r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
if filesh:
if not height:
continue
token = filesh.get(compat_str(height))
if token is None:
continue
video_url = update_url_query(video_url, {'token': token})
formats.append({
'url': video_url,
'height': height,
})
self._sort_formats(formats)
timestamp = video.get('date')
@ -89,55 +126,3 @@ class IndavideoEmbedIE(InfoExtractor):
'tags': tags,
'formats': formats,
}
class IndavideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?indavideo\.hu/video/(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'http://indavideo.hu/video/Vicces_cica_1',
'md5': '8c82244ba85d2a2310275b318eb51eac',
'info_dict': {
'id': '1335611',
'display_id': 'Vicces_cica_1',
'ext': 'mp4',
'title': 'Vicces cica',
'description': 'Játszik a tablettel. :D',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Jet_Pack',
'uploader_id': '491217',
'timestamp': 1390821212,
'upload_date': '20140127',
'duration': 7,
'age_limit': 0,
'tags': ['vicces', 'macska', 'cica', 'ügyes', 'nevetés', 'játszik', 'Cukiság', 'Jet_Pack'],
},
}, {
'url': 'http://index.indavideo.hu/video/2015_0728_beregszasz',
'only_matching': True,
}, {
'url': 'http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko',
'only_matching': True,
}, {
'url': 'http://erotika.indavideo.hu/video/Amator_tini_punci',
'only_matching': True,
}, {
'url': 'http://film.indavideo.hu/video/f_hrom_nagymamm_volt',
'only_matching': True,
}, {
'url': 'http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
embed_url = self._search_regex(
r'<link[^>]+rel="video_src"[^>]+href="(.+?)"', webpage, 'embed url')
return {
'_type': 'url_transparent',
'ie_key': 'IndavideoEmbed',
'url': embed_url,
'display_id': display_id,
}

View File

@ -1,10 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
determine_ext,
float_or_none,
@ -57,12 +58,33 @@ class IzleseneIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
url = 'http://www.izlesene.com/video/%s' % video_id
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage('http://www.izlesene.com/video/%s' % video_id, video_id)
video = self._parse_json(
self._search_regex(
r'videoObj\s*=\s*({.+?})\s*;\s*\n', webpage, 'streams'),
video_id)
title = video.get('videoTitle') or self._og_search_title(webpage)
formats = []
for stream in video['media']['level']:
source_url = stream.get('source')
if not source_url or not isinstance(source_url, compat_str):
continue
ext = determine_ext(url, 'mp4')
quality = stream.get('value')
height = int_or_none(quality)
formats.append({
'format_id': '%sp' % quality if quality else 'sd',
'url': compat_urllib_parse_unquote(source_url),
'ext': ext,
'height': height,
})
self._sort_formats(formats)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage, default=None)
thumbnail = self._proto_relative_url(
thumbnail = video.get('posterURL') or self._proto_relative_url(
self._og_search_thumbnail(webpage), scheme='http:')
uploader = self._html_search_regex(
@ -71,41 +93,15 @@ class IzleseneIE(InfoExtractor):
timestamp = parse_iso8601(self._html_search_meta(
'uploadDate', webpage, 'upload date'))
duration = float_or_none(self._html_search_regex(
r'"videoduration"\s*:\s*"([^"]+)"',
webpage, 'duration', fatal=False), scale=1000)
duration = float_or_none(video.get('duration') or self._html_search_regex(
r'videoduration["\']?\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, 'duration', fatal=False, group='value'), scale=1000)
view_count = str_to_int(get_element_by_id('videoViewCount', webpage))
comment_count = self._html_search_regex(
r'comment_count\s*=\s*\'([^\']+)\';',
webpage, 'comment_count', fatal=False)
content_url = self._html_search_meta(
'contentURL', webpage, 'content URL', fatal=False)
ext = determine_ext(content_url, 'mp4')
# Might be empty for some videos.
streams = self._html_search_regex(
r'"qualitylevel"\s*:\s*"([^"]+)"', webpage, 'streams', default='')
formats = []
if streams:
for stream in streams.split('|'):
quality, url = re.search(r'\[(\w+)\](.+)', stream).groups()
formats.append({
'format_id': '%sp' % quality if quality else 'sd',
'url': compat_urllib_parse_unquote(url),
'ext': ext,
})
else:
stream_url = self._search_regex(
r'"streamurl"\s*:\s*"([^"]+)"', webpage, 'stream URL')
formats.append({
'format_id': 'sd',
'url': compat_urllib_parse_unquote(stream_url),
'ext': ext,
})
return {
'id': video_id,
'title': title,

View File

@ -1,7 +1,8 @@
from __future__ import unicode_literals
import re
import base64
import json
import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
@ -175,6 +176,65 @@ class NBCSportsIE(InfoExtractor):
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
class NBCSportsStreamIE(AdobePassIE):
_VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
_TEST = {
'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
'info_dict': {
'id': '206559',
'ext': 'mp4',
'title': 'Amgen Tour of California Women\'s Recap',
'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Requires Adobe Pass Authentication',
}
def _real_extract(self, url):
video_id = self._match_id(url)
live_source = self._download_json(
'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
video_id)
video_source = live_source['videoSources'][0]
title = video_source['title']
source_url = None
for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
sk = k + 'Url'
source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
if source_url:
break
else:
source_url = video_source['ottStreamUrl']
is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
tokenized_url = self._download_json(
'https://token.playmakerservices.com/cdn',
video_id, data=json.dumps({
'requestorId': 'nbcsports',
'pid': video_id,
'application': 'NBCSports',
'version': 'v1',
'platform': 'desktop',
'cdn': 'akamai',
'url': video_source['sourceUrl'],
'token': base64.b64encode(token.encode()).decode(),
'resourceId': base64.b64encode(resource.encode()).decode(),
}).encode())['tokenizedUrl']
formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
self._sort_formats(formats)
return {
'id': video_id,
'title': self._live_title(title) if is_live else title,
'description': live_source.get('description'),
'formats': formats,
'is_live': is_live,
}
class CSNNEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'

View File

@ -0,0 +1,228 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
parse_resolution,
try_get,
unified_timestamp,
urljoin,
)
class PeerTubeIE(InfoExtractor):
_INSTANCES_RE = r'''(?:
# Taken from https://instances.joinpeertube.org/instances
tube\.openalgeria\.org|
peertube\.pointsecu\.fr|
peertube\.nogafa\.org|
peertube\.pl|
megatube\.lilomoino\.fr|
peertube\.tamanoir\.foucry\.net|
peertube\.inapurna\.org|
peertube\.netzspielplatz\.de|
video\.deadsuperhero\.com|
peertube\.devosi\.org|
peertube\.1312\.media|
tube\.worldofhauru\.xyz|
tube\.bootlicker\.party|
skeptikon\.fr|
peertube\.geekshell\.fr|
tube\.opportunis\.me|
peertube\.peshane\.net|
video\.blueline\.mg|
tube\.homecomputing\.fr|
videos\.cloudfrancois\.fr|
peertube\.viviers-fibre\.net|
tube\.ouahpiti\.info|
video\.tedomum\.net|
video\.g3l\.org|
fontube\.fr|
peertube\.gaialabs\.ch|
peertube\.extremely\.online|
peertube\.public-infrastructure\.eu|
tube\.kher\.nl|
peertube\.qtg\.fr|
tube\.22decembre\.eu|
facegirl\.me|
video\.migennes\.net|
janny\.moe|
tube\.p2p\.legal|
video\.atlanti\.se|
troll\.tv|
peertube\.geekael\.fr|
vid\.leotindall\.com|
video\.anormallostpod\.ovh|
p-tube\.h3z\.jp|
tube\.darfweb\.eu|
videos\.iut-orsay\.fr|
peertube\.solidev\.net|
videos\.symphonie-of-code\.fr|
testtube\.ortg\.de|
videos\.cemea\.org|
peertube\.gwendalavir\.eu|
video\.passageenseine\.fr|
videos\.festivalparminous\.org|
peertube\.touhoppai\.moe|
peertube\.duckdns\.org|
sikke\.fi|
peertube\.mastodon\.host|
firedragonvideos\.com|
vidz\.dou\.bet|
peertube\.koehn\.com|
peer\.hostux\.social|
share\.tube|
peertube\.walkingmountains\.fr|
medias\.libox\.fr|
peertube\.moe|
peertube\.xyz|
jp\.peertube\.network|
videos\.benpro\.fr|
tube\.otter\.sh|
peertube\.angristan\.xyz|
peertube\.parleur\.net|
peer\.ecutsa\.fr|
peertube\.heraut\.eu|
peertube\.tifox\.fr|
peertube\.maly\.io|
vod\.mochi\.academy|
exode\.me|
coste\.video|
tube\.aquilenet\.fr|
peertube\.gegeweb\.eu|
framatube\.org|
thinkerview\.video|
tube\.conferences-gesticulees\.net|
peertube\.datagueule\.tv|
video\.lqdn\.fr|
meilleurtube\.delire\.party|
tube\.mochi\.academy|
peertube\.dav\.li|
media\.zat\.im|
pytu\.be|
peertube\.valvin\.fr|
peertube\.nsa\.ovh|
video\.colibris-outilslibres\.org|
video\.hispagatos\.org|
tube\.svnet\.fr|
peertube\.video|
videos\.lecygnenoir\.info|
peertube3\.cpy\.re|
peertube2\.cpy\.re|
videos\.tcit\.fr|
peertube\.cpy\.re
)'''
_VALID_URL = r'''(?x)
https?://
%s
/(?:videos/(?:watch|embed)|api/v\d/videos)/
(?P<id>[^/?\#&]+)
''' % _INSTANCES_RE
_TESTS = [{
'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
'md5': '80f24ff364cc9d333529506a263e7feb',
'info_dict': {
'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
'ext': 'mp4',
'title': 'wow',
'description': 'wow such video, so gif',
'thumbnail': r're:https?://.*\.(?:jpg|png)',
'timestamp': 1519297480,
'upload_date': '20180222',
'uploader': 'Luclu7',
'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
'license': 'Unknown',
'duration': 3,
'view_count': int,
'like_count': int,
'dislike_count': int,
'tags': list,
'categories': list,
}
}, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True,
}, {
# nsfw
'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
'only_matching': True,
}, {
'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
'only_matching': True,
}, {
'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
% PeerTubeIE._INSTANCES_RE, webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
title = video['name']
formats = []
for file_ in video['files']:
if not isinstance(file_, dict):
continue
file_url = file_.get('fileUrl')
if not file_url or not isinstance(file_url, compat_str):
continue
file_size = int_or_none(file_.get('size'))
format_id = try_get(
file_, lambda x: x['resolution']['label'], compat_str)
f = parse_resolution(format_id)
f.update({
'url': file_url,
'format_id': format_id,
'filesize': file_size,
})
formats.append(f)
self._sort_formats(formats)
def account_data(field):
return try_get(video, lambda x: x['account'][field], compat_str)
category = try_get(video, lambda x: x['category']['label'], compat_str)
categories = [category] if category else None
nsfw = video.get('nsfw')
if nsfw is bool:
age_limit = 18 if nsfw else 0
else:
age_limit = None
return {
'id': video_id,
'title': title,
'description': video.get('description'),
'thumbnail': urljoin(url, video.get('thumbnailPath')),
'timestamp': unified_timestamp(video.get('publishedAt')),
'uploader': account_data('displayName'),
'uploader_id': account_data('uuid'),
'uploder_url': account_data('url'),
'license': try_get(
video, lambda x: x['licence']['label'], compat_str),
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('views')),
'like_count': int_or_none(video.get('likes')),
'dislike_count': int_or_none(video.get('dislikes')),
'age_limit': age_limit,
'tags': try_get(video, lambda x: x['tags'], list),
'categories': categories,
'formats': formats,
}

View File

@ -1,24 +1,27 @@
from __future__ import unicode_literals
import base64
import re
from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
clean_html,
determine_ext,
int_or_none,
js_to_json,
parse_age_limit,
parse_duration,
)
class ViewLiftBaseIE(InfoExtractor):
_DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
_DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93',
@ -60,8 +63,10 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
formats = []
has_bitrate = False
for source in self._parse_json(js_to_json(self._search_regex(
r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
sources = self._parse_json(self._search_regex(
r'(?s)sources:\s*(\[.+?\]),', webpage,
'sources', default='[]'), video_id, js_to_json)
for source in sources:
file_ = source.get('file')
if not file_:
continue
@ -70,7 +75,8 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
format_id = source.get('label') or ext
if all(v in ('m3u8', 'hls') for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', m3u8_id='hls'))
file_, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
bitrate = int_or_none(self._search_regex(
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
@ -85,6 +91,13 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'tbr': bitrate,
'height': height,
})
if not formats:
hls_url = self._parse_json(self._search_regex(
r'filmInfo\.src\s*=\s*({.+?});',
webpage, 'src'), video_id, js_to_json)['src']
formats = self._extract_m3u8_formats(
hls_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
self._sort_formats(formats, field_preference)
@ -109,10 +122,13 @@ class ViewLiftIE(ViewLiftBaseIE):
'display_id': 'lost_for_life',
'ext': 'mp4',
'title': 'Lost for Life',
'description': 'md5:fbdacc8bb6b455e464aaf98bc02e1c82',
'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 4489,
'categories': ['Documentary', 'Crime', 'Award Winning', 'Festivals']
'categories': 'mincount:3',
'age_limit': 14,
'upload_date': '20150421',
'timestamp': 1429656819,
}
}, {
'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
@ -125,7 +141,9 @@ class ViewLiftIE(ViewLiftBaseIE):
'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 979,
'categories': ['Documentary', 'Sports', 'Politics']
'categories': 'mincount:2',
'timestamp': 1399478279,
'upload_date': '20140507',
}
}, {
# Film is not playable in your area.
@ -138,9 +156,6 @@ class ViewLiftIE(ViewLiftBaseIE):
}, {
'url': 'http://www.winnersview.com/videos/the-good-son',
'only_matching': True,
}, {
'url': 'http://www.kesari.tv/news/video/1461919076414',
'only_matching': True,
}, {
# Was once Kaltura embed
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
@ -156,45 +171,96 @@ class ViewLiftIE(ViewLiftBaseIE):
raise ExtractorError(
'Film %s is not available.' % display_id, expected=True)
film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
initial_store_state = self._search_regex(
r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
webpage, 'Initial Store State', default=None)
if initial_store_state:
modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
initial_store_state).decode()), display_id)['page']['data']['modules']
content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
gist = content_data['gist']
film_id = gist['id']
title = gist['title']
video_assets = content_data['streamingInfo']['videoAssets']
snag = self._parse_json(
self._search_regex(
r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
display_id)
formats = []
mpeg_video_assets = video_assets.get('mpeg') or []
for video_asset in mpeg_video_assets:
video_asset_url = video_asset.get('url')
if not video_asset:
continue
bitrate = int_or_none(video_asset.get('bitrate'))
height = int_or_none(self._search_regex(
r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
'height', default=None))
formats.append({
'url': video_asset_url,
'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
'tbr': bitrate,
'height': height,
'vcodec': video_asset.get('codec'),
})
for item in snag:
if item.get('data', {}).get('film', {}).get('id') == film_id:
data = item['data']['film']
title = data['title']
description = clean_html(data.get('synopsis'))
thumbnail = data.get('image')
duration = int_or_none(data.get('duration') or data.get('runtime'))
categories = [
category['title'] for category in data.get('categories', [])
if category.get('title')]
break
hls_url = video_assets.get('hls')
if hls_url:
formats.extend(self._extract_m3u8_formats(
hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
self._sort_formats(formats, ('height', 'tbr', 'format_id'))
info = {
'id': film_id,
'display_id': display_id,
'title': title,
'description': gist.get('description'),
'thumbnail': gist.get('videoImageUrl'),
'duration': int_or_none(gist.get('runtime')),
'age_limit': parse_age_limit(content_data.get('parentalRating')),
'timestamp': int_or_none(gist.get('publishDate'), 1000),
'formats': formats,
}
for k in ('categories', 'tags'):
info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
return info
else:
title = self._search_regex(
r'itemprop="title">([^<]+)<', webpage, 'title')
description = self._html_search_regex(
r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
webpage, 'description', default=None) or self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
duration = parse_duration(self._search_regex(
r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
webpage, 'duration', fatal=False))
categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
return {
'_type': 'url_transparent',
'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
'id': film_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'categories': categories,
'ie_key': 'ViewLiftEmbed',
}
snag = self._parse_json(
self._search_regex(
r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag', default='[]'),
display_id)
for item in snag:
if item.get('data', {}).get('film', {}).get('id') == film_id:
data = item['data']['film']
title = data['title']
description = clean_html(data.get('synopsis'))
thumbnail = data.get('image')
duration = int_or_none(data.get('duration') or data.get('runtime'))
categories = [
category['title'] for category in data.get('categories', [])
if category.get('title')]
break
else:
title = self._search_regex(
r'itemprop="title">([^<]+)<', webpage, 'title')
description = self._html_search_regex(
r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
webpage, 'description', default=None) or self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
duration = parse_duration(self._search_regex(
r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
webpage, 'duration', fatal=False))
categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
return {
'_type': 'url_transparent',
'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
'id': film_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'categories': categories,
'ie_key': 'ViewLiftEmbed',
}

View File

@ -2253,12 +2253,12 @@ US_RATINGS = {
TV_PARENTAL_GUIDELINES = {
'TV-Y': 0,
'TV-Y7': 7,
'TV-G': 0,
'TV-PG': 0,
'TV-14': 14,
'TV-MA': 17,
'Y': 0,
'Y7': 7,
'G': 0,
'PG': 0,
'14': 14,
'MA': 17,
}
@ -2272,7 +2272,10 @@ def parse_age_limit(s):
return int(m.group('age'))
if s in US_RATINGS:
return US_RATINGS[s]
return TV_PARENTAL_GUIDELINES.get(s)
m = re.match(r'^TV[_-]?(%s)$' % '|'.join(TV_PARENTAL_GUIDELINES.keys()), s)
if m:
return TV_PARENTAL_GUIDELINES[m.group(1)]
return None
def strip_jsonp(code):