mirror of
https://github.com/l1ving/youtube-dl
synced 2025-03-10 06:17:16 +08:00
Merge branch 'ytdl-org-master'
This commit is contained in:
commit
f1604aa805
@ -2340,6 +2340,8 @@ class InfoExtractor(object):
|
||||
if res is False:
|
||||
return []
|
||||
ism_doc, urlh = res
|
||||
if ism_doc is None:
|
||||
return []
|
||||
|
||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||
|
||||
|
@ -77,4 +77,3 @@ class MofosexEmbedIE(InfoExtractor):
|
||||
return self.url_result(
|
||||
'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id),
|
||||
ie=MofosexIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
@ -26,7 +26,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||
'upload_date': '20100913',
|
||||
'uploader_id': 'famouslyfuckedup',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
@ -40,7 +40,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'game', 'hairy'],
|
||||
'upload_date': '20140622',
|
||||
'uploader_id': 'Sulivana7x',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': '404',
|
||||
@ -54,7 +54,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'categories': ['superheroine heroine superher'],
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': 'shade0230',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
@ -76,7 +76,8 @@ class MotherlessIE(InfoExtractor):
|
||||
raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
(r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
|
||||
r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
|
||||
video_url = (self._html_search_regex(
|
||||
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
@ -84,14 +85,15 @@ class MotherlessIE(InfoExtractor):
|
||||
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<strong>Views</strong>\s+([^<]+)<',
|
||||
(r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'<strong>Favorited</strong>\s+([^<]+)<',
|
||||
(r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||
webpage, 'like count', fatal=False))
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
|
||||
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
|
||||
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
|
||||
if 'Ago' in upload_date:
|
||||
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
||||
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
||||
|
@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
qualities,
|
||||
@ -33,42 +34,76 @@ class NovaEmbedIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for format_id, format_list in bitrates.items():
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_url in format_list:
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'url': format_url,
|
||||
}
|
||||
f_id = format_id
|
||||
for quality in QUALITIES:
|
||||
if '%s.mp4' % quality in format_url:
|
||||
f_id += '-%s' % quality
|
||||
f.update({
|
||||
'quality': quality_key(quality),
|
||||
'format_note': quality.upper(),
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
|
||||
webpage, 'player', default='{}'), video_id, fatal=False)
|
||||
if player:
|
||||
for format_id, format_list in player['tracks'].items():
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_dict in format_list:
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = url_or_none(format_dict.get('src'))
|
||||
format_type = format_dict.get('type')
|
||||
ext = determine_ext(format_url)
|
||||
if (format_type == 'application/x-mpegURL'
|
||||
or format_id == 'HLS' or ext == 'm3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
elif (format_type == 'application/dash+xml'
|
||||
or format_id == 'DASH' or ext == 'mpd'):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
})
|
||||
break
|
||||
f['format_id'] = f_id
|
||||
formats.append(f)
|
||||
duration = int_or_none(player.get('duration'))
|
||||
else:
|
||||
# Old path, no longer current as of 08.04.2020
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
for format_id, format_list in bitrates.items():
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_url in format_list:
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'url': format_url,
|
||||
}
|
||||
f_id = format_id
|
||||
for quality in QUALITIES:
|
||||
if '%s.mp4' % quality in format_url:
|
||||
f_id += '-%s' % quality
|
||||
f.update({
|
||||
'quality': quality_key(quality),
|
||||
'format_note': quality.upper(),
|
||||
})
|
||||
break
|
||||
f['format_id'] = f_id
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(
|
||||
@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
|
||||
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='value')
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
|
||||
default=duration))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -246,7 +246,12 @@ class SoundcloudIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
# with AAC HQ format available via OAuth token
|
||||
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
_API_V2_BASE = 'https://api-v2.soundcloud.com/'
|
||||
@ -350,6 +355,9 @@ class SoundcloudIE(InfoExtractor):
|
||||
format_id_list = []
|
||||
if protocol:
|
||||
format_id_list.append(protocol)
|
||||
ext = f.get('ext')
|
||||
if ext == 'aac':
|
||||
f['abr'] = '256'
|
||||
for k in ('ext', 'abr'):
|
||||
v = f.get(k)
|
||||
if v:
|
||||
@ -360,9 +368,13 @@ class SoundcloudIE(InfoExtractor):
|
||||
abr = f.get('abr')
|
||||
if abr:
|
||||
f['abr'] = int(abr)
|
||||
if protocol == 'hls':
|
||||
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
|
||||
else:
|
||||
protocol = 'http'
|
||||
f.update({
|
||||
'format_id': '_'.join(format_id_list),
|
||||
'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
|
||||
'protocol': protocol,
|
||||
'preference': -10 if preview else None,
|
||||
})
|
||||
formats.append(f)
|
||||
|
@ -1,9 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class Tele5IE(InfoExtractor):
|
||||
@ -44,14 +54,49 @@ class Tele5IE(InfoExtractor):
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
|
||||
|
||||
if not video_id:
|
||||
NEXX_ID_RE = r'\d{6,}'
|
||||
JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
|
||||
|
||||
# Local helper: delegate extraction of the given nexx id to NexxIE by
# returning a url_result for the nexx.cloud "videos/byid" API URL; the same
# id is also passed along as the result's video_id.
def nexx_result(nexx_id):
|
||||
return self.url_result(
|
||||
'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
|
||||
ie=NexxIE.ie_key(), video_id=nexx_id)
|
||||
|
||||
nexx_id = jwplatform_id = None
|
||||
|
||||
if video_id:
|
||||
if re.match(NEXX_ID_RE, video_id):
|
||||
return nexx_result(video_id)
|
||||
elif re.match(JWPLATFORM_ID_RE, video_id):
|
||||
jwplatform_id = video_id
|
||||
|
||||
if not nexx_id:
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._html_search_regex(
|
||||
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
|
||||
r'\s+id\s*=\s*["\']player_(\d{6,})',
|
||||
r'\bdata-id\s*=\s*["\'](\d{6,})'), webpage, 'video id')
|
||||
|
||||
# Local helper: search the already downloaded `webpage` for an id whose shape
# is given by `pattern` (a regex fragment interpolated into each candidate).
# Three alternative patterns are passed, in order: the data-id of the
# "video-player" element, an element id of the form player_<id>, and any
# data-id attribute. `name` and `default` are passed straight through to
# _html_search_regex.
# NOTE(review): with the NO_DEFAULT default a failed search presumably raises
# rather than returning None - confirm against _html_search_regex.
def extract_id(pattern, name, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
|
||||
r'\s+id\s*=\s*["\']player_(%s)' % pattern,
|
||||
r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
|
||||
default=default)
|
||||
|
||||
nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
|
||||
if nexx_id:
|
||||
return nexx_result(nexx_id)
|
||||
|
||||
if not jwplatform_id:
|
||||
jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
|
||||
|
||||
media = self._download_json(
|
||||
'https://cdn.jwplayer.com/v2/media/' + jwplatform_id,
|
||||
display_id)
|
||||
nexx_id = try_get(
|
||||
media, lambda x: x['playlist'][0]['nexx_id'], compat_str)
|
||||
|
||||
if nexx_id:
|
||||
return nexx_result(nexx_id)
|
||||
|
||||
return self.url_result(
|
||||
'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
|
||||
ie=NexxIE.ie_key(), video_id=video_id)
|
||||
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
|
||||
video_id=jwplatform_id)
|
||||
|
@ -31,6 +31,10 @@ class ThisOldHouseIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# iframe www.thisoldhouse.com
|
||||
'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
|
||||
|
||||
@ -38,6 +42,6 @@ class ThisOldHouseIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'<iframe[^>]+src=[\'"](?:https?:)?//thisoldhouse\.chorus\.build/videos/zype/([0-9a-f]{24})',
|
||||
r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})',
|
||||
webpage, 'video id')
|
||||
return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
|
||||
|
@ -99,7 +99,7 @@ class TV4IE(InfoExtractor):
|
||||
manifest_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
|
||||
re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
|
||||
if not formats and info.get('is_geo_restricted'):
|
||||
|
@ -655,7 +655,14 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
|
||||
class TwitchClipsIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:clips'
|
||||
_VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
|
||||
(?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
|
||||
)
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
|
||||
@ -681,6 +688,12 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
}, {
|
||||
'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1845,15 +1845,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# fields may contain comma as well (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8536)
|
||||
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||
|
||||
# Local helper: read the first value stored under `name` in the parsed
# feed_data mapping via try_get, passing compat_str as the expected type.
# NOTE(review): presumably yields None when the key is absent, the list is
# empty, or the value is not a string - confirm against try_get.
def feed_entry(name):
|
||||
return try_get(feed_data, lambda x: x[name][0], compat_str)
|
||||
|
||||
feed_id = feed_entry('id')
|
||||
if not feed_id:
|
||||
continue
|
||||
feed_title = feed_entry('title')
|
||||
title = video_title
|
||||
if feed_title:
|
||||
title += ' (%s)' % feed_title
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'Youtube',
|
||||
'url': smuggle_url(
|
||||
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
||||
{'force_singlefeed': True}),
|
||||
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
|
||||
'title': title,
|
||||
})
|
||||
feed_ids.append(feed_data['id'][0])
|
||||
feed_ids.append(feed_id)
|
||||
self.to_screen(
|
||||
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
||||
% (', '.join(feed_ids), video_id))
|
||||
@ -1924,7 +1935,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
}
|
||||
|
||||
for fmt in streaming_formats:
|
||||
if fmt.get('drm_families'):
|
||||
if fmt.get('drmFamilies') or fmt.get('drm_families'):
|
||||
continue
|
||||
url = url_or_none(fmt.get('url'))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user