mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-10 01:13:21 +08:00
Merge branch 'master' into multiple-reject-title
This commit is contained in:
commit
6b988fdfcd
14
Makefile
14
Makefile
@ -61,34 +61,34 @@ youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
|
||||
chmod a+x youtube-dl
|
||||
|
||||
README.md: youtube_dl/*.py youtube_dl/*/*.py
|
||||
COLUMNS=80 python youtube_dl/__main__.py --help | python devscripts/make_readme.py
|
||||
COLUMNS=80 $(PYTHON) youtube_dl/__main__.py --help | $(PYTHON) devscripts/make_readme.py
|
||||
|
||||
CONTRIBUTING.md: README.md
|
||||
python devscripts/make_contributing.py README.md CONTRIBUTING.md
|
||||
$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
|
||||
|
||||
supportedsites:
|
||||
python devscripts/make_supportedsites.py docs/supportedsites.md
|
||||
$(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
|
||||
|
||||
README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
youtube-dl.1: README.md
|
||||
python devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||
rm -f youtube-dl.1.temp.md
|
||||
|
||||
youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
|
||||
python devscripts/bash-completion.py
|
||||
$(PYTHON) devscripts/bash-completion.py
|
||||
|
||||
bash-completion: youtube-dl.bash-completion
|
||||
|
||||
youtube-dl.zsh: youtube_dl/*.py youtube_dl/*/*.py devscripts/zsh-completion.in
|
||||
python devscripts/zsh-completion.py
|
||||
$(PYTHON) devscripts/zsh-completion.py
|
||||
|
||||
zsh-completion: youtube-dl.zsh
|
||||
|
||||
youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
|
||||
python devscripts/fish-completion.py
|
||||
$(PYTHON) devscripts/fish-completion.py
|
||||
|
||||
fish-completion: youtube-dl.fish
|
||||
|
||||
|
@ -13,6 +13,7 @@ from ..utils import (
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
handle_youtubedl_headers,
|
||||
)
|
||||
|
||||
|
||||
@ -33,9 +34,10 @@ class HlsFD(FileDownloader):
|
||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||
args += [
|
||||
'-headers',
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in info_dict['http_headers'].items() if key.lower() != 'accept-encoding')]
|
||||
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
|
||||
|
||||
args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
|
||||
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
|
||||
@ -18,33 +19,32 @@ class CSpanIE(InfoExtractor):
|
||||
IE_DESC = 'C-SPAN'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
|
||||
'md5': '8e44ce11f0f725527daccc453f553eb0',
|
||||
'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
|
||||
'info_dict': {
|
||||
'id': '315139',
|
||||
'ext': 'mp4',
|
||||
'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
|
||||
'description': 'Attorney General Eric Holder spoke to reporters following the Supreme Court decision in Shelby County v. Holder in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced until Congress established new guidelines for review.',
|
||||
'description': 'Attorney General Eric Holder speaks to reporters following the Supreme Court decision in [Shelby County v. Holder], in which the court ruled that the preclearance provisions of the Voting Rights Act could not be enforced.',
|
||||
},
|
||||
'skip': 'Regularly fails on travis, for unknown reasons',
|
||||
}, {
|
||||
'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
|
||||
# For whatever reason, the served video alternates between
|
||||
# two different ones
|
||||
'md5': '8e5fbfabe6ad0f89f3012a7943c1287b',
|
||||
'info_dict': {
|
||||
'id': '340723',
|
||||
'id': 'c4486943',
|
||||
'ext': 'mp4',
|
||||
'title': 'International Health Care Models',
|
||||
'title': 'CSPAN - International Health Care Models',
|
||||
'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
|
||||
'md5': '446562a736c6bf97118e389433ed88d4',
|
||||
'md5': '2ae5051559169baadba13fc35345ae74',
|
||||
'info_dict': {
|
||||
'id': '342759',
|
||||
'ext': 'mp4',
|
||||
'title': 'General Motors Ignition Switch Recall',
|
||||
'duration': 14848,
|
||||
'description': 'md5:70c7c3b8fa63fa60d42772440596034c'
|
||||
'description': 'md5:118081aedd24bf1d3b68b3803344e7f3'
|
||||
},
|
||||
}, {
|
||||
# Video from senate.gov
|
||||
@ -57,67 +57,77 @@ class CSpanIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
page_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
video_id = self._search_regex(r'progid=\'?([0-9]+)\'?>', webpage, 'video id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
matches = re.search(r'data-(prog|clip)id=\'([0-9]+)\'', webpage)
|
||||
if matches:
|
||||
video_type, video_id = matches.groups()
|
||||
if video_type == 'prog':
|
||||
video_type = 'program'
|
||||
else:
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
title = self._og_search_title(webpage)
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
|
||||
description = self._html_search_regex(
|
||||
[
|
||||
# The full description
|
||||
r'<div class=\'expandable\'>(.*?)<a href=\'#\'',
|
||||
# If the description is small enough the other div is not
|
||||
# present, otherwise this is a stripped version
|
||||
r'<p class=\'initial\'>(.*?)</p>'
|
||||
],
|
||||
webpage, 'description', flags=re.DOTALL, default=None)
|
||||
def get_text_attr(d, attr):
|
||||
return d.get(attr, {}).get('#text')
|
||||
|
||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||
data = self._download_json(info_url, video_id)
|
||||
data = self._download_json(
|
||||
'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id),
|
||||
video_id)['video']
|
||||
if data['@status'] != 'Success':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True)
|
||||
|
||||
doc = self._download_xml(
|
||||
'http://www.c-span.org/common/services/flashXml.php?programid=' + video_id,
|
||||
'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id),
|
||||
video_id)
|
||||
|
||||
description = self._html_search_meta('description', webpage)
|
||||
|
||||
title = find_xpath_attr(doc, './/string', 'name', 'title').text
|
||||
thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
|
||||
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
files = data['files']
|
||||
capfile = get_text_attr(data, 'capfile')
|
||||
|
||||
files = data['video']['files']
|
||||
try:
|
||||
capfile = data['video']['capfile']['#text']
|
||||
except KeyError:
|
||||
capfile = None
|
||||
|
||||
entries = [{
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
'title': (
|
||||
title if len(files) == 1 else
|
||||
'%s part %d' % (title, partnum + 1)),
|
||||
'url': unescapeHTML(f['path']['#text']),
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(f.get('length', {}).get('#text')),
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
'url': capfile,
|
||||
'ext': determine_ext(capfile, 'dfxp')
|
||||
}],
|
||||
} if capfile else None,
|
||||
} for partnum, f in enumerate(files)]
|
||||
entries = []
|
||||
for partnum, f in enumerate(files):
|
||||
formats = []
|
||||
for quality in f['qualities']:
|
||||
formats.append({
|
||||
'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')),
|
||||
'url': unescapeHTML(get_text_attr(quality, 'file')),
|
||||
'height': int_or_none(get_text_attr(quality, 'height')),
|
||||
'tbr': int_or_none(get_text_attr(quality, 'bitrate')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
'title': (
|
||||
title if len(files) == 1 else
|
||||
'%s part %d' % (title, partnum + 1)),
|
||||
'formats': formats,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(get_text_attr(f, 'length')),
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
'url': capfile,
|
||||
'ext': determine_ext(capfile, 'dfxp')
|
||||
}],
|
||||
} if capfile else None,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
entry = dict(entries[0])
|
||||
entry['id'] = video_id
|
||||
entry['id'] = 'c' + video_id if video_type == 'clip' else video_id
|
||||
return entry
|
||||
else:
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
'id': 'c' + video_id if video_type == 'clip' else video_id,
|
||||
}
|
||||
|
@ -164,7 +164,7 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_title:
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
|
||||
webpage, 'alternative title', fatal=False)
|
||||
webpage, 'alternative title', default=None)
|
||||
video_title = limit_length(video_title, 80)
|
||||
if not video_title:
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
|
@ -147,7 +147,8 @@ class PornHubPlaylistIE(InfoExtractor):
|
||||
|
||||
entries = [
|
||||
self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
|
||||
for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage))
|
||||
for video_url in set(re.findall(
|
||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
|
||||
]
|
||||
|
||||
playlist = self._parse_json(
|
||||
|
@ -58,7 +58,8 @@ class SpiegelIE(InfoExtractor):
|
||||
description = self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
base_url = self._search_regex(
|
||||
r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')
|
||||
[r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
|
||||
webpage, 'server URL', group='url')
|
||||
|
||||
xml_url = base_url + video_id + '.xml'
|
||||
idoc = self._download_xml(xml_url, video_id)
|
||||
|
@ -3,11 +3,14 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import sanitized_Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class VodlockerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://vodlocker.com/e8wvyzz4sl42',
|
||||
@ -24,6 +27,12 @@ class VodlockerIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if any(p in webpage for p in (
|
||||
'>THIS FILE WAS DELETED<',
|
||||
'>File Not Found<',
|
||||
'The file you were looking for could not be found, sorry for any inconvenience.<')):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
fields = self._hidden_inputs(webpage)
|
||||
|
||||
if fields['op'] == 'download1':
|
||||
|
@ -258,7 +258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|(?: # or the v= param in all its forms
|
||||
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
|
||||
(?:\?|\#!?) # the params delimiter ? or # or #!
|
||||
(?:.*?&)?? # any other preceding param (like /?s=tuff&v=xxxx)
|
||||
(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
|
||||
v=
|
||||
)
|
||||
))
|
||||
@ -346,6 +346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
|
||||
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
|
||||
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
|
||||
@ -730,6 +731,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -1475,6 +1480,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
formats = _map_to_format_list(url_map)
|
||||
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
|
||||
else:
|
||||
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
@ -1559,7 +1567,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtract
|
||||
youtube\.com/
|
||||
(?:
|
||||
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
|
||||
\? (?:.*?&)*? (?:p|a|list)=
|
||||
\? (?:.*?[&;])*? (?:p|a|list)=
|
||||
| p/
|
||||
)
|
||||
(
|
||||
|
@ -663,6 +663,16 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
|
||||
return hc
|
||||
|
||||
|
||||
def handle_youtubedl_headers(headers):
    """Resolve the internal "Youtubedl-no-compression" pseudo-header.

    When the marker header is present, return a new dict without the marker
    and without any Accept-Encoding header. When the marker is absent,
    return ``headers`` itself, unchanged. The mapping passed in is never
    modified.

    HTTP header names are case-insensitive, so both the marker and
    Accept-Encoding are matched regardless of letter case. This keeps a
    differently-cased marker (e.g. "Youtubedl-No-Compression") from being
    left in the returned headers.

    :param headers: mapping of header name to header value
    :return: ``headers`` itself when no marker is present, otherwise a
             filtered ``dict`` copy
    """
    marker = 'youtubedl-no-compression'

    # Nothing to strip: hand back the caller's object, not a copy.
    if not any(name.lower() == marker for name in headers):
        return headers

    # dict(generator) rather than a dict comprehension, matching the
    # original code.
    return dict(
        (name, value) for name, value in headers.items()
        if name.lower() not in (marker, 'accept-encoding'))
|
||||
|
||||
|
||||
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
"""Handler for HTTP requests and responses.
|
||||
|
||||
@ -670,7 +680,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
the standard headers to every HTTP request and handles gzipped and
|
||||
deflated responses from web servers. If compression is to be avoided in
|
||||
a particular request, the original request in the program code only has
|
||||
to include the HTTP header "Youtubedl-No-Compression", which will be
|
||||
to include the HTTP header "Youtubedl-no-compression", which will be
|
||||
removed before making the real request.
|
||||
|
||||
Part of this code was copied from:
|
||||
@ -731,10 +741,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||
# The dict keys are capitalized because of this bug by urllib
|
||||
if h.capitalize() not in req.headers:
|
||||
req.add_header(h, v)
|
||||
if 'Youtubedl-no-compression' in req.headers:
|
||||
if 'Accept-encoding' in req.headers:
|
||||
del req.headers['Accept-encoding']
|
||||
del req.headers['Youtubedl-no-compression']
|
||||
|
||||
req.headers = handle_youtubedl_headers(req.headers)
|
||||
|
||||
if sys.version_info < (2, 7) and '#' in req.get_full_url():
|
||||
# Python 2.6 is brain-dead when it comes to fragments
|
||||
|
Loading…
Reference in New Issue
Block a user