1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-07 04:57:46 +08:00

Merge pull request #2 from rg3/master

Sync
This commit is contained in:
Melvin Soldia 2018-09-16 09:33:30 +08:00 committed by GitHub
commit 988d1eb469
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 342 additions and 79 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.10**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.09.08
[debug] youtube-dl version 2018.09.10
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -1,3 +1,14 @@
version 2018.09.10
Core
+ [utils] Properly recognize AV1 codec (#17506)
Extractors
+ [iprima] Add support for prima.iprima.cz (#17514)
+ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
* [nbc] Fix extraction of percent encoded URLs (#17374)
version 2018.09.08
Extractors

View File

@ -847,6 +847,7 @@
- **techtv.mit.edu**
- **ted**
- **Tele13**
- **Tele5**
- **TeleBruxelles**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf**

View File

@ -785,6 +785,10 @@ class TestUtil(unittest.TestCase):
'vcodec': 'h264',
'acodec': 'aac',
})
self.assertEqual(parse_codecs('av01.0.05M.08'), {
'vcodec': 'av01.0.05M.08',
'acodec': 'none',
})
def test_escape_rfc3986(self):
reserved = "!*'();:@&=+$,/?#[]"

View File

@ -8,7 +8,6 @@ from .kaltura import KalturaIE
from ..utils import (
extract_attributes,
remove_end,
urlencode_postdata,
)
@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
data=urlencode_postdata({
'postid': video_id,
'action': 'get_channel_kaltura_vars',
}))
webpage = self._download_webpage(url, video_id)
entry_id = data['entry_id']
entry_id, partner_id, title = [None] * 3
vars = self._parse_json(
self._search_regex(
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
default='{}'), video_id, fatal=False)
if vars:
entry_id = vars.get('entry_id')
partner_id = vars.get('partner_id')
title = vars.get('vid_label')
if not entry_id:
entry_id = self._search_regex(
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
player = self._download_webpage(
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
query={'id': entry_id})
kaltura_id = self._search_regex(
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
'kaltura id', group='id')
if not partner_id:
partner_id = self._search_regex(
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
default='513551')
return self.url_result(
'kaltura:%s:%s' % (data['partner_id'], entry_id),
ie=KalturaIE.ie_key(), video_id=entry_id,
video_title=data.get('vid_label'))
'kaltura:%s:%s' % (partner_id, kaltura_id),
ie=KalturaIE.ie_key(), video_id=kaltura_id,
video_title=title)
class AsianCrushPlaylistIE(InfoExtractor):

View File

@ -211,6 +211,11 @@ class InfoExtractor(object):
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
uploader_url: Full URL to a personal webpage of the video uploader.
channel: Full name of the channel the video is uploaded on.
Note that channel fields may or may not repeat uploader
fields. This depends on a particular extractor.
channel_id: Id of the channel.
channel_url: Full URL to a channel webpage.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
{tag: subformats}. "tag" is usually a language code, and
@ -1701,9 +1706,9 @@ class InfoExtractor(object):
# However, this is not always respected, for example, [2]
# contains EXT-X-STREAM-INF tag which references AUDIO
# rendition group but does not have CODECS and despite
# referencing audio group an audio group, it represents
# a complete (with audio and video) format. So, for such cases
# we will ignore references to rendition groups and treat them
# referencing an audio group it represents a complete
# (with audio and video) format. So, for such cases we will
# ignore references to rendition groups and treat them
# as complete formats.
if audio_group_id and codecs and f.get('vcodec') != 'none':
audio_group = groups.get(audio_group_id)

View File

@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor):
try:
self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
self._downloader._opener.open(video_url, timeout=5).close()
except timeout as e:
except timeout:
self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, format_id))
continue

View File

@ -9,6 +9,7 @@ from ..utils import (
encode_base_n,
ExtractorError,
int_or_none,
merge_dicts,
parse_duration,
str_to_int,
url_or_none,
@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor):
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
'ext': 'mp4',
'title': 'Infamous Tiffany Teen Strip Tease Video',
'description': 'md5:764f39abf932daafa37485eb46efa152',
'timestamp': 1232520922,
'upload_date': '20090121',
'duration': 1838,
'view_count': int,
'age_limit': 18,
},
'params': {
'proxy': '127.0.0.1:8118'
}
}, {
# New (May 2016) URL layout
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor):
})
self._sort_formats(formats)
duration = parse_duration(self._html_search_meta('duration', webpage))
json_ld = self._search_json_ld(webpage, display_id, default={})
duration = parse_duration(self._html_search_meta(
'duration', webpage, default=None))
view_count = str_to_int(self._search_regex(
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
webpage, 'view count', fatal=False))
return {
return merge_dicts(json_ld, {
'id': video_id,
'display_id': display_id,
'title': title,
@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor):
'view_count': view_count,
'formats': formats,
'age_limit': 18,
}
})

View File

@ -1086,6 +1086,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele5 import Tele5IE
from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE

View File

@ -3,15 +3,45 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..compat import (
compat_b64decode,
compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
str_or_none,
str_to_int,
try_get,
unified_timestamp,
url_or_none,
)
class FourTubeBaseIE(InfoExtractor):
_TKN_HOST = 'tkn.kodicdn.com'
def _extract_formats(self, url, video_id, media_id, sources):
    """Return the sorted list of format dicts for one media item.

    A token URL is built from ``self._TKN_HOST``, ``media_id`` and the
    '+'-joined ``sources``; it is requested through ``_download_json``
    with an empty request body and Origin/Referer headers derived from
    ``url``.

    :param url: page URL; sent as Referer, and its scheme and hostname
        form the Origin header.
    :param video_id: id handed to ``_download_json``.
    :param media_id: media id placed in the token URL path.
    :param sources: height strings (each one is concatenated with 'p'
        and converted with ``int``); also used as keys into the token
        response.
    :return: the format dicts, after ``_sort_formats`` has been applied.
    """
    token_url = 'https://%s/%s/desktop/%s' % (
        self._TKN_HOST, media_id, '+'.join(sources))

    page = compat_urlparse.urlparse(url)
    request_headers = {
        'Origin': '%s://%s' % (page.scheme, page.hostname),
        'Referer': url,
    }
    tokens = self._download_json(
        token_url, video_id, data=b'', headers=request_headers)

    formats = []
    for height in sources:
        # The token response is keyed by the same height strings.
        token = tokens[height]['token']
        label = height + 'p'
        formats.append({
            'url': token,
            'format_id': label,
            'resolution': label,
            'quality': int(height),
        })
    self._sort_formats(formats)
    return formats
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
@ -68,21 +98,7 @@ class FourTubeBaseIE(InfoExtractor):
media_id = params[0]
sources = ['%s' % p for p in params[2]]
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
media_id, '+'.join(sources))
parsed_url = compat_urlparse.urlparse(url)
tokens = self._download_json(token_url, video_id, data=b'', headers={
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
'Referer': url,
})
formats = [{
'url': tokens[format]['token'],
'format_id': format + 'p',
'resolution': format + 'p',
'quality': int(format),
} for format in sources]
self._sort_formats(formats)
formats = self._extract_formats(url, video_id, media_id, sources)
return {
'id': video_id,
@ -164,6 +180,7 @@ class FuxIE(FourTubeBaseIE):
class PornTubeIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
_URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
_TKN_HOST = 'tkn.porntube.com'
_TESTS = [{
'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
'info_dict': {
@ -171,13 +188,32 @@ class PornTubeIE(FourTubeBaseIE):
'ext': 'mp4',
'title': 'Teen couple doing anal',
'uploader': 'Alexy',
'uploader_id': 'Alexy',
'uploader_id': '91488',
'upload_date': '20150606',
'timestamp': 1433595647,
'duration': 5052,
'view_count': int,
'like_count': int,
'categories': list,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
'info_dict': {
'id': '1331406',
'ext': 'mp4',
'title': 'Squirting Teen Ballerina on ECG',
'uploader': 'Exploited College Girls',
'uploader_id': '665',
'channel': 'Exploited College Girls',
'channel_id': '665',
'upload_date': '20130920',
'timestamp': 1379685485,
'duration': 851,
'view_count': int,
'like_count': int,
'age_limit': 18,
},
'params': {
@ -191,6 +227,55 @@ class PornTubeIE(FourTubeBaseIE):
'only_matching': True,
}]
def _real_extract(self, url):
    """Extract video metadata and formats from a page's INITIALSTATE blob.

    The webpage is downloaded, the quoted ``INITIALSTATE`` value is
    located, decoded and parsed as JSON, and the ``['page']['video']``
    entry supplies the metadata. Formats come from ``_extract_formats``.

    :param url: URL matching ``_VALID_URL``.
    :return: info dict for the video; ``age_limit`` is always 18.
    """
    match = re.match(self._VALID_URL, url)
    video_id, display_id = match.group('id', 'display_id')

    webpage = self._download_webpage(url, display_id)

    def decode_state(encoded):
        # The captured value is base64; once decoded as UTF-8 it is still
        # percent-encoded, so it is unquoted before JSON parsing.
        return compat_urllib_parse_unquote(
            compat_b64decode(encoded).decode('utf-8'))

    encoded_state = self._search_regex(
        r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
        webpage, 'data', group='value')
    state = self._parse_json(
        encoded_state, video_id, transform_source=decode_state)
    video = state['page']['video']

    title = video['title']
    media_id = video['mediaId']

    # Only encodings that report a height are requested as sources.
    sources = []
    for encoding in video['encodings']:
        if encoding.get('height'):
            sources.append(compat_str(encoding['height']))

    formats = self._extract_formats(url, video_id, media_id, sources)

    user_name = try_get(video, lambda x: x['user']['username'], compat_str)
    user_id = str_or_none(try_get(
        video, lambda x: x['user']['id'], int))
    channel_name = try_get(
        video, lambda x: x['channel']['name'], compat_str)
    channel_ident = str_or_none(try_get(
        video, lambda x: x['channel']['id'], int))

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        'thumbnail': url_or_none(video.get('masterThumb')),
        # Fall back to the channel when no user data is available.
        'uploader': user_name or channel_name,
        'uploader_id': user_id or channel_ident,
        'channel': channel_name,
        'channel_id': channel_ident,
        'timestamp': unified_timestamp(video.get('publishedAt')),
        'like_count': int_or_none(video.get('likes')),
        'dislike_count': int_or_none(video.get('dislikes')),
        'view_count': int_or_none(video.get('playsQty')),
        'duration': int_or_none(video.get('durationInSeconds')),
        'age_limit': 18,
    }
class PornerBrosIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'

View File

@ -3112,7 +3112,7 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]
if sharevideos_urls:

View File

@ -12,7 +12,7 @@ from ..utils import (
class IPrimaIE(InfoExtractor):
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
_VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
_GEO_BYPASS = False
_TESTS = [{
@ -33,6 +33,14 @@ class IPrimaIE(InfoExtractor):
# geo restricted
'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
'only_matching': True,
}, {
# iframe api.play-backend.iprima.cz
'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
'only_matching': True,
}, {
# iframe prima.iprima.cz
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
'only_matching': True,
}]
def _real_extract(self, url):
@ -42,7 +50,10 @@ class IPrimaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
video_id = self._search_regex(
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
r'data-product="([^"]+)">'),
webpage, 'real id')
playerpage = self._download_webpage(
'http://play.iprima.cz/prehravac/init',

View File

@ -167,9 +167,9 @@ class MotherlessGroupIE(InfoExtractor):
if not entries:
entries = [
self.url_result(
compat_urlparse.urljoin(base, '/' + video_id),
ie=MotherlessIE.ie_key(), video_id=video_id)
for video_id in orderedSet(re.findall(
compat_urlparse.urljoin(base, '/' + entry_id),
ie=MotherlessIE.ie_key(), video_id=entry_id)
for entry_id in orderedSet(re.findall(
r'data-codename=["\']([A-Z0-9]+)', webpage))]
return entries

View File

@ -7,6 +7,7 @@ import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote
from ..utils import (
find_xpath_attr,
smuggle_url,
@ -75,11 +76,16 @@ class NBCIE(AdobePassIE):
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
'only_matching': True,
},
{
# Percent escaped url
'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
'only_matching': True,
}
]
def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + permalink
permalink = 'http' + compat_urllib_parse_unquote(permalink)
response = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={
'filter[permalink]': permalink,

View File

@ -40,6 +40,7 @@ class PornHubIE(InfoExtractor):
'ext': 'mp4',
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
'uploader': 'Babes',
'upload_date': '20130628',
'duration': 361,
'view_count': int,
'like_count': int,
@ -57,6 +58,7 @@ class PornHubIE(InfoExtractor):
'ext': 'mp4',
'title': '重庆婷婷女王足交',
'uploader': 'Unknown',
'upload_date': '20150213',
'duration': 1753,
'view_count': int,
'like_count': int,
@ -237,8 +239,14 @@ class PornHubIE(InfoExtractor):
video_urls.append((video_url, None))
video_urls_set.add(video_url)
upload_date = None
formats = []
for video_url, height in video_urls:
if not upload_date:
upload_date = self._search_regex(
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date:
upload_date = upload_date.replace('/', '')
tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj:
@ -278,6 +286,7 @@ class PornHubIE(InfoExtractor):
return {
'id': video_id,
'uploader': video_uploader,
'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
'duration': duration,

View File

@ -164,6 +164,6 @@ class SeznamZpravyArticleIE(InfoExtractor):
description = info.get('description') or self._og_search_description(webpage)
return self.playlist_result([
self.url_result(url, ie=SeznamZpravyIE.ie_key())
for url in SeznamZpravyIE._extract_urls(webpage)],
self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
for entry_url in SeznamZpravyIE._extract_urls(webpage)],
article_id, title, description)

View File

@ -0,0 +1,44 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .nexx import NexxIE
from ..compat import compat_urlparse
class Tele5IE(InfoExtractor):
    """Extractor for tele5.de mediathek and tv pages.

    Resolves the numeric video id and hands extraction over to NexxIE
    through a nexx.cloud API URL.
    """

    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
        'info_dict': {
            'id': '1549416',
            'ext': 'mp4',
            'upload_date': '20180814',
            'timestamp': 1534290623,
            'title': 'Pandorum',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
        'only_matching': True,
    }, {
        'url': 'https://www.tele5.de/tv/dark-matter/videos',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at the Nexx video for ``url``.

        The id is read from the ``vid`` or ``ve_id`` query parameter when
        one is present; otherwise the page is downloaded and the id is
        taken from the ``data-id`` attribute of the ``video-player``
        element.
        """
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)

        # 'vid' takes precedence over 've_id'; the first value of the
        # first non-empty parameter is used.
        video_id = None
        for param in ('vid', 've_id'):
            values = query.get(param)
            if values:
                video_id = values[0]
                break

        if not video_id:
            display_id = self._match_id(url)
            webpage = self._download_webpage(url, display_id)
            video_id = self._html_search_regex(
                r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
                webpage, 'video id')

        nexx_url = 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id
        return self.url_result(
            nexx_url, ie=NexxIE.ie_key(), video_id=video_id)

View File

@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE):
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex(
r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
uploader = self._html_search_regex(
r'<span class="username">\s*(.+?)\s*<',
webpage, 'uploader', fatal=False)
@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE):
dislike_count = int_or_none(self._search_regex(
r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
view_count = str_to_int(self._search_regex(
r'<strong>Views: </strong>([\d,\.]+)\s*</li>',
r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._search_regex(
r'<span id="allCommentsCount">(\d+)</span>',
webpage, 'comment count', fatal=False))
category = self._search_regex(
r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)',
r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
webpage, 'category', fatal=False)
categories = [category] if category else None
tags_str = self._search_regex(
r'(?s)Tags:\s*</strong>(.+?)</(?!a)',
r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
webpage, 'tags', fatal=False)
tags = [t for t in re.findall(
r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None

View File

@ -559,7 +559,8 @@ class TwitchStreamIE(TwitchBaseIE):
TwitchAllVideosIE,
TwitchUploadsIE,
TwitchPastBroadcastsIE,
TwitchHighlightsIE))
TwitchHighlightsIE,
TwitchClipsIE))
else super(TwitchStreamIE, cls).suitable(url))
def _real_extract(self, url):
@ -633,7 +634,7 @@ class TwitchStreamIE(TwitchBaseIE):
class TwitchClipsIE(TwitchBaseIE):
IE_NAME = 'twitch:clips'
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
@ -653,6 +654,9 @@ class TwitchClipsIE(TwitchBaseIE):
# multiple formats
'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
'only_matching': True,
}, {
'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@ -299,10 +299,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
'channel_id': 'keypeele',
'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
'timestamp': 1380339469,
'upload_date': '20130928',
'duration': 187,
},
'expected_warnings': ['Unable to download JSON metadata'],
},
{
'url': 'http://vimeo.com/76979871',
@ -355,11 +358,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'url': 'https://vimeo.com/channels/tributes/6213729',
'info_dict': {
'id': '6213729',
'ext': 'mov',
'ext': 'mp4',
'title': 'Vimeo Tribute: The Shining',
'uploader': 'Casey Donahue',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
'uploader_id': 'caseydonahue',
'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
'channel_id': 'tributes',
'timestamp': 1250886430,
'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
@ -465,6 +470,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
if 'Referer' not in headers:
headers['Referer'] = url
channel_id = self._search_regex(
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
@ -563,19 +571,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
if config.get('view') == 4:
config = self._verify_player_video_password(redirect_url, video_id)
vod = config.get('video', {}).get('vod', {})
def is_rented():
if '>You rented this title.<' in webpage:
return True
if config.get('user', {}).get('purchased'):
return True
label = try_get(
config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str)
if label and label.startswith('You rented this'):
return True
for purchase_option in vod.get('purchase_options', []):
if purchase_option.get('purchased'):
return True
label = purchase_option.get('label_string')
if label and (label.startswith('You rented this') or label.endswith(' remaining')):
return True
return False
if is_rented():
feature_id = config.get('video', {}).get('vod', {}).get('feature_id')
if is_rented() and vod.get('is_trailer'):
feature_id = vod.get('feature_id')
if feature_id and not data.get('force_feature_id', False):
return self.url_result(smuggle_url(
'https://player.vimeo.com/player/%s' % feature_id,
@ -652,6 +664,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
webpage, 'license', default=None, group='license')
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
info_dict = {
'id': video_id,
'formats': formats,
@ -662,6 +676,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': like_count,
'comment_count': comment_count,
'license': cc_license,
'channel_id': channel_id,
'channel_url': channel_url,
}
info_dict = merge_dicts(info_dict, info_dict_config, json_ld)

View File

@ -4,15 +4,19 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
float_or_none,
unified_timestamp,
url_or_none,
)
class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
_TESTS = [{
# HTTP and HLS
'url': 'https://vzaar.com/videos/1152805',
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
'info_dict': {
@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor):
video_id = self._match_id(url)
video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
source_url = video_data['sourceUrl']
info = {
title = video_data['videoTitle']
formats = []
source_url = url_or_none(video_data.get('sourceUrl'))
if source_url:
f = {
'url': source_url,
'format_id': 'http',
}
if 'audio' in source_url:
f.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
f.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
'fps': float_or_none(video_data.get('fps')),
})
formats.append(f)
video_guid = video_data.get('guid')
usp = video_data.get('usp')
if isinstance(video_guid, compat_str) and isinstance(usp, dict):
m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
% (video_guid, video_id)) + '&'.join(
'%s=%s' % (k, v) for k, v in usp.items())
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'title': video_data['videoTitle'],
'url': source_url,
'title': title,
'thumbnail': self._proto_relative_url(video_data.get('poster')),
'duration': float_or_none(video_data.get('videoDuration')),
'timestamp': unified_timestamp(video_data.get('ts')),
'formats': formats,
}
if 'audio' in source_url:
info.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
info.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
})
return info

View File

@ -490,6 +490,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
@ -1907,6 +1909,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
self._downloader.report_warning('unable to extract uploader nickname')
channel_id = self._html_search_meta(
'channelId', video_webpage, 'channel id')
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
# thumbnail image
# We try first to get a high quality image:
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@ -2078,6 +2084,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'uploader_url': video_uploader_url,
'channel_id': channel_id,
'channel_url': channel_url,
'upload_date': upload_date,
'license': video_license,
'creator': video_creator or artist,

View File

@ -2477,7 +2477,7 @@ def parse_codecs(codecs_str):
vcodec, acodec = None, None
for full_codec in splited_codecs:
codec = full_codec.split('.')[0]
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
if not vcodec:
vcodec = full_codec
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.09.08'
__version__ = '2018.09.10'