1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-09 08:17:16 +08:00

Merge pull request #2 from rg3/master

Sync
This commit is contained in:
Melvin Soldia 2018-09-16 09:33:30 +08:00 committed by GitHub
commit 988d1eb469
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 342 additions and 79 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.08*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.10*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.08** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.10**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.09.08 [debug] youtube-dl version 2018.09.10
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -1,3 +1,14 @@
version 2018.09.10
Core
+ [utils] Properly recognize AV1 codec (#17506)
Extractors
+ [iprima] Add support for prima.iprima.cz (#17514)
+ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
* [nbc] Fix extraction of percent encoded URLs (#17374)
version 2018.09.08 version 2018.09.08
Extractors Extractors

View File

@ -847,6 +847,7 @@
- **techtv.mit.edu** - **techtv.mit.edu**
- **ted** - **ted**
- **Tele13** - **Tele13**
- **Tele5**
- **TeleBruxelles** - **TeleBruxelles**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf** - **Telegraaf**

View File

@ -785,6 +785,10 @@ class TestUtil(unittest.TestCase):
'vcodec': 'h264', 'vcodec': 'h264',
'acodec': 'aac', 'acodec': 'aac',
}) })
self.assertEqual(parse_codecs('av01.0.05M.08'), {
'vcodec': 'av01.0.05M.08',
'acodec': 'none',
})
def test_escape_rfc3986(self): def test_escape_rfc3986(self):
reserved = "!*'();:@&=+$,/?#[]" reserved = "!*'();:@&=+$,/?#[]"

View File

@ -8,7 +8,6 @@ from .kaltura import KalturaIE
from ..utils import ( from ..utils import (
extract_attributes, extract_attributes,
remove_end, remove_end,
urlencode_postdata,
) )
@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data = self._download_json( webpage = self._download_webpage(url, video_id)
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
data=urlencode_postdata({
'postid': video_id,
'action': 'get_channel_kaltura_vars',
}))
entry_id = data['entry_id'] entry_id, partner_id, title = [None] * 3
vars = self._parse_json(
self._search_regex(
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
default='{}'), video_id, fatal=False)
if vars:
entry_id = vars.get('entry_id')
partner_id = vars.get('partner_id')
title = vars.get('vid_label')
if not entry_id:
entry_id = self._search_regex(
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
player = self._download_webpage(
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
query={'id': entry_id})
kaltura_id = self._search_regex(
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
'kaltura id', group='id')
if not partner_id:
partner_id = self._search_regex(
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
default='513551')
return self.url_result( return self.url_result(
'kaltura:%s:%s' % (data['partner_id'], entry_id), 'kaltura:%s:%s' % (partner_id, kaltura_id),
ie=KalturaIE.ie_key(), video_id=entry_id, ie=KalturaIE.ie_key(), video_id=kaltura_id,
video_title=data.get('vid_label')) video_title=title)
class AsianCrushPlaylistIE(InfoExtractor): class AsianCrushPlaylistIE(InfoExtractor):

View File

@ -211,6 +211,11 @@ class InfoExtractor(object):
If not explicitly set, calculated from timestamp. If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader. uploader_id: Nickname or id of the video uploader.
uploader_url: Full URL to a personal webpage of the video uploader. uploader_url: Full URL to a personal webpage of the video uploader.
channel: Full name of the channel the video is uploaded on.
Note that channel fields may or may not repeat uploader
fields. This depends on a particular extractor.
channel_id: Id of the channel.
channel_url: Full URL to a channel webpage.
location: Physical location where the video was filmed. location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format subtitles: The available subtitles as a dictionary in the format
{tag: subformats}. "tag" is usually a language code, and {tag: subformats}. "tag" is usually a language code, and
@ -1701,9 +1706,9 @@ class InfoExtractor(object):
# However, this is not always respected, for example, [2] # However, this is not always respected, for example, [2]
# contains EXT-X-STREAM-INF tag which references AUDIO # contains EXT-X-STREAM-INF tag which references AUDIO
# rendition group but does not have CODECS and despite # rendition group but does not have CODECS and despite
# referencing audio group an audio group, it represents # referencing an audio group it represents a complete
# a complete (with audio and video) format. So, for such cases # (with audio and video) format. So, for such cases we will
# we will ignore references to rendition groups and treat them # ignore references to rendition groups and treat them
# as complete formats. # as complete formats.
if audio_group_id and codecs and f.get('vcodec') != 'none': if audio_group_id and codecs and f.get('vcodec') != 'none':
audio_group = groups.get(audio_group_id) audio_group = groups.get(audio_group_id)

View File

@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor):
try: try:
self.to_screen('%s: Checking %s video format URL' % (video_id, format_id)) self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
self._downloader._opener.open(video_url, timeout=5).close() self._downloader._opener.open(video_url, timeout=5).close()
except timeout as e: except timeout:
self.to_screen( self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, format_id)) '%s: %s URL is invalid, skipping' % (video_id, format_id))
continue continue

View File

@ -9,6 +9,7 @@ from ..utils import (
encode_base_n, encode_base_n,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
merge_dicts,
parse_duration, parse_duration,
str_to_int, str_to_int,
url_or_none, url_or_none,
@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor):
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video', 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Infamous Tiffany Teen Strip Tease Video', 'title': 'Infamous Tiffany Teen Strip Tease Video',
'description': 'md5:764f39abf932daafa37485eb46efa152',
'timestamp': 1232520922,
'upload_date': '20090121',
'duration': 1838, 'duration': 1838,
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
}, },
'params': {
'proxy': '127.0.0.1:8118'
}
}, { }, {
# New (May 2016) URL layout # New (May 2016) URL layout
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
duration = parse_duration(self._html_search_meta('duration', webpage)) json_ld = self._search_json_ld(webpage, display_id, default={})
duration = parse_duration(self._html_search_meta(
'duration', webpage, default=None))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views', r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
return { return merge_dicts(json_ld, {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor):
'view_count': view_count, 'view_count': view_count,
'formats': formats, 'formats': formats,
'age_limit': 18, 'age_limit': 18,
} })

View File

@ -1086,6 +1086,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tele5 import Tele5IE
from .tele13 import Tele13IE from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE

View File

@ -3,15 +3,45 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_b64decode,
compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import ( from ..utils import (
int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
str_or_none,
str_to_int, str_to_int,
try_get,
unified_timestamp,
url_or_none,
) )
class FourTubeBaseIE(InfoExtractor): class FourTubeBaseIE(InfoExtractor):
_TKN_HOST = 'tkn.kodicdn.com'
def _extract_formats(self, url, video_id, media_id, sources):
    """Fetch per-quality tokens for a media item and build its formats.

    url is the page URL; it is sent as the Referer and its scheme and
    hostname make up the Origin header. video_id is passed through to
    _download_json. media_id and sources (an iterable of height strings
    such as '720') are combined into the token URL on self._TKN_HOST.

    Returns the list of format dicts after running it through
    self._sort_formats.
    """
    page = compat_urlparse.urlparse(url)
    origin = '%s://%s' % (page.scheme, page.hostname)
    token_url = 'https://%s/%s/desktop/%s' % (
        self._TKN_HOST, media_id, '+'.join(sources))

    # NOTE(review): the empty request body presumably makes this a POST;
    # confirm against _download_json.
    tokens = self._download_json(
        token_url, video_id, data=b'',
        headers={'Origin': origin, 'Referer': url})

    formats = []
    for height in sources:
        # The same "<height>p" label serves as format id and resolution.
        label = height + 'p'
        formats.append({
            'url': tokens[height]['token'],
            'format_id': label,
            'resolution': label,
            'quality': int(height),
        })
    self._sort_formats(formats)
    return formats
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
@ -68,21 +98,7 @@ class FourTubeBaseIE(InfoExtractor):
media_id = params[0] media_id = params[0]
sources = ['%s' % p for p in params[2]] sources = ['%s' % p for p in params[2]]
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format( formats = self._extract_formats(url, video_id, media_id, sources)
media_id, '+'.join(sources))
parsed_url = compat_urlparse.urlparse(url)
tokens = self._download_json(token_url, video_id, data=b'', headers={
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
'Referer': url,
})
formats = [{
'url': tokens[format]['token'],
'format_id': format + 'p',
'resolution': format + 'p',
'quality': int(format),
} for format in sources]
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
@ -164,6 +180,7 @@ class FuxIE(FourTubeBaseIE):
class PornTubeIE(FourTubeBaseIE): class PornTubeIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)' _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
_URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s' _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
_TKN_HOST = 'tkn.porntube.com'
_TESTS = [{ _TESTS = [{
'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759', 'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
'info_dict': { 'info_dict': {
@ -171,13 +188,32 @@ class PornTubeIE(FourTubeBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Teen couple doing anal', 'title': 'Teen couple doing anal',
'uploader': 'Alexy', 'uploader': 'Alexy',
'uploader_id': 'Alexy', 'uploader_id': '91488',
'upload_date': '20150606', 'upload_date': '20150606',
'timestamp': 1433595647, 'timestamp': 1433595647,
'duration': 5052, 'duration': 5052,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'categories': list, 'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
'info_dict': {
'id': '1331406',
'ext': 'mp4',
'title': 'Squirting Teen Ballerina on ECG',
'uploader': 'Exploited College Girls',
'uploader_id': '665',
'channel': 'Exploited College Girls',
'channel_id': '665',
'upload_date': '20130920',
'timestamp': 1379685485,
'duration': 851,
'view_count': int,
'like_count': int,
'age_limit': 18, 'age_limit': 18,
}, },
'params': { 'params': {
@ -191,6 +227,55 @@ class PornTubeIE(FourTubeBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url):
    """Extract a video from the state blob embedded in its page.

    The page carries an INITIALSTATE string that is base64-decoded,
    decoded as UTF-8 and percent-unquoted before being parsed as JSON;
    the video description lives under ['page']['video'] of that object.

    Returns an info dict with the formats obtained from
    _extract_formats plus the metadata found in the state blob.
    """
    mobj = re.match(self._VALID_URL, url)
    video_id, display_id = mobj.group('id', 'display_id')

    # Embed URLs have no display id (the group does not participate in
    # the match and is None), so fall back to the numeric id.
    webpage = self._download_webpage(url, display_id or video_id)

    video = self._parse_json(
        self._search_regex(
            r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'data', group='value'), video_id,
        transform_source=lambda x: compat_urllib_parse_unquote(
            compat_b64decode(x).decode('utf-8')))['page']['video']

    title = video['title']
    media_id = video['mediaId']
    # Encodings without a height cannot be mapped to a token, skip them.
    sources = [compat_str(e['height'])
               for e in video['encodings'] if e.get('height')]
    formats = self._extract_formats(url, video_id, media_id, sources)

    thumbnail = url_or_none(video.get('masterThumb'))
    uploader = try_get(video, lambda x: x['user']['username'], compat_str)
    uploader_id = str_or_none(try_get(
        video, lambda x: x['user']['id'], int))
    channel = try_get(video, lambda x: x['channel']['name'], compat_str)
    channel_id = str_or_none(try_get(
        video, lambda x: x['channel']['id'], int))
    like_count = int_or_none(video.get('likes'))
    dislike_count = int_or_none(video.get('dislikes'))
    view_count = int_or_none(video.get('playsQty'))
    duration = int_or_none(video.get('durationInSeconds'))
    timestamp = unified_timestamp(video.get('publishedAt'))

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        'thumbnail': thumbnail,
        # Channel data stands in when there is no user information.
        'uploader': uploader or channel,
        'uploader_id': uploader_id or channel_id,
        'channel': channel,
        'channel_id': channel_id,
        'timestamp': timestamp,
        'like_count': like_count,
        'dislike_count': dislike_count,
        'view_count': view_count,
        'duration': duration,
        'age_limit': 18,
    }
class PornerBrosIE(FourTubeBaseIE): class PornerBrosIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)' _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'

View File

@ -3112,7 +3112,7 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key()) foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer( sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)] webpage)]
if sharevideos_urls: if sharevideos_urls:

View File

@ -12,7 +12,7 @@ from ..utils import (
class IPrimaIE(InfoExtractor): class IPrimaIE(InfoExtractor):
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)' _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
_GEO_BYPASS = False _GEO_BYPASS = False
_TESTS = [{ _TESTS = [{
@ -33,6 +33,14 @@ class IPrimaIE(InfoExtractor):
# geo restricted # geo restricted
'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1', 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
'only_matching': True, 'only_matching': True,
}, {
# iframe api.play-backend.iprima.cz
'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
'only_matching': True,
}, {
# iframe prima.iprima.cz
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -42,7 +50,10 @@ class IPrimaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id') video_id = self._search_regex(
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
r'data-product="([^"]+)">'),
webpage, 'real id')
playerpage = self._download_webpage( playerpage = self._download_webpage(
'http://play.iprima.cz/prehravac/init', 'http://play.iprima.cz/prehravac/init',

View File

@ -167,9 +167,9 @@ class MotherlessGroupIE(InfoExtractor):
if not entries: if not entries:
entries = [ entries = [
self.url_result( self.url_result(
compat_urlparse.urljoin(base, '/' + video_id), compat_urlparse.urljoin(base, '/' + entry_id),
ie=MotherlessIE.ie_key(), video_id=video_id) ie=MotherlessIE.ie_key(), video_id=entry_id)
for video_id in orderedSet(re.findall( for entry_id in orderedSet(re.findall(
r'data-codename=["\']([A-Z0-9]+)', webpage))] r'data-codename=["\']([A-Z0-9]+)', webpage))]
return entries return entries

View File

@ -7,6 +7,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
find_xpath_attr, find_xpath_attr,
smuggle_url, smuggle_url,
@ -75,11 +76,16 @@ class NBCIE(AdobePassIE):
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
'only_matching': True, 'only_matching': True,
}, },
{
# Percent escaped url
'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
'only_matching': True,
}
] ]
def _real_extract(self, url): def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups() permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + permalink permalink = 'http' + compat_urllib_parse_unquote(permalink)
response = self._download_json( response = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={ 'https://api.nbc.com/v3/videos', video_id, query={
'filter[permalink]': permalink, 'filter[permalink]': permalink,

View File

@ -40,6 +40,7 @@ class PornHubIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Seductive Indian beauty strips down and fingers her pink pussy', 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
'uploader': 'Babes', 'uploader': 'Babes',
'upload_date': '20130628',
'duration': 361, 'duration': 361,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -57,6 +58,7 @@ class PornHubIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '重庆婷婷女王足交', 'title': '重庆婷婷女王足交',
'uploader': 'Unknown', 'uploader': 'Unknown',
'upload_date': '20150213',
'duration': 1753, 'duration': 1753,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
@ -237,8 +239,14 @@ class PornHubIE(InfoExtractor):
video_urls.append((video_url, None)) video_urls.append((video_url, None))
video_urls_set.add(video_url) video_urls_set.add(video_url)
upload_date = None
formats = [] formats = []
for video_url, height in video_urls: for video_url, height in video_urls:
if not upload_date:
upload_date = self._search_regex(
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date:
upload_date = upload_date.replace('/', '')
tbr = None tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj: if mobj:
@ -278,6 +286,7 @@ class PornHubIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': upload_date,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,

View File

@ -164,6 +164,6 @@ class SeznamZpravyArticleIE(InfoExtractor):
description = info.get('description') or self._og_search_description(webpage) description = info.get('description') or self._og_search_description(webpage)
return self.playlist_result([ return self.playlist_result([
self.url_result(url, ie=SeznamZpravyIE.ie_key()) self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
for url in SeznamZpravyIE._extract_urls(webpage)], for entry_url in SeznamZpravyIE._extract_urls(webpage)],
article_id, title, description) article_id, title, description)

View File

@ -0,0 +1,44 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .nexx import NexxIE
from ..compat import compat_urlparse
class Tele5IE(InfoExtractor):
    """Extractor for tele5.de mediathek and tv pages.

    Resolves the numeric video id and hands extraction over to NexxIE.
    """

    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
        'info_dict': {
            'id': '1549416',
            'ext': 'mp4',
            'upload_date': '20180814',
            'timestamp': 1534290623,
            'title': 'Pandorum',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
        'only_matching': True,
    }, {
        'url': 'https://www.tele5.de/tv/dark-matter/videos',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing NexxIE at the resolved video id."""
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)

        # Prefer an id given in the query string: 'vid' first, then 've_id'.
        video_id = None
        for param in ('vid', 've_id'):
            values = query.get(param)
            if values:
                video_id = values[0]
                break

        if not video_id:
            # No id in the URL: read it from the player element of the page.
            display_id = self._match_id(url)
            webpage = self._download_webpage(url, display_id)
            video_id = self._html_search_regex(
                r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
                webpage, 'video id')

        nexx_url = 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id
        return self.url_result(
            nexx_url, ie=NexxIE.ie_key(), video_id=video_id)

View File

@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE):
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title') r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False) r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
uploader = self._html_search_regex( uploader = self._html_search_regex(
r'<span class="username">\s*(.+?)\s*<', r'<span class="username">\s*(.+?)\s*<',
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE):
dislike_count = int_or_none(self._search_regex( dislike_count = int_or_none(self._search_regex(
r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False)) r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'<strong>Views: </strong>([\d,\.]+)\s*</li>', r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = str_to_int(self._search_regex( comment_count = str_to_int(self._search_regex(
r'<span id="allCommentsCount">(\d+)</span>', r'<span id="allCommentsCount">(\d+)</span>',
webpage, 'comment count', fatal=False)) webpage, 'comment count', fatal=False))
category = self._search_regex( category = self._search_regex(
r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)', r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
webpage, 'category', fatal=False) webpage, 'category', fatal=False)
categories = [category] if category else None categories = [category] if category else None
tags_str = self._search_regex( tags_str = self._search_regex(
r'(?s)Tags:\s*</strong>(.+?)</(?!a)', r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
webpage, 'tags', fatal=False) webpage, 'tags', fatal=False)
tags = [t for t in re.findall( tags = [t for t in re.findall(
r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None

View File

@ -559,7 +559,8 @@ class TwitchStreamIE(TwitchBaseIE):
TwitchAllVideosIE, TwitchAllVideosIE,
TwitchUploadsIE, TwitchUploadsIE,
TwitchPastBroadcastsIE, TwitchPastBroadcastsIE,
TwitchHighlightsIE)) TwitchHighlightsIE,
TwitchClipsIE))
else super(TwitchStreamIE, cls).suitable(url)) else super(TwitchStreamIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
@ -633,7 +634,7 @@ class TwitchStreamIE(TwitchBaseIE):
class TwitchClipsIE(TwitchBaseIE): class TwitchClipsIE(TwitchBaseIE):
IE_NAME = 'twitch:clips' IE_NAME = 'twitch:clips'
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat', 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
@ -653,6 +654,9 @@ class TwitchClipsIE(TwitchBaseIE):
# multiple formats # multiple formats
'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy', 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -299,10 +299,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
'uploader_id': 'atencio', 'uploader_id': 'atencio',
'uploader': 'Peter Atencio', 'uploader': 'Peter Atencio',
'channel_id': 'keypeele',
'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
'timestamp': 1380339469, 'timestamp': 1380339469,
'upload_date': '20130928', 'upload_date': '20130928',
'duration': 187, 'duration': 187,
}, },
'expected_warnings': ['Unable to download JSON metadata'],
}, },
{ {
'url': 'http://vimeo.com/76979871', 'url': 'http://vimeo.com/76979871',
@ -355,11 +358,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'url': 'https://vimeo.com/channels/tributes/6213729', 'url': 'https://vimeo.com/channels/tributes/6213729',
'info_dict': { 'info_dict': {
'id': '6213729', 'id': '6213729',
'ext': 'mov', 'ext': 'mp4',
'title': 'Vimeo Tribute: The Shining', 'title': 'Vimeo Tribute: The Shining',
'uploader': 'Casey Donahue', 'uploader': 'Casey Donahue',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
'uploader_id': 'caseydonahue', 'uploader_id': 'caseydonahue',
'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
'channel_id': 'tributes',
'timestamp': 1250886430, 'timestamp': 1250886430,
'upload_date': '20090821', 'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
@ -465,6 +470,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
if 'Referer' not in headers: if 'Referer' not in headers:
headers['Referer'] = url headers['Referer'] = url
channel_id = self._search_regex(
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
# Extract ID from URL # Extract ID from URL
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
@ -563,19 +571,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
if config.get('view') == 4: if config.get('view') == 4:
config = self._verify_player_video_password(redirect_url, video_id) config = self._verify_player_video_password(redirect_url, video_id)
vod = config.get('video', {}).get('vod', {})
def is_rented(): def is_rented():
if '>You rented this title.<' in webpage: if '>You rented this title.<' in webpage:
return True return True
if config.get('user', {}).get('purchased'): if config.get('user', {}).get('purchased'):
return True return True
label = try_get( for purchase_option in vod.get('purchase_options', []):
config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str) if purchase_option.get('purchased'):
if label and label.startswith('You rented this'): return True
return True label = purchase_option.get('label_string')
if label and (label.startswith('You rented this') or label.endswith(' remaining')):
return True
return False return False
if is_rented(): if is_rented() and vod.get('is_trailer'):
feature_id = config.get('video', {}).get('vod', {}).get('feature_id') feature_id = vod.get('feature_id')
if feature_id and not data.get('force_feature_id', False): if feature_id and not data.get('force_feature_id', False):
return self.url_result(smuggle_url( return self.url_result(smuggle_url(
'https://player.vimeo.com/player/%s' % feature_id, 'https://player.vimeo.com/player/%s' % feature_id,
@ -652,6 +664,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1', r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
webpage, 'license', default=None, group='license') webpage, 'license', default=None, group='license')
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
info_dict = { info_dict = {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
@ -662,6 +676,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': like_count, 'like_count': like_count,
'comment_count': comment_count, 'comment_count': comment_count,
'license': cc_license, 'license': cc_license,
'channel_id': channel_id,
'channel_url': channel_url,
} }
info_dict = merge_dicts(info_dict, info_dict_config, json_ld) info_dict = merge_dicts(info_dict, info_dict_config, json_ld)

View File

@ -4,15 +4,19 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
float_or_none, float_or_none,
unified_timestamp,
url_or_none,
) )
class VzaarIE(InfoExtractor): class VzaarIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)' _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# HTTP and HLS
'url': 'https://vzaar.com/videos/1152805', 'url': 'https://vzaar.com/videos/1152805',
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
'info_dict': { 'info_dict': {
@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id) 'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
source_url = video_data['sourceUrl']
info = { title = video_data['videoTitle']
formats = []
source_url = url_or_none(video_data.get('sourceUrl'))
if source_url:
f = {
'url': source_url,
'format_id': 'http',
}
if 'audio' in source_url:
f.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
f.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
'fps': float_or_none(video_data.get('fps')),
})
formats.append(f)
video_guid = video_data.get('guid')
usp = video_data.get('usp')
if isinstance(video_guid, compat_str) and isinstance(usp, dict):
m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
% (video_guid, video_id)) + '&'.join(
'%s=%s' % (k, v) for k, v in usp.items())
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id, 'id': video_id,
'title': video_data['videoTitle'], 'title': title,
'url': source_url,
'thumbnail': self._proto_relative_url(video_data.get('poster')), 'thumbnail': self._proto_relative_url(video_data.get('poster')),
'duration': float_or_none(video_data.get('videoDuration')), 'duration': float_or_none(video_data.get('videoDuration')),
'timestamp': unified_timestamp(video_data.get('ts')),
'formats': formats,
} }
if 'audio' in source_url:
info.update({
'vcodec': 'none',
'ext': 'mp3',
})
else:
info.update({
'width': int_or_none(video_data.get('width')),
'height': int_or_none(video_data.get('height')),
'ext': 'mp4',
})
return info

View File

@ -490,6 +490,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister', 'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag', 'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002', 'upload_date': '20121002',
'license': 'Standard YouTube License', 'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
@ -1907,6 +1909,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
self._downloader.report_warning('unable to extract uploader nickname') self._downloader.report_warning('unable to extract uploader nickname')
channel_id = self._html_search_meta(
'channelId', video_webpage, 'channel id')
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
# thumbnail image # thumbnail image
# We try first to get a high quality image: # We try first to get a high quality image:
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">', m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@ -2078,6 +2084,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': video_uploader, 'uploader': video_uploader,
'uploader_id': video_uploader_id, 'uploader_id': video_uploader_id,
'uploader_url': video_uploader_url, 'uploader_url': video_uploader_url,
'channel_id': channel_id,
'channel_url': channel_url,
'upload_date': upload_date, 'upload_date': upload_date,
'license': video_license, 'license': video_license,
'creator': video_creator or artist, 'creator': video_creator or artist,

View File

@ -2477,7 +2477,7 @@ def parse_codecs(codecs_str):
vcodec, acodec = None, None vcodec, acodec = None, None
for full_codec in splited_codecs: for full_codec in splited_codecs:
codec = full_codec.split('.')[0] codec = full_codec.split('.')[0]
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'): if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
if not vcodec: if not vcodec:
vcodec = full_codec vcodec = full_codec
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.09.08' __version__ = '2018.09.10'