mirror of
https://github.com/l1ving/youtube-dl
synced 2025-02-09 06:05:36 +08:00
Merge branch 'master' into Vimeo-issue-16717
This commit is contained in:
commit
1fdb5ec5b1
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.29**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.08.22*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.08.22**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.07.29
|
||||
[debug] youtube-dl version 2018.08.22
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -48,3 +48,6 @@ youtube-dl.zsh
|
||||
|
||||
tmp/
|
||||
venv/
|
||||
|
||||
# VS Code related files
|
||||
.vscode
|
||||
|
36
ChangeLog
36
ChangeLog
@ -1,3 +1,39 @@
|
||||
version 2018.08.22
|
||||
|
||||
Core
|
||||
* [utils] Use pure browser header for User-Agent (#17236)
|
||||
|
||||
Extractors
|
||||
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
|
||||
+ [yourporn] Add support for yourporn.sexy (#17298)
|
||||
+ [go] Add support for disneynow.go.com (#16299, #17264)
|
||||
+ [6play] Add support for play.rtl.hr (#17249)
|
||||
* [anvato] Fallback to generic API key for access-key-to-API-key lookup
|
||||
(#16788, #17254)
|
||||
* [lci] Fix extraction (#17274)
|
||||
* [bbccouk] Extend id URL regular expression (#17270)
|
||||
* [cwtv] Fix extraction (#17256)
|
||||
* [nova] Fix extraction (#17241)
|
||||
+ [generic] Add support for expressen embeds
|
||||
* [raywenderlich] Adapt to site redesign (#17225)
|
||||
+ [redbulltv] Add support for redbull.com tv URLs (#17218)
|
||||
+ [bitchute] Add support for bitchute.com (#14052)
|
||||
+ [clyp] Add support for token protected media (#17184)
|
||||
* [imdb] Fix extension extraction (#17167)
|
||||
|
||||
|
||||
version 2018.08.04
|
||||
|
||||
Extractors
|
||||
* [funk:channel] Improve byChannelAlias extraction (#17142)
|
||||
* [twitch] Fix authentication (#17024, #17126)
|
||||
* [twitch:vod] Improve URL regular expression (#17135)
|
||||
* [watchbox] Fix extraction (#17107)
|
||||
* [pbs] Fix extraction (#17109)
|
||||
* [theplatform] Relax URL regular expression (#16181, #17097)
|
||||
+ [viqeo] Add support for viqeo.tv (#17066)
|
||||
|
||||
|
||||
version 2018.07.29
|
||||
|
||||
Extractors
|
||||
|
@ -108,6 +108,8 @@
|
||||
- **BiliBili**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
@ -405,6 +407,7 @@
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KinoPoisk**
|
||||
- **KonserthusetPlay**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
@ -696,6 +699,7 @@
|
||||
- **RaiPlayLive**
|
||||
- **RaiPlayPlaylist**
|
||||
- **RayWenderlich**
|
||||
- **RayWenderlichCourse**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@ -1001,6 +1005,7 @@
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **Viqeo**
|
||||
- **Viu**
|
||||
- **viu:ott**
|
||||
- **viu:playlist**
|
||||
@ -1092,6 +1097,7 @@
|
||||
- **YouNowLive**
|
||||
- **YouNowMoment**
|
||||
- **YouPorn**
|
||||
- **YourPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
- **youtube:channel**: YouTube.com channels
|
||||
|
@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
|
||||
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
|
||||
'info_dict': {
|
||||
'id': '4465496',
|
||||
'ext': 'mp4',
|
||||
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
|
||||
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
|
||||
'duration': 22,
|
||||
'timestamp': 1534855680,
|
||||
'upload_date': '20180821',
|
||||
'uploader': 'ANV',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
|
||||
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AnvatoIE, self).__init__(*args, **kwargs)
|
||||
self.__server_time = None
|
||||
@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
|
||||
'api': {
|
||||
'anvrid': anvrid,
|
||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
|
||||
'anvts': server_time,
|
||||
},
|
||||
}
|
||||
@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
access_key) or access_key
|
||||
return self._get_anvato_videos(access_key, video_id)
|
||||
|
@ -29,7 +29,7 @@ from ..compat import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'[pbw][\da-z]{7}'
|
||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@ -236,6 +236,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/programmes/m00005xn',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
118
youtube_dl/extractor/bitchute.py
Normal file
118
youtube_dl/extractor/bitchute.py
Normal file
@ -0,0 +1,118 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
    """Extractor for single bitchute.com videos.

    Accepts /video/<id>, /embed/<id> and /torrent/<hash>/<id> URLs; the
    metadata is always read from the /video/<id> page.
    """

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
        'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
        'info_dict': {
            'id': 'szoMrox2JEI',
            'ext': 'mp4',
            'title': 'Fuck bitches get money',
            'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Victoria X Rave',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by url."""
        video_id = self._match_id(url)

        # Whatever URL flavour was given, scrape the regular video page.
        page_url = 'https://www.bitchute.com/video/%s' % video_id
        webpage = self._download_webpage(page_url, video_id)

        # Title sources, in order of preference: the video-title element or
        # <title>, then the "description" meta tag, then og:description.
        title_patterns = (
            r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)',
            r'<title>([^<]+)',
        )
        title = self._search_regex(
            title_patterns, webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'description', webpage, 'title', default=None)
        if not title:
            title = self._og_search_description(webpage)

        # Each addWebSeed('<url>') call in the page contributes one format.
        formats = []
        for seed in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
            formats.append({'url': seed.group('url')})
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)

        # Prefer og:image, falling back to the twitter:image:src meta tag.
        thumbnail = self._og_search_thumbnail(webpage, default=None)
        if not thumbnail:
            thumbnail = self._html_search_meta(
                'twitter:image:src', webpage, 'thumbnail')

        uploader = self._html_search_regex(
            r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
            'uploader', fatal=False)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': formats,
        }
        return info
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
    """Extractor that turns a bitchute.com channel into a playlist."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Sent both as the csrfmiddlewaretoken form field and as the csrftoken
    # cookie of every "extend" request.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield one url_result per video, paging through the channel.

        Paging stops when the response reports success == False, carries no
        html, or the html contains no further video links.
        """
        channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        extend_url = '%sextend/' % channel_url
        offset = 0
        page_num = 0
        while True:
            page_num += 1
            form = urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': offset,
            })
            # Built afresh for every request rather than shared across pages.
            request_headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': channel_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': 'csrftoken=%s' % self._TOKEN,
            }
            data = self._download_json(
                extend_url, channel_id,
                'Downloading channel page %d' % page_num,
                data=form, headers=request_headers)
            html = data.get('html')
            if data.get('success') is False or not html:
                return
            video_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                html)
            if not video_ids:
                return
            # The next request starts after the videos seen so far.
            offset += len(video_ids)
            for video_id in video_ids:
                video_url = 'https://www.bitchute.com/video/%s' % video_id
                yield self.url_result(
                    video_url, ie=BitChuteIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return a playlist result whose entries come from _entries()."""
        channel_id = self._match_id(url)
        entries = self._entries(channel_id)
        return self.playlist_result(entries, playlist_id=channel_id)
|
@ -1,15 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ClypIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://clyp.it/ojz2wfah',
|
||||
'md5': '1d4961036c41247ecfdcc439c0cddcbb',
|
||||
'info_dict': {
|
||||
@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
|
||||
'timestamp': 1443515251,
|
||||
'upload_date': '20150929',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
|
||||
'info_dict': {
|
||||
'id': 'b04p1odi',
|
||||
'ext': 'mp3',
|
||||
'title': 'GJ! (Reward Edit)',
|
||||
'description': 'Metal Resistance (THE ONE edition)',
|
||||
'duration': 177.789,
|
||||
'timestamp': 1528241278,
|
||||
'upload_date': '20180605',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
token = qs.get('token', [None])[0]
|
||||
|
||||
query = {}
|
||||
if token:
|
||||
query['token'] = token
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.clyp.it/%s' % audio_id, audio_id)
|
||||
'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
|
||||
|
||||
formats = []
|
||||
for secure in ('', 'Secure'):
|
||||
@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
|
||||
title = metadata['Title']
|
||||
description = metadata.get('Description')
|
||||
duration = float_or_none(metadata.get('Duration'))
|
||||
timestamp = parse_iso8601(metadata.get('DateCreated'))
|
||||
timestamp = unified_timestamp(metadata.get('DateCreated'))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -40,10 +43,15 @@ class CWTVIE(InfoExtractor):
|
||||
'duration': 1263,
|
||||
'series': 'Whose Line Is It Anyway?',
|
||||
'season_number': 11,
|
||||
'season': '11',
|
||||
'episode_number': 20,
|
||||
'upload_date': '20151006',
|
||||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||
@ -58,60 +66,28 @@ class CWTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = None
|
||||
formats = []
|
||||
for partner in (154, 213):
|
||||
vdata = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
|
||||
if not vdata:
|
||||
continue
|
||||
video_data = vdata
|
||||
for quality, quality_data in vdata.get('videos', {}).items():
|
||||
quality_url = quality_data.get('uri')
|
||||
if not quality_url:
|
||||
continue
|
||||
if quality == 'variantplaylist':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = int_or_none(quality_data.get('bitrate'))
|
||||
format_id = 'http' + ('-%d' % tbr if tbr else '')
|
||||
if self._is_valid_url(quality_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': quality_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
video_metadata = video_data['assetFields']
|
||||
ism_url = video_metadata.get('smoothStreamingUrl')
|
||||
if ism_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
ism_url, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
video_data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
||||
|
||||
thumbnails = [{
|
||||
'url': image['uri'],
|
||||
'width': image.get('width'),
|
||||
'height': image.get('height'),
|
||||
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': video_metadata['UnicornCcUrl'],
|
||||
}],
|
||||
} if video_metadata.get('UnicornCcUrl') else None
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
if episode and season:
|
||||
episode = episode.lstrip(season)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': video_metadata['title'],
|
||||
'description': video_metadata.get('description'),
|
||||
'duration': int_or_none(video_metadata.get('duration')),
|
||||
'series': video_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(video_metadata.get('seasonNumber')),
|
||||
'season': video_metadata.get('seasonName'),
|
||||
'episode_number': int_or_none(video_metadata.get('episodeNumber')),
|
||||
'timestamp': parse_iso8601(video_data.get('startTime')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': title,
|
||||
'url': smuggle_url(mpx_url, {'force_smil_url': True}),
|
||||
'description': video_data.get('description_long'),
|
||||
'duration': int_or_none(video_data.get('duration_secs')),
|
||||
'series': video_data.get('series_name'),
|
||||
'season_number': int_or_none(season),
|
||||
'episode_number': int_or_none(episode),
|
||||
'timestamp': parse_iso8601(video_data.get('start_time')),
|
||||
'age_limit': parse_age_limit(video_data.get('rating')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
@ -11,7 +13,13 @@ from ..utils import (
|
||||
|
||||
|
||||
class ExpressenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?expressen\.se/
|
||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||
tv/(?:[^/]+/)*
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
|
||||
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
|
||||
@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_urls(webpage):
    """Return the src of every expressen.se player <iframe> in webpage.

    Only iframes pointing at the tvspelare/video or videoplayer/embed
    player paths are collected; the scheme may be absent (//host/...).
    """
    iframe_re = r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1'
    urls = []
    for match in re.finditer(iframe_re, webpage):
        urls.append(match.group('url'))
    return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
|
@ -118,6 +118,10 @@ from .bilibili import (
|
||||
BiliBiliBangumiIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bitchute import (
|
||||
BitChuteIE,
|
||||
BitChuteChannelIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
@ -516,6 +520,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
@ -736,7 +741,10 @@ from .nonktube import NonkTubeIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .nova import (
|
||||
NovaEmbedIE,
|
||||
NovaIE,
|
||||
)
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
@ -895,7 +903,10 @@ from .rai import (
|
||||
RaiPlayPlaylistIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .raywenderlich import RayWenderlichIE
|
||||
from .raywenderlich import (
|
||||
RayWenderlichIE,
|
||||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
@ -1170,6 +1181,7 @@ from .tvp import (
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
TVPlayHomeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
@ -1291,6 +1303,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
@ -1416,6 +1429,7 @@ from .younow import (
|
||||
YouNowMomentIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
|
@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -125,17 +126,31 @@ class FunkChannelIE(FunkBaseIE):
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON', headers=headers, query={
|
||||
'size': 100,
|
||||
}, fatal=False)
|
||||
if by_channel_alias:
|
||||
for page_num in itertools.count():
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
|
||||
headers=headers, query={
|
||||
'filterFsk': 'false',
|
||||
'sort': 'creationDate,desc',
|
||||
'size': 100,
|
||||
'page': page_num,
|
||||
}, fatal=False)
|
||||
if not by_channel_alias:
|
||||
break
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if video_list:
|
||||
if not video_list:
|
||||
break
|
||||
try:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
break
|
||||
except StopIteration:
|
||||
pass
|
||||
if not try_get(
|
||||
by_channel_alias, lambda x: x['_links']['next']):
|
||||
break
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
|
@ -32,7 +32,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
@ -113,6 +112,8 @@ from .peertube import PeerTubeIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -2060,6 +2061,30 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'TODO: fix nested playlists processing in tests',
|
||||
},
|
||||
{
|
||||
# Viqeo embeds
|
||||
'url': 'https://viqeo.tv/',
|
||||
'info_dict': {
|
||||
'id': 'viqeo',
|
||||
'title': 'All-new video platform',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
'info_dict': {
|
||||
'id': 'shell',
|
||||
'ext': 'mp4',
|
||||
'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
|
||||
'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -3094,6 +3119,16 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
sharevideos_urls, video_id, video_title)
|
||||
|
||||
viqeo_urls = ViqeoIE._extract_urls(webpage)
|
||||
if viqeo_urls:
|
||||
return self.playlist_from_matches(
|
||||
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
|
||||
|
||||
expressen_urls = ExpressenIE._extract_urls(webpage)
|
||||
if expressen_urls:
|
||||
return self.playlist_from_matches(
|
||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
@ -3131,8 +3166,8 @@ class GenericIE(InfoExtractor):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = url_or_none(source.get('src'))
|
||||
if not src:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
|
@ -36,7 +36,8 @@ class GoIE(AdobePassIE):
|
||||
'requestor_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 004
|
||||
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
|
||||
default=None)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
|
||||
default='004')
|
||||
site_info = next(
|
||||
si for _, si in self._SITE_INFO.items()
|
||||
if si.get('brand') == brand)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform has different layout
|
||||
|
@ -64,7 +64,8 @@ class ImdbIE(InfoExtractor):
|
||||
video_url = url_or_none(encoding.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
|
||||
ext = mimetype2ext(encoding.get(
|
||||
'mimeType')) or determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
|
70
youtube_dl/extractor/kinopoisk.py
Normal file
70
youtube_dl/extractor/kinopoisk.py
Normal file
@ -0,0 +1,70 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KinoPoiskIE(InfoExtractor):
    """Extractor for films on kinopoisk.ru (/film/<numeric id> URLs)."""

    _GEO_COUNTRIES = ['RU']
    _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.kinopoisk.ru/film/81041/watch/',
        'md5': '4f71c80baea10dfa54a837a46111d326',
        'info_dict': {
            'id': '81041',
            'ext': 'mp4',
            'title': 'Алеша попович и тугарин змей',
            'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
            'thumbnail': r're:^https?://.*',
            'duration': 4533,
            'age_limit': 12,
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        'url': 'https://www.kinopoisk.ru/film/81041',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the ott-widget page for this film."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
            query={'kpId': video_id})

        # The widget embeds its state as JSON in an application/json
        # <script> tag; everything used below lives under "models".
        raw_json = self._search_regex(
            r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
            webpage, 'data')
        data = self._parse_json(raw_json, video_id)['models']

        film = data['filmStatus']
        title = film.get('title') or film['originalTitle']

        playlist_url = data['playlistEntity']['uri']
        formats = self._extract_m3u8_formats(
            playlist_url, video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        # NOTE(review): 'descriptscription' / 'shortDescriptscription' look
        # like typos of the keys that follow them - confirm against the API
        # response before removing; they are kept unchanged here.
        description_keys = (
            'descriptscription', 'description',
            'shortDescriptscription', 'shortDescription')

        return {
            'id': video_id,
            'title': title,
            'description': dict_get(film, description_keys),
            'thumbnail': film.get('coverUrl') or film.get('posterUrl'),
            'duration': int_or_none(film.get('duration')),
            'age_limit': int_or_none(film.get('restrictionAge')),
            'formats': formats,
        }
|
@ -20,5 +20,7 @@ class LCIIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id')
|
||||
wat_id = self._search_regex(
|
||||
(r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
|
||||
webpage, 'wat id')
|
||||
return self.url_result('wat:' + wat_id, 'Wat', wat_id)
|
||||
|
@ -6,28 +6,90 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NovaEmbedIE(InfoExtractor):
    # Extractor for standalone media.cms.nova.cz embed player pages.
    _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
        'md5': 'b3834f6de5401baabf31ed57456463f7',
        'info_dict': {
            'id': '8o0n0r',
            'ext': 'mp4',
            'title': '2180. díl',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2578,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # The player page assigns a JS object literal to `bitrates`;
        # js_to_json turns it into parseable JSON. Each list-valued entry
        # maps a format id to a list of media URLs.
        bitrates = self._parse_json(
            self._search_regex(
                r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
            video_id, transform_source=js_to_json)

        # Quality tags as they appear in the media file names, worst first.
        QUALITIES = ('lq', 'mq', 'hq', 'hd')
        quality_key = qualities(QUALITIES)

        formats = []
        for format_id, format_list in bitrates.items():
            if not isinstance(format_list, list):
                continue
            for format_url in format_list:
                format_url = url_or_none(format_url)
                if not format_url:
                    continue
                f = {
                    'url': format_url,
                }
                f_id = format_id
                # Derive the quality from the file name ("<tag>.mp4") and
                # fold it into the format id; the first matching tag wins.
                for quality in QUALITIES:
                    if '%s.mp4' % quality in format_url:
                        f_id += '-%s' % quality
                        f.update({
                            'quality': quality_key(quality),
                            'format_note': quality.upper(),
                        })
                        break
                f['format_id'] = f_id
                formats.append(f)
        self._sort_formats(formats)

        # Both fallback patterns must define the named group requested via
        # group='value'; a pattern naming its group differently would fail
        # with "no such group" as soon as it matched.
        title = self._og_search_title(
            webpage, default=None) or self._search_regex(
            (r'<value>(?P<value>[^<]+)',
             r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
            'title', group='value')
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._search_regex(
            r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
            'thumbnail', fatal=False, group='value')
        duration = int_or_none(self._search_regex(
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class NovaIE(InfoExtractor):
|
||||
IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
|
||||
'info_dict': {
|
||||
'id': '1608920',
|
||||
'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
|
||||
'ext': 'flv',
|
||||
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
|
||||
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
|
||||
'info_dict': {
|
||||
@ -38,33 +100,6 @@ class NovaIE(InfoExtractor):
|
||||
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||
'info_dict': {
|
||||
'id': '1756825',
|
||||
'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||
'ext': 'flv',
|
||||
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
|
||||
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
|
||||
'info_dict': {
|
||||
'id': '1756858',
|
||||
'ext': 'flv',
|
||||
'title': 'Televizní noviny - 30. 5. 2015',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
'upload_date': '20150530',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||
'info_dict': {
|
||||
@ -79,6 +114,20 @@ class NovaIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# media.cms.nova.cz embed
|
||||
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
|
||||
'info_dict': {
|
||||
'id': '8o0n0r',
|
||||
'ext': 'mp4',
|
||||
'title': '2180. díl',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2578,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [NovaEmbedIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
|
||||
'only_matching': True,
|
||||
@ -103,6 +152,15 @@ class NovaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
# novaplus
|
||||
embed_id = self._search_regex(
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||
webpage, 'embed url', default=None)
|
||||
if embed_id:
|
||||
return self.url_result(
|
||||
'https://media.cms.nova.cz/embed/%s' % embed_id,
|
||||
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r"(?:media|video_id)\s*:\s*'(\d+)'",
|
||||
r'media=(\d+)',
|
||||
@ -111,8 +169,21 @@ class NovaIE(InfoExtractor):
|
||||
webpage, 'video id')
|
||||
|
||||
config_url = self._search_regex(
|
||||
r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
|
||||
r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
|
||||
webpage, 'config url', default=None)
|
||||
config_params = {}
|
||||
|
||||
if not config_url:
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
|
||||
'player', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if player:
|
||||
config_url = url_or_none(player.get('configUrl'))
|
||||
params = player.get('configParams')
|
||||
if isinstance(params, dict):
|
||||
config_params = params
|
||||
|
||||
if not config_url:
|
||||
DEFAULT_SITE_ID = '23000'
|
||||
@ -127,14 +198,20 @@ class NovaIE(InfoExtractor):
|
||||
}
|
||||
|
||||
site_id = self._search_regex(
|
||||
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID)
|
||||
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
|
||||
site, DEFAULT_SITE_ID)
|
||||
|
||||
config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig'
|
||||
% (site_id, video_id))
|
||||
config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
|
||||
config_params = {
|
||||
'site': site_id,
|
||||
'media': video_id,
|
||||
'quality': 3,
|
||||
'version': 1,
|
||||
}
|
||||
|
||||
config = self._download_json(
|
||||
config_url, display_id,
|
||||
'Downloading config JSON',
|
||||
'Downloading config JSON', query=config_params,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
|
||||
|
||||
mediafile = config['mediafile']
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
US_RATINGS,
|
||||
)
|
||||
|
||||
@ -557,6 +558,13 @@ class PBSIE(InfoExtractor):
|
||||
if redirect_url and redirect_url not in redirect_urls:
|
||||
redirects.append(redirect)
|
||||
redirect_urls.add(redirect_url)
|
||||
encodings = info.get('encodings')
|
||||
if isinstance(encodings, list):
|
||||
for encoding in encodings:
|
||||
encoding_url = url_or_none(encoding)
|
||||
if encoding_url and encoding_url not in redirect_urls:
|
||||
redirects.append({'url': encoding_url})
|
||||
redirect_urls.add(encoding_url)
|
||||
|
||||
chapters = []
|
||||
# Player pages may also serve different qualities
|
||||
|
@ -4,24 +4,37 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RayWenderlichIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
videos\.raywenderlich\.com/courses|
|
||||
(?:www\.)?raywenderlich\.com
|
||||
)/
|
||||
(?P<course_id>[^/]+)/lessons/(?P<id>\d+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
|
||||
'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
|
||||
'info_dict': {
|
||||
'id': '248377018',
|
||||
'ext': 'mp4',
|
||||
'title': 'Testing In iOS Episode 1: Introduction',
|
||||
'title': 'Introduction',
|
||||
'description': 'md5:804d031b3efa9fcb49777d512d74f722',
|
||||
'timestamp': 1513906277,
|
||||
'upload_date': '20171222',
|
||||
'duration': 133,
|
||||
'uploader': 'Ray Wenderlich',
|
||||
'uploader_id': 'user3304672',
|
||||
@ -34,69 +47,133 @@ class RayWenderlichIE(InfoExtractor):
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_video_id(data, lesson_id):
    # Look up, in the parsed collection data, the content entry whose
    # 'ordinal' equals the requested lesson number and return its
    # 'identifier' as a string. Returns None when nothing matches.
    if not data:
        return
    groups = try_get(data, lambda x: x['groups'], list) or []
    if not groups:
        return
    # The caller passes lesson_id as a string captured from the URL while
    # ordinals are coerced with int_or_none below; coerce both the same
    # way so the equality check can actually succeed.
    wanted_ordinal = int_or_none(lesson_id)
    for group in groups:
        if not isinstance(group, dict):
            continue
        # Read the contents of the group being visited. The top-level
        # 'contents' list is kept as a fallback for payloads that carry it
        # there (NOTE(review): confirm the payload shape against a live
        # page).
        contents = (
            try_get(group, lambda x: x['contents'], list)
            or try_get(data, lambda x: x['contents'], list)
            or [])
        for content in contents:
            if not isinstance(content, dict):
                continue
            ordinal = int_or_none(content.get('ordinal'))
            if ordinal != wanted_ordinal:
                continue
            video_id = content.get('identifier')
            if video_id:
                return compat_str(video_id)
|
||||
|
||||
def _real_extract(self, url):
    # Resolve a single lesson to its Vimeo clip. The Vimeo id is taken
    # from the page when present; otherwise it is looked up through the
    # site's video JSON API, which also supplies the lesson metadata.
    course_id, lesson_id = re.match(self._VALID_URL, url).group(
        'course_id', 'id')
    display_id = '%s/%s' % (course_id, lesson_id)

    webpage = self._download_webpage(url, display_id)

    thumbnail = self._og_search_thumbnail(webpage, default=None)
    if not thumbnail:
        thumbnail = self._html_search_meta(
            'twitter:image', webpage, 'thumbnail')

    if '>Subscribe to unlock' in webpage:
        raise ExtractorError(
            'This content is only available for subscribers',
            expected=True)

    info = {'thumbnail': thumbnail}

    vimeo_id = self._search_regex(
        r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)

    if not vimeo_id:
        # No inline Vimeo id: find the site's own video id, first in the
        # data-collection attribute, then in the thumbnail URL.
        collection = self._search_regex(
            r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
            'data collection', default='{}', group='data')
        data = self._parse_json(
            collection, display_id, transform_source=unescapeHTML,
            fatal=False)
        video_id = self._extract_video_id(data, lesson_id)
        if not video_id:
            video_id = self._search_regex(
                r'/videos/(\d+)/', thumbnail, 'video id')

        # The API call is sent with XHR-style headers, plus the page's
        # CSRF token when one is present.
        headers = {
            'Referer': url,
            'X-Requested-With': 'XMLHttpRequest',
        }
        csrf_token = self._html_search_meta(
            'csrf-token', webpage, 'csrf token', default=None)
        if csrf_token:
            headers['X-CSRF-Token'] = csrf_token

        api_url = (
            'https://videos.raywenderlich.com/api/v1/videos/%s.json'
            % video_id)
        video = self._download_json(
            api_url, display_id, headers=headers)['video']
        vimeo_id = video['clips'][0]['provider_id']

        description = video.get('description') or video.get(
            'meta_description')
        info.update({
            '_type': 'url_transparent',
            'title': video.get('name'),
            'description': description,
            'duration': int_or_none(video.get('duration')),
            'timestamp': unified_timestamp(video.get('created_at')),
        })

    player_url = VimeoIE._smuggle_referrer(
        'https://player.vimeo.com/video/%s' % vimeo_id, url)
    vimeo_result = self.url_result(
        player_url, ie=VimeoIE.ie_key(), video_id=vimeo_id)
    return merge_dicts(info, vimeo_result)
|
||||
|
||||
|
||||
class RayWenderlichCourseIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
videos\.raywenderlich\.com/courses|
|
||||
(?:www\.)?raywenderlich\.com
|
||||
)/
|
||||
(?P<id>[^/]+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
|
||||
'info_dict': {
|
||||
'title': 'Testing in iOS',
|
||||
'id': '105-testing-in-ios',
|
||||
'id': '3530-testing-in-ios',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': False,
|
||||
},
|
||||
'playlist_count': 29,
|
||||
}]
|
||||
}
|
||||
|
||||
@classmethod
def suitable(cls, url):
    # Lesson URLs also match this class's pattern, so defer to the lesson
    # extractor whenever it claims the URL.
    if RayWenderlichIE.suitable(url):
        return False
    return super(RayWenderlichCourseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
course_id = self._match_id(url)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_id, lesson_id = mobj.group('course_id', 'id')
|
||||
video_id = '%s/%s' % (course_id, lesson_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
no_playlist = self._downloader.params.get('noplaylist')
|
||||
if no_playlist or smuggled_data.get('force_video', False):
|
||||
if no_playlist:
|
||||
self.to_screen(
|
||||
'Downloading just video %s because of --no-playlist'
|
||||
% video_id)
|
||||
if '>Subscribe to unlock' in webpage:
|
||||
raise ExtractorError(
|
||||
'This content is only available for subscribers',
|
||||
expected=True)
|
||||
vimeo_id = self._search_regex(
|
||||
r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
|
||||
return self.url_result(
|
||||
VimeoIE._smuggle_referrer(
|
||||
'https://player.vimeo.com/video/%s' % vimeo_id, url),
|
||||
ie=VimeoIE.ie_key(), video_id=vimeo_id)
|
||||
|
||||
self.to_screen(
|
||||
'Downloading playlist %s - add --no-playlist to just download video'
|
||||
% course_id)
|
||||
|
||||
lesson_ids = set((lesson_id, ))
|
||||
for lesson in re.findall(
|
||||
r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
|
||||
attrs = extract_attributes(lesson)
|
||||
if not attrs:
|
||||
continue
|
||||
lesson_url = attrs.get('href')
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = self._search_regex(
|
||||
r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
|
||||
if not lesson_id:
|
||||
continue
|
||||
lesson_ids.add(lesson_id)
|
||||
webpage = self._download_webpage(url, course_id)
|
||||
|
||||
entries = []
|
||||
for lesson_id in sorted(lesson_ids):
|
||||
lesson_urls = set()
|
||||
for lesson_url in re.findall(
|
||||
r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
|
||||
if lesson_url in lesson_urls:
|
||||
continue
|
||||
lesson_urls.add(lesson_url)
|
||||
entries.append(self.url_result(
|
||||
smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
|
||||
ie=RayWenderlichIE.ie_key()))
|
||||
urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
|
||||
|
||||
title = self._search_regex(
|
||||
r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
|
||||
default=None)
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', default=None)
|
||||
|
||||
return self.playlist_result(entries, course_id, title)
|
||||
|
@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RedBullTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
|
||||
_TESTS = [{
|
||||
# film
|
||||
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
|
||||
@ -35,6 +35,9 @@ class RedBullTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -19,7 +19,7 @@ from ..utils import (
|
||||
|
||||
class SixPlayIE(InfoExtractor):
|
||||
IE_NAME = '6play'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
|
||||
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
|
||||
@ -32,6 +32,9 @@ class SixPlayIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -39,6 +42,7 @@ class SixPlayIE(InfoExtractor):
|
||||
service, consumer_name = {
|
||||
'6play.fr': ('6play', 'm6web'),
|
||||
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
|
||||
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
|
||||
}.get(domain, ('6play', 'm6web'))
|
||||
|
||||
data = self._download_json(
|
||||
|
@ -310,7 +310,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
|
||||
class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
|
||||
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
|
||||
_VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
|
||||
_TESTS = [{
|
||||
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
|
||||
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
|
||||
@ -327,6 +327,9 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
||||
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
|
||||
'uploader': 'NBCU-NEWS',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||
|
@ -32,12 +32,12 @@ class TVPlayIE(InfoExtractor):
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
tvplay(?:\.skaties)?\.lv/parraides|
|
||||
(?:tv3play|play\.tv3)\.lt/programos|
|
||||
tvplay(?:\.skaties)?\.lv(?:/parraides)?|
|
||||
(?:tv3play|play\.tv3)\.lt(?:/programos)?|
|
||||
tv3play(?:\.tv3)?\.ee/sisu|
|
||||
(?:tv(?:3|6|8|10)play|viafree)\.se/program|
|
||||
(?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
|
||||
play\.novatv\.bg/programi
|
||||
play\.nova(?:tv)?\.bg/programi
|
||||
)
|
||||
/(?:[^/]+/)+
|
||||
)
|
||||
@ -203,10 +203,18 @@ class TVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# views is null
|
||||
'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
|
||||
@ -288,6 +296,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': m.group('url'),
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
'preference': -1,
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
@ -447,3 +456,102 @@ class ViafreeIE(InfoExtractor):
|
||||
'skip_rtmp': True,
|
||||
}),
|
||||
ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class TVPlayHomeIE(InfoExtractor):
    # Extractor for tvplay.tv3.lt / tvplay.skaties.lv / tvplay.tv3.ee pages.
    # Pages exposing a data-asset-id are delegated to TVPlayIE via an
    # "mtg:" URL; otherwise the HLS stream advertised in data-file is used.
    _VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
        'info_dict': {
            'id': '366367',
            'ext': 'mp4',
            'title': 'Aferistai',
            'description': 'Aferistai. Kalėdinė pasaka.',
            'series': 'Aferistai [N-7]',
            'season': '1 sezonas',
            'season_number': 1,
            'duration': 464,
            'timestamp': 1394209658,
            'upload_date': '20140307',
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [TVPlayIE.ie_key()],
    }, {
        'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
        'only_matching': True,
    }, {
        'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Keep the asset id in its own variable: reusing video_id here
        # would replace the URL-derived id with None whenever the page has
        # no data-asset-id, leaving the HLS fallback below without an id.
        asset_id = self._search_regex(
            r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
            default=None)

        if asset_id:
            return self.url_result(
                'mtg:%s' % asset_id, ie=TVPlayIE.ie_key(), video_id=asset_id)

        m3u8_url = self._search_regex(
            r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'm3u8 url', group='url')

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        # Metadata: player data-* attributes first, then <meta> tags, then
        # Open Graph tags.
        title = self._search_regex(
            r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
            'title', default=None, group='value') or self._html_search_meta(
            'title', webpage, default=None) or self._og_search_title(
            webpage)

        description = self._html_search_meta(
            'description', webpage,
            default=None) or self._og_search_description(webpage)

        thumbnail = self._search_regex(
            r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'thumbnail', default=None, group='url') or self._html_search_meta(
            'thumbnail', webpage, default=None) or self._og_search_thumbnail(
            webpage)

        duration = int_or_none(self._search_regex(
            r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
            fatal=False))

        season = self._search_regex(
            (r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
             r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
            'season', default=None, group='value')
        season_number = int_or_none(self._search_regex(
            r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
            default=None))
        # NOTE(review): this pattern has no attribute/key anchor, so it
        # matches the first quoted string anywhere in the page - confirm
        # which attribute the episode title is meant to come from.
        episode = self._search_regex(
            r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
            default=None, group='value')
        episode_number = int_or_none(self._search_regex(
            r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
            default=None))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'season': season,
            'season_number': season_number,
            'episode': episode,
            'episode_number': episode_number,
            'formats': formats,
        }
|
||||
|
@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import itertools
|
||||
import re
|
||||
import random
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_kwargs,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
@ -26,7 +26,6 @@ from ..utils import (
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
@ -37,8 +36,9 @@ class TwitchBaseIE(InfoExtractor):
|
||||
|
||||
_API_BASE = 'https://api.twitch.tv'
|
||||
_USHER_BASE = 'https://usher.ttvnw.net'
|
||||
_LOGIN_URL = 'https://www.twitch.tv/login'
|
||||
_CLIENT_ID = 'jzkbprff40iqj646a697cyrvl0zt2m6'
|
||||
_LOGIN_FORM_URL = 'https://www.twitch.tv/login'
|
||||
_LOGIN_POST_URL = 'https://passport.twitch.tv/login'
|
||||
_CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
def _handle_error(self, response):
|
||||
@ -77,22 +77,21 @@ class TwitchBaseIE(InfoExtractor):
|
||||
page_url = urlh.geturl()
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
|
||||
'post url', default=page_url, group='url')
|
||||
'post url', default=self._LOGIN_POST_URL, group='url')
|
||||
post_url = urljoin(page_url, post_url)
|
||||
|
||||
headers = {'Referer': page_url}
|
||||
headers = {
|
||||
'Referer': page_url,
|
||||
'Origin': page_url,
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
}
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
post_url, None, note,
|
||||
data=urlencode_postdata(form),
|
||||
headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
response = self._parse_json(
|
||||
e.cause.read().decode('utf-8'), None)
|
||||
fail(response.get('message') or response['errors'][0])
|
||||
raise
|
||||
response = self._download_json(
|
||||
post_url, None, note, data=json.dumps(form).encode(),
|
||||
headers=headers, expected_status=400)
|
||||
error = response.get('error_description') or response.get('error_code')
|
||||
if error:
|
||||
fail(error)
|
||||
|
||||
if 'Authenticated successfully' in response.get('message', ''):
|
||||
return None, None
|
||||
@ -105,7 +104,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||
headers=headers)
|
||||
|
||||
login_page, handle = self._download_webpage_handle(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
self._LOGIN_FORM_URL, None, 'Downloading login page')
|
||||
|
||||
# Some TOR nodes and public proxies are blocked completely
|
||||
if 'blacklist_message' in login_page:
|
||||
@ -115,6 +114,7 @@ class TwitchBaseIE(InfoExtractor):
|
||||
login_page, handle, 'Logging in', {
|
||||
'username': username,
|
||||
'password': password,
|
||||
'client_id': self._CLIENT_ID,
|
||||
})
|
||||
|
||||
# Successful login
|
||||
@ -240,7 +240,7 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v|videos)/|
|
||||
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
|
||||
player\.twitch\.tv/\?.*?\bvideo=v
|
||||
)
|
||||
(?P<id>\d+)
|
||||
@ -296,6 +296,9 @@ class TwitchVodIE(TwitchItemBaseIE):
|
||||
}, {
|
||||
'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.twitch.tv/northernlion/video/291940395',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VidziIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
||||
@ -35,6 +35,9 @@ class VidziIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://vidzi.si/rph9gztxj1et.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidzi.nu/cghql9yq6emu.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
99
youtube_dl/extractor/viqeo.py
Normal file
99
youtube_dl/extractor/viqeo.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ViqeoIE(InfoExtractor):
    # Extractor for Viqeo videos, addressed by a hex id through a "viqeo:"
    # pseudo-URL, a cdn.viqeo.tv embed URL or an api.viqeo.tv startup URL.
    # In the startup alternative the query "?" is escaped so it is matched
    # literally (unescaped it would only make the preceding "p" optional);
    # optional slashes before it are tolerated as in the embed alternative.
    _VALID_URL = r'''(?x)
                    (?:
                        viqeo:|
                        https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
                        https?://api\.viqeo\.tv/v\d+/data/startup/*\?.*?\bvideo(?:%5B%5D|\[\])=
                    )
                    (?P<id>[\da-f]+)
                    '''
    _TESTS = [{
        'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
        'md5': 'a169dd1a6426b350dca4296226f21e76',
        'info_dict': {
            'id': 'cde96f09d25f39bee837',
            'ext': 'mp4',
            'title': 'cde96f09d25f39bee837',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 76,
        },
    }, {
        'url': 'viqeo:cde96f09d25f39bee837',
        'only_matching': True,
    }, {
        'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        # Collect the src of every cdn.viqeo.tv embed iframe in the page.
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1',
                webpage)]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Whatever URL form was given, the canonical embed page is fetched.
        webpage = self._download_webpage(
            'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)

        data = self._parse_json(
            self._search_regex(
                r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
            video_id)

        # 'mediaFiles' mixes images with audio/video; the part of 'type'
        # before the slash decides whether an entry becomes a thumbnail or
        # a format. Entries of any other kind are dropped.
        formats = []
        thumbnails = []
        for media_file in data['mediaFiles']:
            if not isinstance(media_file, dict):
                continue
            media_url = url_or_none(media_file.get('url'))
            if not media_url or not media_url.startswith(('http', '//')):
                continue
            media_type = str_or_none(media_file.get('type'))
            if not media_type:
                continue
            media_kind = media_type.split('/')[0].lower()
            f = {
                'url': media_url,
                'width': int_or_none(media_file.get('width')),
                'height': int_or_none(media_file.get('height')),
            }
            format_id = str_or_none(media_file.get('quality'))
            if media_kind == 'image':
                f['id'] = format_id
                thumbnails.append(f)
            elif media_kind in ('video', 'audio'):
                is_audio = media_kind == 'audio'
                f.update({
                    'format_id': 'audio' if is_audio else format_id,
                    'fps': int_or_none(media_file.get('fps')),
                    # Audio entries are flagged as having no video codec.
                    'vcodec': 'none' if is_audio else None,
                })
                formats.append(f)
        self._sort_formats(formats)

        duration = int_or_none(data.get('duration'))

        # No title is read from the slot data; the id doubles as the title.
        return {
            'id': video_id,
            'title': video_id,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|
@ -10,6 +10,7 @@ from ..utils import (
|
||||
js_to_json,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
@ -67,12 +68,20 @@ class WatchBoxIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
source = (self._parse_json(
|
||||
player_config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json,
|
||||
fatal=False) or {}).get('source') or {}
|
||||
r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
|
||||
'player config', default='{}', group='data'),
|
||||
video_id, transform_source=unescapeHTML, fatal=False)
|
||||
|
||||
if not player_config:
|
||||
player_config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False) or {}
|
||||
|
||||
source = player_config.get('source') or {}
|
||||
|
||||
video_id = compat_str(source.get('videoId') or video_id)
|
||||
|
||||
|
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
|
||||
class WebOfStoriesIE(InfoExtractor):
|
||||
@ -133,8 +136,10 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
|
||||
for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
|
||||
self.url_result(
|
||||
'http://www.webofstories.com/play/%s' % video_id,
|
||||
'WebOfStories', video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
|
||||
]
|
||||
|
||||
title = self._search_regex(
|
||||
|
@ -23,7 +23,7 @@ class XFileShareIE(InfoExtractor):
|
||||
(r'powerwatch\.pw', 'PowerWatch'),
|
||||
(r'rapidvideo\.ws', 'Rapidvideo.ws'),
|
||||
(r'thevideobee\.to', 'TheVideoBee'),
|
||||
(r'vidto\.me', 'Vidto'),
|
||||
(r'vidto\.(?:me|se)', 'Vidto'),
|
||||
(r'streamin\.to', 'Streamin.To'),
|
||||
(r'xvidstage\.com', 'XVIDSTAGE'),
|
||||
(r'vidabc\.com', 'Vid ABC'),
|
||||
@ -115,7 +115,10 @@ class XFileShareIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidto.se/1tx1pf6t12cg.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
41
youtube_dl/extractor/yourporn.py
Normal file
41
youtube_dl/extractor/yourporn.py
Normal file
@ -0,0 +1,41 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urljoin
|
||||
|
||||
|
||||
class YourPornIE(InfoExtractor):
    """Extractor for single posts on yourporn.sexy."""

    # The id is the path component after /post/, up to (not including) the
    # first '/', '?', '#', '&' or '.' (so a trailing '.html' is excluded).
    _VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
    _TEST = {
        'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
        'md5': '6f8682b6464033d87acaa7a8ff0c092e',
        'info_dict': {
            'id': '57ffcb2e1179b',
            'ext': 'mp4',
            'title': 'md5:c9f43630bd968267672651ba905a7d35',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Extract the direct media URL, title and thumbnail for a post.

        The post page is downloaded and the JSON object stored in its
        ``data-vnfo`` attribute is parsed; the entry keyed by the video id
        is joined with *url* via ``urljoin`` to form the media URL.

        Returns an info dict with ``id``, ``url``, ``title`` and
        ``thumbnail``.

        Raises KeyError if the ``data-vnfo`` object has no entry for the
        video id.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        video_url = urljoin(url, self._parse_json(
            self._search_regex(
                r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
                group='data'),
            video_id)[video_id])

        # Prefer the text of the PostEditTA element and fall back to the
        # Open Graph description.
        # NOTE(review): if the fallback can return None (confirm against
        # InfoExtractor), .strip() raises AttributeError here.
        title = (self._search_regex(
            r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
            default=None) or self._og_search_description(webpage)).strip()
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail,
        }
|
@ -82,7 +82,7 @@ def register_socks_protocols():
|
||||
compiled_regex_type = type(re.compile(''))
|
||||
|
||||
std_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
|
||||
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Encoding': 'gzip, deflate',
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2018.07.29'
|
||||
__version__ = '2018.08.22'
|
||||
|
Loading…
Reference in New Issue
Block a user