1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-03-21 22:50:00 +08:00

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Ryan Hayward 2018-05-30 21:51:10 -05:00
commit 7a41b039b1
58 changed files with 769 additions and 714 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.18**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.30*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.30**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.05.18
[debug] youtube-dl version 2018.05.30
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -1,3 +1,55 @@
version 2018.05.30
Core
* [downloader/rtmp] Generalize download messages and report time elapsed
on finish
* [downloader/rtmp] Gracefully handle live streams interrupted by user
Extractors
* [teamcoco] Fix extraction for full episodes (#16573)
* [spiegel] Fix info extraction (#16538)
+ [apa] Add support for apa.at (#15041, #15672)
+ [bellmedia] Add support for bnnbloomberg.ca (#16560)
+ [9c9media] Extract MPD formats and subtitles
* [cammodels] Use geo verification headers
+ [ufctv] Add support for authentication (#16542)
+ [cammodels] Add support for cammodels.com (#14499)
* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
(#16551)
* [soundcloud] Detect format extension (#16549)
* [cbc] Fix playlist title extraction (#16502)
+ [tumblr] Detect and report sensitive media (#13829)
+ [tumblr] Add support for authentication (#15133)
version 2018.05.26
Core
* [utils] Improve parse_age_limit
Extractors
* [audiomack] Stringify video id (#15310)
* [izlesene] Fix extraction (#16233, #16271, #16407)
+ [indavideo] Add support for generic embeds (#11989)
* [indavideo] Fix extraction (#11221)
* [indavideo] Sign download URLs (#16174)
+ [peertube] Add support for PeerTube based sites (#16301, #16329)
* [imgur] Fix extraction (#16537)
+ [hidive] Add support for authentication (#16534)
+ [nbc] Add support for stream.nbcsports.com (#13911)
+ [viewlift] Add support for hoichoi.tv (#16536)
* [go90] Extract age limit and detect DRM protection (#10127)
* [viewlift] Fix extraction for snagfilms.com (#15766)
* [globo] Improve extraction (#4189)
* Add support for authentication
* Simplify URL signing
* Extract DASH and MSS formats
* [leeco] Fix extraction (#16464)
* [teamcoco] Add fallback for format extraction (#16484)
* [teamcoco] Improve URL regular expression (#16484)
* [imdb] Improve extraction (#4085, #14557)
version 2018.05.18
Extractors

View File

@ -93,8 +93,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
## Network Options:
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
To enable experimental SOCKS proxy, specify
a proper scheme. For example
To enable SOCKS proxy, specify a proper
scheme. For example
socks5://127.0.0.1:1080/. Pass in an empty
string (--proxy "") for direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
@ -109,16 +109,15 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
option is not present) is used for the
actual downloading.
--geo-bypass Bypass geographic restriction via faking
X-Forwarded-For HTTP header (experimental)
X-Forwarded-For HTTP header
--no-geo-bypass Do not bypass geographic restriction via
faking X-Forwarded-For HTTP header
(experimental)
--geo-bypass-country CODE Force bypass geographic restriction with
explicitly provided two-letter ISO 3166-1 alpha-2
country code (experimental)
country code
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
explicitly provided IP block in CIDR
notation (experimental)
notation
## Video Selection:
--playlist-start NUMBER Playlist video to start at (default is 1)
@ -209,7 +208,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
--playlist-reverse Download playlist videos in reverse order
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with
expected file size (experimental)
expected file size
--hls-prefer-native Use the native HLS downloader instead of
ffmpeg
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS

View File

@ -15,7 +15,6 @@
- **8tracks**
- **91porn**
- **9c9media**
- **9c9media:stack**
- **9gag**
- **9now.com.au**
- **abc.net.au**
@ -48,6 +47,7 @@
- **anitube.se**
- **Anvato**
- **AnySex**
- **APA**
- **Aparat**
- **AppleConnect**
- **AppleDaily**: 臺灣蘋果日報
@ -128,6 +128,7 @@
- **BYUtv**
- **Camdemy**
- **CamdemyFolder**
- **CamModels**
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: mycanal.fr and piwiplus.fr
@ -365,7 +366,6 @@
- **ImgurAlbum**
- **Ina**
- **Inc**
- **Indavideo**
- **IndavideoEmbed**
- **InfoQ**
- **Instagram**
@ -526,6 +526,7 @@
- **nbcolympics**
- **nbcolympics:stream**
- **NBCSports**
- **NBCSportsStream**
- **NBCSportsVPlayer**
- **ndr**: NDR.de - Norddeutscher Rundfunk
- **ndr:embed**
@ -625,6 +626,7 @@
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **pcmag**
- **PearVideo**
- **PeerTube**
- **People**
- **PerformGroup**
- **periscope**: Periscope

View File

@ -24,71 +24,78 @@ class RtmpFD(FileDownloader):
def real_download(self, filename, info_dict):
def run_rtmpdump(args):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = ''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
time_now = time.time()
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
def dl():
resume_percent = None
resume_downloaded_data_len = None
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = ''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
proc.wait()
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
try:
dl()
finally:
proc.wait()
if not cursor_in_new_line:
self.to_screen('')
return proc.returncode
@ -163,7 +170,15 @@ class RtmpFD(FileDownloader):
RD_INCOMPLETE = 2
RD_NO_CONNECT = 3
retval = run_rtmpdump(args)
started = time.time()
try:
retval = run_rtmpdump(args)
except KeyboardInterrupt:
if not info_dict.get('is_live'):
raise
retval = RD_SUCCESS
self.to_screen('\n[rtmpdump] Interrupted by user')
if retval == RD_NO_CONNECT:
self.report_error('[rtmpdump] Could not connect to RTMP server.')
@ -171,7 +186,7 @@ class RtmpFD(FileDownloader):
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize)
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
args = basic_args + ['--resume']
if retval == RD_FAILED:
@ -188,13 +203,14 @@ class RtmpFD(FileDownloader):
break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % fsize)
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
'elapsed': time.time() - started,
})
return True
else:

View File

@ -52,7 +52,7 @@ class AnimeOnDemandIE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -0,0 +1,94 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
js_to_json,
)
class APAIE(InfoExtractor):
    """Extractor for apa.at embed pages (``<subdomain>.apa.at/embed/<uuid>``)."""

    _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
        'md5': '2b12292faeb0a7d930c778c7a5b4759b',
        'info_dict': {
            'id': 'jjv85FdZ',
            'ext': 'mp4',
            'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 254,
            'timestamp': 1519211149,
            'upload_date': '20180221',
        },
    }, {
        'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
        'only_matching': True,
    }, {
        'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
        'only_matching': True,
    }, {
        'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all apa.at embed iframes found in *webpage*."""
        embed_urls = []
        for match in re.finditer(
                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
                webpage):
            embed_urls.append(match.group('url'))
        return embed_urls

    def _real_extract(self, url):
        """Extract the video behind an apa.at embed URL.

        When the embed page names a JWPlatform media id the extraction is
        delegated to the JWPlatform extractor; otherwise the formats are
        built from the ``sources`` array embedded in the page.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Preferred path: the page merely wraps a JWPlatform player.
        media_id = self._search_regex(
            r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', page,
            'jwplatform id', default=None)
        if media_id:
            return self.url_result(
                'jwplatform:' + media_id, ie='JWPlatform',
                video_id=video_id)

        # Fallback path: parse the JavaScript ``sources`` array.
        raw_sources = self._search_regex(
            r'sources\s*=\s*(\[.+?\])\s*;', page, 'sources')
        sources = self._parse_json(
            raw_sources, video_id, transform_source=js_to_json)

        formats = []
        for entry in sources:
            if not isinstance(entry, dict):
                continue
            file_url = entry.get('file')
            # Skip entries without a usable (non-empty, text) URL.
            if not (file_url and isinstance(file_url, compat_str)):
                continue
            if determine_ext(file_url) != 'm3u8':
                formats.append({
                    'url': file_url,
                })
                continue
            formats.extend(self._extract_m3u8_formats(
                file_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
        self._sort_formats(formats)

        thumbnail = self._search_regex(
            r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', page,
            'thumbnail', fatal=False, group='url')

        # The page offers no title on this path, so the id doubles as title.
        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@ -74,7 +74,7 @@ class AtresPlayerIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -44,7 +44,7 @@ class BambuserIE(InfoExtractor):
}
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -12,7 +12,7 @@ class BellMediaIE(InfoExtractor):
(?:
ctv|
tsn|
bnn|
bnn(?:bloomberg)?|
thecomedynetwork|
discovery|
discoveryvelocity|
@ -27,17 +27,16 @@ class BellMediaIE(InfoExtractor):
much\.com
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
'info_dict': {
'id': '706966',
'ext': 'mp4',
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
'upload_date': '20150919',
'timestamp': 1442624700,
'id': '1403070',
'ext': 'flv',
'title': 'David Cockfield\'s Top Picks',
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
},
'expected_warnings': ['HTTP Error 404'],
}, {
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
'only_matching': True,
@ -70,6 +69,7 @@ class BellMediaIE(InfoExtractor):
'investigationdiscovery': 'invdisc',
'animalplanet': 'aniplan',
'etalk': 'ctv',
'bnnbloomberg': 'bnn',
}
def _real_extract(self, url):

View File

@ -0,0 +1,96 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
)
class CamModelsIE(InfoExtractor):
    """Extractor for live streams on cammodels.com (``/cam/<user>``)."""

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.cammodels.com/cam/AutumnKnight/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the live-stream info dict for the user named in *url*.

        Raises ExtractorError when the page carries no manifest URL root;
        the error is marked as expected when the page contains one of the
        known status phrases.
        """
        user_id = self._match_id(url)

        page = self._download_webpage(
            url, user_id, headers=self.geo_verification_headers())

        manifest_root = self._html_search_regex(
            r'manifestUrlRoot=([^&\']+)', page, 'manifest', default=None)

        if not manifest_root:
            # Map known page phrases to a user-facing explanation.
            known_states = (
                ("I'm offline, but let's stay connected", 'This user is currently offline'),
                ('in a private show', 'This user is in a private show'),
                ('is currently performing LIVE', 'This model is currently performing live'),
            )
            explanation = next(
                (text for phrase, text in known_states if phrase in page),
                None)
            if explanation is not None:
                raise ExtractorError(explanation, expected=True)
            raise ExtractorError(
                'Unable to find manifest URL root', expected=False)

        manifest = self._download_json(
            '%s%s.json' % (manifest_root, user_id), user_id)

        formats = []
        for format_id, format_dict in manifest['formats'].items():
            if not isinstance(format_dict, dict):
                continue
            encodings = format_dict.get('encodings')
            if not isinstance(encodings, list):
                continue

            # Only rtmp and hls variants are supported; anything else
            # contributes no formats.
            if 'rtmp' in format_id:
                protocol_fields = {'ext': 'flv'}
            elif 'hls' in format_id:
                protocol_fields = {
                    'ext': 'mp4',
                    # hls skips fragments, preferring rtmp
                    'preference': -1,
                }
            else:
                continue

            vcodec = format_dict.get('videoCodec')
            acodec = format_dict.get('audioCodec')

            for encoding in encodings:
                if not isinstance(encoding, dict):
                    continue
                location = encoding.get('location')
                if not (location and isinstance(location, compat_str)):
                    continue

                height = int_or_none(encoding.get('videoHeight'))
                id_parts = [format_id]
                if height is not None:
                    id_parts.append('%dp' % height)

                fmt = {
                    'url': location,
                    'format_id': '-'.join(id_parts),
                    'width': int_or_none(encoding.get('videoWidth')),
                    'height': height,
                    'vbr': int_or_none(encoding.get('videoKbps')),
                    'abr': int_or_none(encoding.get('audioKbps')),
                    'fps': int_or_none(encoding.get('fps')),
                    'vcodec': vcodec,
                    'acodec': acodec,
                }
                fmt.update(protocol_fields)
                formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': user_id,
            'title': self._live_title(user_id),
            'is_live': True,
            'formats': formats,
        }

View File

@ -20,6 +20,7 @@ from ..utils import (
parse_duration,
parse_iso8601,
parse_age_limit,
strip_or_none,
int_or_none,
ExtractorError,
)
@ -129,6 +130,9 @@ class CBCIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
@ -136,8 +140,7 @@ class CBCIE(InfoExtractor):
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
return self.playlist_result(
entries, display_id,
self._og_search_title(webpage, fatal=False),
entries, display_id, strip_or_none(title),
self._og_search_description(webpage))

View File

@ -49,7 +49,7 @@ class CrunchyrollBaseIE(InfoExtractor):
})
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -11,10 +11,10 @@ class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
'md5': '9b8624ba66351a23e0b6e1391971f9af',
'info_dict': {
'id': '901995',
'ext': 'mp4',
'ext': 'flv',
'title': 'Extended: \'That person cannot be me\' Johnson says',
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,

View File

@ -35,7 +35,7 @@ class CuriosityStreamBaseIE(InfoExtractor):
return result['data']
def _real_initialize(self):
(email, password) = self._get_login_info()
email, password = self._get_login_info()
if email is None:
return
result = self._download_json(

View File

@ -42,7 +42,7 @@ class DramaFeverBaseIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -44,6 +44,7 @@ from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
from .apa import APAIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .appletrailers import (
@ -145,6 +146,7 @@ from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
from .cammodels import CamModelsIE
from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
@ -703,12 +705,7 @@ from .nexx import (
from .nfb import NFBIE
from .nfl import NFLIE
from .nhk import NhkVodIE
from .nhl import (
NHLVideocenterIE,
NHLNewsIE,
NHLVideocenterCategoryIE,
NHLIE,
)
from .nhl import NHLIE
from .nick import (
NickIE,
NickBrIE,
@ -717,10 +714,7 @@ from .nick import (
NickRuIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import (
NineCNineMediaStackIE,
NineCNineMediaIE,
)
from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE

View File

@ -226,7 +226,7 @@ class FacebookIE(InfoExtractor):
return urls
def _login(self):
(useremail, password) = self._get_login_info()
useremail, password = self._get_login_info()
if useremail is None:
return

View File

@ -46,7 +46,7 @@ class FC2IE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None or password is None:
return False

View File

@ -51,7 +51,7 @@ class FunimationIE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
try:

View File

@ -91,7 +91,7 @@ class GDCVaultIE(InfoExtractor):
]
def _login(self, webpage_url, display_id):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None or password is None:
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
return None

View File

@ -110,6 +110,7 @@ from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
class GenericIE(InfoExtractor):
@ -2041,6 +2042,23 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# APA embed via JWPlatform embed
'url': 'http://www.vol.at/blue-man-group/5593454',
'info_dict': {
'id': 'jjv85FdZ',
'ext': 'mp4',
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 254,
'timestamp': 1519211149,
'upload_date': '20180221',
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
'md5': 'b68d276de422ab07ee1d49388103f457',
@ -3068,6 +3086,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
apa_urls = APAIE._extract_urls(webpage)
if apa_urls:
return self.playlist_from_matches(
apa_urls, video_id, video_title, ie=APAIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]

View File

@ -23,7 +23,6 @@ from ..utils import (
class GloboIE(InfoExtractor):
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
_LOGGED_IN = False
_NETRC_MACHINE = 'globo'
_TESTS = [{
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
@ -68,9 +67,6 @@ class GloboIE(InfoExtractor):
}]
def _real_initialize(self):
if self._LOGGED_IN:
return
email, password = self._get_login_info()
if email is None:
return
@ -91,7 +87,6 @@ class GloboIE(InfoExtractor):
resp = self._parse_json(e.cause.read(), None)
raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
raise
self._LOGGED_IN = True
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@ -18,7 +18,6 @@ class HiDiveIE(InfoExtractor):
# so disabling geo bypass completely
_GEO_BYPASS = False
_NETRC_MACHINE = 'hidive'
_LOGGED_IN = False
_LOGIN_URL = 'https://www.hidive.com/account/login'
_TESTS = [{
@ -38,10 +37,7 @@ class HiDiveIE(InfoExtractor):
}]
def _real_initialize(self):
if self._LOGGED_IN:
return
(email, password) = self._get_login_info()
email, password = self._get_login_info()
if email is None:
return
@ -56,7 +52,6 @@ class HiDiveIE(InfoExtractor):
})
self._download_webpage(
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
self._LOGGED_IN = True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)

View File

@ -66,7 +66,7 @@ class HRTiBaseIE(InfoExtractor):
self._logout_url = modules['user']['resources']['logout']['uri']
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
# TODO: figure out authentication with cookies
if username is None or password is None:
self.raise_login_required()

View File

@ -239,7 +239,7 @@ class IqiyiIE(InfoExtractor):
return ohdave_rsa_encrypt(data, e, N)
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
# No authentication to be performed
if not username:

View File

@ -1,96 +1,90 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
)
from .nhl import NHLBaseIE
class MLBIE(InfoExtractor):
class MLBIE(NHLBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:[\da-z_-]+\.)*mlb\.com/
(?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
(?:
(?:
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
(?:[^/]+/)*c-|
(?:
shared/video/embed/(?:embed|m-internal-embed)\.html|
(?:[^/]+/)+(?:play|index)\.jsp|
)\?.*?\bcontent_id=
)
(?P<id>n?\d+)|
(?:[^/]+/)*(?P<path>[^/]+)
(?P<id>\d+)
)
'''
_CONTENT_DOMAIN = 'content.mlb.com'
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
'md5': 'ff56a598c2cf411a9a38a69709e97079',
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
'md5': '632358dacfceec06bad823b83d21df2d',
'info_dict': {
'id': '34698933',
'ext': 'mp4',
'title': "Ackley's spectacular catch",
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
'duration': 66,
'timestamp': 1405980600,
'upload_date': '20140721',
'timestamp': 1405995000,
'upload_date': '20140722',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
'info_dict': {
'id': '34496663',
'ext': 'mp4',
'title': 'Stanton prepares for Derby',
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
'duration': 46,
'timestamp': 1405105800,
'timestamp': 1405120200,
'upload_date': '20140711',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
'md5': '0e6e73d509321e142409b695eadd541f',
'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
'md5': '99bb9176531adc600b90880fb8be9328',
'info_dict': {
'id': '34578115',
'ext': 'mp4',
'title': 'Cespedes repeats as Derby champ',
'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
'duration': 488,
'timestamp': 1405399936,
'timestamp': 1405414336,
'upload_date': '20140715',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
'md5': 'b8fd237347b844365d74ea61d4245967',
'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
'info_dict': {
'id': '34577915',
'ext': 'mp4',
'title': 'Bautista on Home Run Derby',
'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
'duration': 52,
'timestamp': 1405390722,
'timestamp': 1405405122,
'upload_date': '20140715',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
'md5': 'aafaf5b0186fee8f32f20508092f8111',
'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
'md5': 'e09e37b552351fddbf4d9e699c924d68',
'info_dict': {
'id': '75609783',
'ext': 'mp4',
'title': 'Must C: Pillar climbs for catch',
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
'timestamp': 1429124820,
'timestamp': 1429139220,
'upload_date': '20150415',
}
},
@ -111,7 +105,7 @@ class MLBIE(InfoExtractor):
'only_matching': True,
},
{
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
'only_matching': True,
},
{
@ -120,58 +114,7 @@ class MLBIE(InfoExtractor):
'only_matching': True,
},
{
'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
'only_matching': True,
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if not video_id:
video_path = mobj.group('path')
webpage = self._download_webpage(url, video_path)
video_id = self._search_regex(
[r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
detail = self._download_xml(
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
title = detail.find('./headline').text
description = detail.find('./big-blurb').text
duration = parse_duration(detail.find('./duration').text)
timestamp = parse_iso8601(detail.attrib['date'][:-5])
thumbnails = [{
'url': thumbnail.text,
} for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')]
formats = []
for media_url in detail.findall('./url'):
playback_scenario = media_url.attrib['playback_scenario']
fmt = {
'url': media_url.text,
'format_id': playback_scenario,
}
m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
if m:
fmt.update({
'vbr': int(m.group('vbr')) * 1000,
'width': int(m.group('width')),
'height': int(m.group('height')),
})
formats.append(fmt)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}

View File

@ -1,18 +1,10 @@
from __future__ import unicode_literals
import re
import json
import os
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
compat_str,
)
from ..compat import compat_str
from ..utils import (
unified_strdate,
determine_ext,
int_or_none,
parse_iso8601,
@ -20,236 +12,77 @@ from ..utils import (
)
class NHLBaseInfoExtractor(InfoExtractor):
@staticmethod
def _fix_json(json_string):
return json_string.replace('\\\'', '\'')
class NHLBaseIE(InfoExtractor):
def _real_extract(self, url):
site, tmp_id = re.match(self._VALID_URL, url).groups()
video_data = self._download_json(
'https://%s/%s/%sid/v1/%s/details/web-v1.json'
% (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
if video_data.get('type') != 'video':
video_data = video_data['media']
video = video_data.get('video')
if video:
video_data = video
else:
videos = video_data.get('videos')
if videos:
video_data = videos[0]
def _real_extract_video(self, video_id):
vid_parts = video_id.split(',')
if len(vid_parts) == 3:
video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
data = self._download_json(
json_url, video_id, transform_source=self._fix_json)
return self._extract_video(data[0])
video_id = compat_str(video_data['id'])
title = video_data['title']
def _extract_video(self, info):
video_id = info['id']
self.report_extraction(video_id)
formats = []
for playback in video_data.get('playbacks', []):
playback_url = playback.get('url')
if not playback_url:
continue
ext = determine_ext(playback_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
playback_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=playback.get('name', 'hls'), fatal=False)
self._check_formats(m3u8_formats, video_id)
formats.extend(m3u8_formats)
else:
height = int_or_none(playback.get('height'))
formats.append({
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
'url': playback_url,
'width': int_or_none(playback.get('width')),
'height': height,
'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
})
self._sort_formats(formats)
initial_video_url = info['publishPoint']
if info['formats'] == '1':
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
filename, ext = os.path.splitext(parsed_url.path)
path = '%s_sd%s' % (filename, ext)
data = compat_urllib_parse_urlencode({
'type': 'fvod',
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
thumbnails = []
cuts = video_data.get('image', {}).get('cuts') or []
if isinstance(cuts, dict):
cuts = cuts.values()
for thumbnail_data in cuts:
thumbnail_url = thumbnail_data.get('src')
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail_data.get('width')),
'height': int_or_none(thumbnail_data.get('height')),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_doc = self._download_xml(
path_url, video_id, 'Downloading final video url')
video_url = path_doc.find('path').text
else:
video_url = initial_video_url
join = compat_urlparse.urljoin
ret = {
'id': video_id,
'title': info['name'],
'url': video_url,
'description': info['description'],
'duration': int(info['duration']),
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
}
if video_url.startswith('rtmp:'):
mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
ret.update({
'tc_url': mobj.group('tc_url'),
'play_path': mobj.group('play_path'),
'app': mobj.group('app'),
'no_resume': True,
})
return ret
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:videocenter'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
_TESTS = [{
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
'md5': 'db704a4ea09e8d3988c85e36cc892d09',
'info_dict': {
'id': '453614',
'ext': 'mp4',
'title': 'Quick clip: Weise 4-3 goal vs Flames',
'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
'duration': 18,
'upload_date': '20131006',
},
}, {
'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
'md5': 'd22e82bc592f52d37d24b03531ee9696',
'info_dict': {
'id': '2014020024-628-h',
'ext': 'mp4',
'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
'duration': 0,
'upload_date': '20141011',
},
}, {
'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
'md5': 'c78fc64ea01777e426cfc202b746c825',
'info_dict': {
'id': '58665',
'ext': 'flv',
'title': 'Classic Game In Six - April 22, 1979',
'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
'duration': 400,
'upload_date': '20100129'
},
}, {
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
'only_matching': True,
}, {
'url': 'http://video.nhl.com/videocenter/?id=736722',
'only_matching': True,
}, {
'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
'md5': '076fcb88c255154aacbf0a7accc3f340',
'info_dict': {
'id': '2014020299-X-h',
'ext': 'mp4',
'title': 'Penguins at Islanders / Game Highlights',
'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
'duration': 268,
'upload_date': '20141122',
}
}, {
'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
'info_dict': {
'id': '691469',
'ext': 'mp4',
'title': 'RAW | Craig MacTavish Full Press Conference',
'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
'upload_date': '20141205',
},
'params': {
'skip_download': True, # Requires rtmpdump
}
}, {
'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return self._real_extract_video(video_id)
class NHLNewsIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:news'
IE_DESC = 'NHL news'
_VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
'md5': '4b3d1262e177687a3009937bd9ec0be8',
'info_dict': {
'id': '736722',
'ext': 'mp4',
'title': 'Cal Clutterbuck has been fined $2,000',
'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
'duration': 37,
'upload_date': '20150128',
},
}, {
# iframe embed
'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
'md5': '9f663d1c006c90ac9fb82777d4294e12',
'info_dict': {
'id': '836127',
'ext': 'mp4',
'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
'duration': 93,
'upload_date': '20150923',
},
}]
def _real_extract(self, url):
news_id = self._match_id(url)
webpage = self._download_webpage(url, news_id)
video_id = self._search_regex(
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
webpage, 'video id')
return self._real_extract_video(video_id)
class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:videocenter:category'
IE_DESC = 'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
_TEST = {
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
'info_dict': {
'id': '999',
'title': 'Highlights',
},
'playlist_count': 12,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
team = mobj.group('team')
webpage = self._download_webpage(url, team)
cat_id = self._search_regex(
[r'var defaultCatId = "(.+?)";',
r'{statusIndex:0,index:0,.*?id:(.*?),'],
webpage, 'category id')
playlist_title = self._html_search_regex(
r'tab0"[^>]*?>(.*?)</td>',
webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
data = compat_urllib_parse_urlencode({
'cid': cat_id,
# This is the default value
'count': 12,
'ptrs': 3,
'format': 'json',
})
path = '/videocenter/servlets/browse?' + data
request_url = compat_urlparse.urljoin(url, path)
response = self._download_webpage(request_url, playlist_title)
response = self._fix_json(response)
if not response.strip():
self._downloader.report_warning('Got an empty response, trying '
'adding the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true',
playlist_title)
response = self._fix_json(response)
videos = json.loads(response)
return {
'_type': 'playlist',
'title': playlist_title,
'id': cat_id,
'entries': [self._extract_video(v) for v in videos],
'id': video_id,
'title': title,
'description': video_data.get('description'),
'timestamp': parse_iso8601(video_data.get('date')),
'duration': parse_duration(video_data.get('duration')),
'thumbnails': thumbnails,
'formats': formats,
}
class NHLIE(InfoExtractor):
class NHLIE(NHLBaseIE):
IE_NAME = 'nhl.com'
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
_SITES_MAP = {
'nhl': 'nhl',
'wch2016': 'wch',
}
_CONTENT_DOMAIN = 'nhl.bamcontent.com'
_TESTS = [{
# type=video
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
@ -293,59 +126,3 @@ class NHLIE(InfoExtractor):
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
tmp_id, site = mobj.group('id'), mobj.group('site')
video_data = self._download_json(
'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
% (self._SITES_MAP[site], tmp_id), tmp_id)
if video_data.get('type') == 'article':
video_data = video_data['media']
video_id = compat_str(video_data['id'])
title = video_data['title']
formats = []
for playback in video_data.get('playbacks', []):
playback_url = playback.get('url')
if not playback_url:
continue
ext = determine_ext(playback_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
playback_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=playback.get('name', 'hls'), fatal=False)
self._check_formats(m3u8_formats, video_id)
formats.extend(m3u8_formats)
else:
height = int_or_none(playback.get('height'))
formats.append({
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
'url': playback_url,
'width': int_or_none(playback.get('width')),
'height': height,
})
self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
thumbnails = []
for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
thumbnail_url = thumbnail_data.get('src')
if not thumbnail_url:
continue
thumbnails.append({
'id': thumbnail_id,
'url': thumbnail_url,
'width': int_or_none(thumbnail_data.get('width')),
'height': int_or_none(thumbnail_data.get('height')),
})
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'timestamp': parse_iso8601(video_data.get('date')),
'duration': parse_duration(video_data.get('duration')),
'thumbnails': thumbnails,
'formats': formats,
}

View File

@ -163,7 +163,7 @@ class NiconicoIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
# No authentication to be performed
if not username:
return True

View File

@ -13,38 +13,11 @@ from ..utils import (
)
class NineCNineMediaBaseIE(InfoExtractor):
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
IE_NAME = '9c9media:stack'
_GEO_COUNTRIES = ['CA']
_VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
def _real_extract(self, url):
destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
formats = []
formats.extend(self._extract_m3u8_formats(
stack_base_url + 'm3u8', stack_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
stack_base_url + 'f4m', stack_id,
f4m_id='hds', fatal=False))
self._sort_formats(formats)
return {
'id': stack_id,
'formats': formats,
}
class NineCNineMediaIE(NineCNineMediaBaseIE):
class NineCNineMediaIE(InfoExtractor):
IE_NAME = '9c9media'
_GEO_COUNTRIES = ['CA']
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
def _real_extract(self, url):
destination_code, content_id = re.match(self._VALID_URL, url).groups()
@ -58,13 +31,26 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
content_package = content['ContentPackages'][0]
package_id = content_package['Id']
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
content_package = self._download_json(content_package_url, content_id)
content_package = self._download_json(
content_package_url, content_id, query={
'$include': '[HasClosedCaptions]',
})
if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
raise ExtractorError('This video is DRM protected.', expected=True)
stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
multistacks = len(stacks) > 1
manifest_base_url = content_package_url + 'manifest.'
formats = []
formats.extend(self._extract_m3u8_formats(
manifest_base_url + 'm3u8', content_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
manifest_base_url + 'f4m', content_id,
f4m_id='hds', fatal=False))
formats.extend(self._extract_mpd_formats(
manifest_base_url + 'mpd', content_id,
mpd_id='dash', fatal=False))
self._sort_formats(formats)
thumbnails = []
for image in content.get('Images', []):
@ -85,10 +71,12 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
continue
container.append(e_name)
description = content.get('Desc') or content.get('ShortDesc')
season = content.get('Season', {})
base_info = {
'description': description,
info = {
'id': content_id,
'title': title,
'description': content.get('Desc') or content.get('ShortDesc'),
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
'episode_number': int_or_none(content.get('Episode')),
'season': season.get('Name'),
@ -97,26 +85,19 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
'series': content.get('Media', {}).get('Name'),
'tags': tags,
'categories': categories,
'duration': float_or_none(content_package.get('Duration')),
'formats': formats,
}
entries = []
for stack in stacks:
stack_id = compat_str(stack['Id'])
entry = {
'_type': 'url_transparent',
'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
'id': stack_id,
'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
'duration': float_or_none(stack.get('Duration')),
'ie_key': 'NineCNineMediaStack',
if content_package.get('HasClosedCaptions'):
info['subtitles'] = {
'en': [{
'url': manifest_base_url + 'vtt',
'ext': 'vtt',
}, {
'url': manifest_base_url + 'srt',
'ext': 'srt',
}]
}
entry.update(base_info)
entries.append(entry)
return {
'_type': 'multi_video',
'id': content_id,
'title': title,
'description': description,
'entries': entries,
}
return info

View File

@ -65,7 +65,7 @@ class NocoIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -42,7 +42,7 @@ class PacktPubIE(PacktPubBaseIE):
_TOKEN = None
def _real_initialize(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
try:

View File

@ -53,7 +53,7 @@ class PatreonIE(InfoExtractor):
# needed. Keeping this commented for when this inevitably changes.
'''
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -94,7 +94,7 @@ class PluralsightIE(PluralsightBaseIE):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -54,6 +54,7 @@ class RBMARadioIE(InfoExtractor):
'abr': abr,
'vcodec': 'none',
} for abr in (96, 128, 256)]
self._check_formats(formats, episode_id)
description = clean_html(episode.get('longTeaser'))
thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))

View File

@ -19,7 +19,7 @@ class RDSIE(InfoExtractor):
'info_dict': {
'id': '604333',
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
'ext': 'mp4',
'ext': 'flv',
'title': 'Fowler Jr. prend la direction de Jacksonville',
'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
'timestamp': 1430397346,

View File

@ -50,7 +50,7 @@ class RoosterTeethIE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -27,7 +27,7 @@ class SafariBaseIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -64,7 +64,7 @@ class SinaIE(InfoExtractor):
# The video id is in the redirected url
self.to_screen('Getting video id')
request = HEADRequest(url)
(_, urlh) = self._download_webpage_handle(request, 'NA', False)
_, urlh = self._download_webpage_handle(request, 'NA', False)
return self._real_extract(urlh.geturl())
else:
pseudo_id = mobj.group('pseudo_id')

View File

@ -181,7 +181,6 @@ class SoundcloudIE(InfoExtractor):
thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
if isinstance(thumbnail, compat_str):
thumbnail = thumbnail.replace('-large', '-t500x500')
ext = 'mp3'
result = {
'id': track_id,
'uploader': info.get('user', {}).get('username'),
@ -215,8 +214,11 @@ class SoundcloudIE(InfoExtractor):
track_id, 'Downloading track url', query=query)
for key, stream_url in format_dict.items():
abr = int_or_none(self._search_regex(
r'_(\d+)_url', key, 'audio bitrate', default=None))
ext, abr = 'mp3', None
mobj = re.search(r'_([^_]+)_(\d+)_url', key)
if mobj:
ext, abr = mobj.groups()
abr = int(abr)
if key.startswith('http'):
stream_formats = [{
'format_id': key,
@ -234,13 +236,14 @@ class SoundcloudIE(InfoExtractor):
}]
elif key.startswith('hls'):
stream_formats = self._extract_m3u8_formats(
stream_url, track_id, 'mp3', entry_protocol='m3u8_native',
stream_url, track_id, ext, entry_protocol='m3u8_native',
m3u8_id=key, fatal=False)
else:
continue
for f in stream_formats:
f['abr'] = abr
if abr:
for f in stream_formats:
f['abr'] = abr
formats.extend(stream_formats)
@ -250,7 +253,7 @@ class SoundcloudIE(InfoExtractor):
formats.append({
'format_id': 'fallback',
'url': update_url_query(info['stream_url'], query),
'ext': ext,
'ext': 'mp3',
})
for f in formats:

View File

@ -11,9 +11,9 @@ from .nexx import (
from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
extract_attributes,
unified_strdate,
get_element_by_attribute,
parse_duration,
strip_or_none,
unified_timestamp,
)
@ -21,35 +21,38 @@ class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
_TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'md5': '2c2754212136f35fb4b19767d242f66e',
'md5': 'b57399839d055fccfeb9a0455c439868',
'info_dict': {
'id': '1259285',
'id': '563747',
'ext': 'mp4',
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
'duration': 49,
'upload_date': '20130311',
'timestamp': 1362994320,
},
}, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
'md5': 'f2cdf638d7aa47654e251e1aee360af1',
'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
'info_dict': {
'id': '1309159',
'id': '580988',
'ext': 'mp4',
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983,
'upload_date': '20131115',
'timestamp': 1384546642,
},
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
'md5': 'd8eeca6bfc8f1cd6f490eb1f44695d51',
'md5': '97b91083a672d72976faa8433430afb9',
'info_dict': {
'id': '1519126',
'id': '601883',
'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
'upload_date': '20140904',
'timestamp': 1409834160,
}
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
@ -62,59 +65,28 @@ class SpiegelIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, handle = self._download_webpage_handle(url, video_id)
metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
handle = self._request_webpage(metadata_url, video_id)
# 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
if SpiegeltvIE.suitable(handle.geturl()):
return self.url_result(handle.geturl(), 'Spiegeltv')
nexx_id = self._search_regex(
r'nexxOmniaId\s*:\s*(\d+)', webpage, 'nexx id', default=None)
if nexx_id:
domain_id = NexxIE._extract_domain_id(webpage) or '748'
return self.url_result(
'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
video_id=nexx_id)
video_data = extract_attributes(self._search_regex(r'(<div[^>]+id="spVideoElements"[^>]+>)', webpage, 'video element', default=''))
title = video_data.get('data-video-title') or get_element_by_attribute('class', 'module-title', webpage)
description = video_data.get('data-video-teaser') or self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex(
[r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
webpage, 'server URL', group='url')
xml_url = base_url + video_id + '.xml'
idoc = self._download_xml(xml_url, video_id)
formats = []
for n in list(idoc):
if n.tag.startswith('type') and n.tag != 'type6':
format_id = n.tag.rpartition('type')[2]
video_url = base_url + n.find('./filename').text
formats.append({
'format_id': format_id,
'url': video_url,
'width': int(n.find('./width').text),
'height': int(n.find('./height').text),
'abr': int(n.find('./audiobitrate').text),
'vbr': int(n.find('./videobitrate').text),
'vcodec': n.find('./codec').text,
'acodec': 'MP4A',
})
duration = float(idoc[0].findall('./duration')[0].text)
self._check_formats(formats, video_id)
self._sort_formats(formats)
video_data = self._parse_json(self._webpage_read_content(
handle, metadata_url, video_id), video_id)
title = video_data['title']
nexx_id = video_data['nexxOmniaId']
domain_id = video_data.get('nexxOmniaDomain') or '748'
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'nexx:%s:%s' % (domain_id, nexx_id),
'title': title,
'description': description.strip() if description else None,
'duration': duration,
'upload_date': unified_strdate(video_data.get('data-video-date')),
'formats': formats,
'description': strip_or_none(video_data.get('teaser')),
'duration': parse_duration(video_data.get('duration')),
'timestamp': unified_timestamp(video_data.get('datum')),
'ie_key': NexxIE.ie_key(),
}

View File

@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re
from .turner import TurnerBaseIE
from ..compat import (
compat_urllib_parse_urlparse,
compat_parse_qs,
)
from ..utils import (
float_or_none,
int_or_none,
@ -38,48 +42,22 @@ class TBSIE(TurnerBaseIE):
def _real_extract(self, url):
site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(self._search_regex(
drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
webpage, 'drupal setting'), display_id)['turner_playlist'][0]
webpage, 'drupal setting'), display_id)
video_data = drupal_settings['turner_playlist'][0]
media_id = video_data['mediaID']
title = video_data['title']
tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
drupal_settings['ngtv_token_url']).query)
streams_data = self._download_json(
'http://medium.ngtv.io/media/%s/tv' % media_id,
media_id)['media']['tv']
duration = None
chapters = []
formats = []
for supported_type in ('unprotected', 'bulkaes'):
stream_data = streams_data.get(supported_type, {})
m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
if not m3u8_url:
continue
if stream_data.get('playlistProtection') == 'spe':
m3u8_url = self._add_akamai_spe_token(
'http://token.vgtf.net/token/token_spe',
m3u8_url, media_id, {
'url': url,
'site_name': site[:3].upper(),
'auth_required': video_data.get('authRequired') == '1',
})
formats.extend(self._extract_m3u8_formats(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
if not chapters:
for chapter in stream_data.get('contentSegments', []):
start_time = float_or_none(chapter.get('start'))
duration = float_or_none(chapter.get('duration'))
if start_time is None or duration is None:
continue
chapters.append({
'start_time': start_time,
'end_time': start_time + duration,
})
self._sort_formats(formats)
info = self._extract_ngtv_info(
media_id, tokenizer_query, {
'url': url,
'site_name': site[:3].upper(),
'auth_required': video_data.get('authRequired') == '1',
})
thumbnails = []
for image_id, image in video_data.get('images', {}).items():
@ -98,15 +76,14 @@ class TBSIE(TurnerBaseIE):
})
thumbnails.append(i)
return {
info.update({
'id': media_id,
'title': title,
'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
'duration': duration,
'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
'timestamp': int_or_none(video_data.get('created')),
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
'cahpters': chapters,
'thumbnails': thumbnails,
'formats': formats,
}
})
return info

View File

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import json
from .common import InfoExtractor
from .turner import TurnerBaseIE
from ..utils import (
determine_ext,
ExtractorError,
@ -15,7 +15,7 @@ from ..utils import (
)
class TeamcocoIE(InfoExtractor):
class TeamcocoIE(TurnerBaseIE):
_VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [
{
@ -110,6 +110,8 @@ class TeamcocoIE(InfoExtractor):
name
}
duration
turnerMediaId
turnerMediaAuthToken
}
}
... on NotFoundSlug {
@ -123,53 +125,65 @@ class TeamcocoIE(InfoExtractor):
record = response['record']
video_id = record['id']
video_sources = self._graphql_call('''{
%s(id: "%s") {
src
}
}''', 'RecordVideoSource', video_id) or {}
formats = []
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
for format_id, src in video_sources.get('src', {}).items():
if not isinstance(src, dict):
continue
src_url = src.get('src')
if not src_url:
continue
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
if format_id == 'hls' or ext == 'm3u8':
# compat_urllib_parse.urljoin does not work here
if src_url.startswith('/'):
src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
formats.extend(self._extract_m3u8_formats(
src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
else:
if src_url.startswith('/mp4:protected/'):
# TODO Correct extraction for these files
continue
tbr = int_or_none(self._search_regex(
r'(\d+)k\.mp4', src_url, 'tbr', default=None))
formats.append({
'url': src_url,
'ext': ext,
'tbr': tbr,
'format_id': format_id,
'quality': get_quality(format_id),
})
if not formats:
formats = self._extract_m3u8_formats(
record['file']['url'], video_id, 'mp4', fatal=False)
self._sort_formats(formats)
return {
info = {
'id': video_id,
'display_id': display_id,
'formats': formats,
'title': record['title'],
'thumbnail': record.get('thumb', {}).get('preview'),
'description': record.get('teaser'),
'duration': parse_duration(record.get('duration')),
'timestamp': parse_iso8601(record.get('publishOn')),
}
media_id = record.get('turnerMediaId')
if media_id:
self._initialize_geo_bypass({
'countries': ['US'],
})
info.update(self._extract_ngtv_info(media_id, {
'accessToken': record['turnerMediaAuthToken'],
'accessTokenType': 'jws',
}))
else:
video_sources = self._graphql_call('''{
%s(id: "%s") {
src
}
}''', 'RecordVideoSource', video_id) or {}
formats = []
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
for format_id, src in video_sources.get('src', {}).items():
if not isinstance(src, dict):
continue
src_url = src.get('src')
if not src_url:
continue
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
if format_id == 'hls' or ext == 'm3u8':
# compat_urllib_parse.urljoin does not work here
if src_url.startswith('/'):
src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
formats.extend(self._extract_m3u8_formats(
src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
else:
if src_url.startswith('/mp4:protected/'):
# TODO Correct extraction for these files
continue
tbr = int_or_none(self._search_regex(
r'(\d+)k\.mp4', src_url, 'tbr', default=None))
formats.append({
'url': src_url,
'ext': ext,
'tbr': tbr,
'format_id': format_id,
'quality': get_quality(format_id),
})
if not formats:
formats = self._extract_m3u8_formats(
record['file']['url'], video_id, 'mp4', fatal=False)
self._sort_formats(formats)
info['formats'] = formats
return info

View File

@ -32,7 +32,7 @@ class TennisTVIE(InfoExtractor):
_NETRC_MACHINE = 'tennistv'
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if not username or not password:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

View File

@ -36,7 +36,7 @@ class TubiTvIE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
self.report_login()

View File

@ -4,11 +4,18 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
urlencode_postdata
)
class TumblrIE(InfoExtractor):
_VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
_NETRC_MACHINE = 'tumblr'
_LOGIN_URL = 'https://www.tumblr.com/login'
_TESTS = [{
'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
'md5': '479bb068e5b16462f5176a6828829767',
@ -97,6 +104,45 @@ class TumblrIE(InfoExtractor):
'add_ie': ['Instagram'],
}]
def _real_initialize(self):
    """Run the login step once when the extractor is initialized."""
    self._login()
def _login(self):
    """Log in to Tumblr when credentials are configured.

    Does nothing if no username is available. Otherwise the login page is
    downloaded, the values returned by `_hidden_inputs` for that page are
    combined with the credentials, and the form is posted back to
    `_LOGIN_URL`.

    A final URL containing '/dashboard' is treated as success. Otherwise
    the response is searched for a `RegistrationForm.errors` array: its
    first entry is raised as an expected ExtractorError, and if no entry
    is found only a warning is reported.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    form_fields = self._hidden_inputs(self._download_webpage(
        self._LOGIN_URL, None, 'Downloading login page'))
    form_fields['user[email]'] = username
    form_fields['user[password]'] = password

    request_headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': self._LOGIN_URL,
    }
    response, urlh = self._download_webpage_handle(
        self._LOGIN_URL, None, 'Logging in',
        data=urlencode_postdata(form_fields), headers=request_headers)

    if '/dashboard' not in urlh.geturl():
        # Not redirected to the dashboard: look for errors embedded in the page
        errors_json = self._search_regex(
            r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
            'login errors', default='[]')
        login_errors = self._parse_json(errors_json, None, fatal=False)
        if login_errors:
            raise ExtractorError(
                'Unable to login: %s' % login_errors[0], expected=True)
        self.report_warning('Login has probably failed')
def _real_extract(self, url):
m_url = re.match(self._VALID_URL, url)
video_id = m_url.group('id')
@ -105,11 +151,19 @@ class TumblrIE(InfoExtractor):
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
redirect_url = compat_str(urlh.geturl())
if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
raise ExtractorError(
'This Tumblr may contain sensitive media. '
'Disable safe mode in your account settings '
'at https://www.tumblr.com/settings/account#safe_mode',
expected=True)
iframe_url = self._search_regex(
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
webpage, 'iframe url', default=None)
if iframe_url is None:
return self.url_result(urlh.geturl(), 'Generic')
return self.url_result(redirect_url, 'Generic')
iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')

View File

@ -9,6 +9,7 @@ from ..utils import (
xpath_text,
int_or_none,
determine_ext,
float_or_none,
parse_duration,
xpath_attr,
update_url_query,
@ -23,14 +24,17 @@ class TurnerBaseIE(AdobePassIE):
def _extract_timestamp(self, video_data):
    """Return the timestamp stored in `video_data`.

    Looks up the 'uts' attribute at 'dateCreated' via `xpath_attr` and
    passes the result through `int_or_none`.
    """
    uts = xpath_attr(video_data, 'dateCreated', 'uts')
    return int_or_none(uts)
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
if not token:
query = {
'path': secure_path,
'videoId': content_id,
}
if custom_tokenizer_query:
query.update(custom_tokenizer_query)
else:
query['videoId'] = content_id
if ap_data.get('auth_required'):
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
auth = self._download_xml(
@ -188,3 +192,42 @@ class TurnerBaseIE(AdobePassIE):
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
'is_live': is_live,
}
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
    """Collect HLS formats, chapters and duration for an NGTV media id.

    The media description is downloaded from medium.ngtv.io and its
    'unprotected' and 'bulkaes' stream entries are visited in that order.
    Entries without a 'secureUrl' or 'url' are ignored.

    media_id -- interpolated into the medium.ngtv.io URL and passed on to
        the download/token helpers as the video/content id.
    tokenizer_query -- forwarded to `_add_akamai_spe_token` as the custom
        tokenizer query when a stream reports 'spe' playlist protection.
    ap_data -- forwarded to `_add_akamai_spe_token`; an empty dict is used
        when it is omitted or falsy.

    Returns a dict with the keys 'formats', 'chapters' and 'duration'.
    """
    streams_data = self._download_json(
        'http://medium.ngtv.io/media/%s/tv' % media_id,
        media_id)['media']['tv']
    duration = None
    chapters = []
    formats = []
    for supported_type in ('unprotected', 'bulkaes'):
        # `or {}` (rather than a .get default) also covers an explicit
        # JSON null stored under the key.
        stream_data = streams_data.get(supported_type) or {}
        m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
        if not m3u8_url:
            continue
        if stream_data.get('playlistProtection') == 'spe':
            m3u8_url = self._add_akamai_spe_token(
                'http://token.ngtv.io/token/token_spe',
                m3u8_url, media_id, ap_data or {}, tokenizer_query)
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

        # Keep the first duration found; a later stream entry lacking
        # 'totalRuntime' must not reset it to None.
        if duration is None:
            duration = float_or_none(stream_data.get('totalRuntime'))

        # Chapters are taken from the first stream that yields any.
        if not chapters:
            for chapter in stream_data.get('contentSegments') or []:
                start_time = float_or_none(chapter.get('start'))
                chapter_duration = float_or_none(chapter.get('duration'))
                if start_time is None or chapter_duration is None:
                    continue
                chapters.append({
                    'start_time': start_time,
                    'end_time': start_time + chapter_duration,
                })
    self._sort_formats(formats)

    return {
        'formats': formats,
        'chapters': chapters,
        'duration': duration,
    }

View File

@ -61,7 +61,7 @@ class TwitchBaseIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -151,7 +151,7 @@ class UdemyIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -3,13 +3,16 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_duration,
parse_iso8601,
urlencode_postdata,
)
class UFCTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
_NETRC_MACHINE = 'ufctv'
_TEST = {
'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
'info_dict': {
@ -26,6 +29,21 @@ class UFCTVIE(InfoExtractor):
}
}
def _real_initialize(self):
    """Authenticate against ufc.tv when credentials are configured.

    Does nothing if no username is available. Otherwise the credentials
    are posted to the authenticate endpoint with a JSON response
    requested. A non-empty 'code' in the response other than
    'loginsuccess' is raised as an expected ExtractorError.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    credentials = {
        'username': username,
        'password': password,
        'format': 'json',
    }
    auth_response = self._download_json(
        'https://www.ufc.tv/secure/authenticate',
        None, 'Logging in', data=urlencode_postdata(credentials))

    code = auth_response.get('code')
    if not code or code == 'loginsuccess':
        return
    raise ExtractorError(code, expected=True)
def _real_extract(self, url):
display_id = self._match_id(url)
video_data = self._download_json(url, display_id, query={

View File

@ -75,7 +75,7 @@ class VesselIE(InfoExtractor):
'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
self.report_login()

View File

@ -88,7 +88,7 @@ class VikiBaseIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -37,7 +37,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_LOGIN_URL = 'https://vimeo.com/log_in'
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
if self._LOGIN_REQUIRED:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)

View File

@ -32,7 +32,7 @@ class VKBaseIE(InfoExtractor):
_NETRC_MACHINE = 'vk'
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@ -85,7 +85,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
"""
(username, password) = self._get_login_info()
username, password = self._get_login_info()
# No authentication to be performed
if username is None:
if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:

View File

@ -24,7 +24,7 @@ class ZattooBaseIE(InfoExtractor):
_power_guide_hash = None
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if not username or not password:
self.raise_login_required(
'A valid %s account is needed to access this media.'

View File

@ -2253,12 +2253,12 @@ US_RATINGS = {
TV_PARENTAL_GUIDELINES = {
'Y': 0,
'Y7': 7,
'G': 0,
'PG': 0,
'14': 14,
'MA': 17,
'TV-Y': 0,
'TV-Y7': 7,
'TV-G': 0,
'TV-PG': 0,
'TV-14': 14,
'TV-MA': 17,
}
@ -2272,9 +2272,9 @@ def parse_age_limit(s):
return int(m.group('age'))
if s in US_RATINGS:
return US_RATINGS[s]
m = re.match(r'^TV[_-]?(%s)$' % '|'.join(TV_PARENTAL_GUIDELINES.keys()), s)
m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
if m:
return TV_PARENTAL_GUIDELINES[m.group(1)]
return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
return None
@ -2667,6 +2667,7 @@ def dfxp2srt(dfxp_data):
]
_x = functools.partial(xpath_with_ns, ns_map={
'xml': 'http://www.w3.org/XML/1998/namespace',
'ttml': 'http://www.w3.org/ns/ttml',
'tts': 'http://www.w3.org/ns/ttml#styling',
})
@ -2758,7 +2759,9 @@ def dfxp2srt(dfxp_data):
repeat = False
while True:
for style in dfxp.findall(_x('.//ttml:style')):
style_id = style.get('id')
style_id = style.get('id') or style.get(_x('xml:id'))
if not style_id:
continue
parent_style_id = style.get('style')
if parent_style_id:
if parent_style_id not in styles:

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2018.05.18'
__version__ = '2018.05.30'