1
0
mirror of https://github.com/l1ving/youtube-dl synced 2025-02-03 17:17:51 +08:00

[nhl] Cache authentication between runs

This hopefully solves the sign-on restriction error messages that appear
when a user attempts to log in too many times.

Refactored API calls to a NHLApi class and reduced repetition
Made timestamp an optional field
Made non-optional parsing use [] instead of get() (e.g. everything that
produces the title / url / id)
This commit is contained in:
Joshua McKinney 2017-01-26 05:19:47 -06:00
parent d044355fe0
commit 34266aa604

View File

@ -8,6 +8,7 @@ import time
from .common import InfoExtractor
from ..compat import (
compat_cookies,
compat_urlparse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
@ -355,15 +356,99 @@ class NHLIE(InfoExtractor):
}
class NHLApi(object):
    """Client for the NHL web services used by the NHL.tv extractor.

    Every request is issued through the owning extractor's download helpers.
    The ``Authorization`` cookie obtained by login() is kept in ``self.auth``
    in serialized form so that callers can cache it between runs and hand it
    back to the constructor.
    """

    # provider -> (login URL, key wrapping the credentials, Referer header)
    _LOGIN_FLOWS = {
        'nhl': (
            'https://gateway.web.nhl.com/ws/subscription/flow/nhlPurchase.login',
            'nhlCredentials',
            'https://www.nhl.com/login/nhl',
        ),
        'rogers': (
            'https://activation-rogers.svc.nhl.com/ws/subscription/flow/rogers.login-check',
            'rogerCredentials',
            'https://www.nhl.com/login/rogers',
        ),
    }

    # Header that Morsel.output() puts in front of the serialized cookie
    _SET_COOKIE_PREFIX = 'Set-Cookie:'

    def __init__(self, extractor, auth=None):
        """Create the client, restoring a previous login when one is given.

        extractor -- object providing to_screen(), _set_cookie(),
                     _get_cookies(), _download_json() and _download_webpage()
        auth      -- serialized Authorization cookie as stored in ``self.auth``
                     after an earlier login(), or None when there is none
        """
        self.extractor = extractor
        self.auth = auth
        if auth:
            extractor.to_screen("Using cached credentials. Use the --rm-cache-dir option to remove.")
            self._restore_auth_cookie(auth)

    def _restore_auth_cookie(self, auth):
        """Parse a serialized Authorization cookie and install it on nhl.com."""
        raw = auth
        # SimpleCookie only parses the interpreter's native ``str`` type and
        # treats anything else as a mapping, so convert only when needed
        # (text on Python 2) instead of always encoding to bytes.
        if not isinstance(raw, str):
            raw = raw.encode('utf8', 'replace')
        raw = raw.strip()
        # Values written via Morsel.output() carry a "Set-Cookie:" header that
        # is not part of the cookie itself; drop it before parsing.
        if raw.startswith(self._SET_COOKIE_PREFIX):
            raw = raw[len(self._SET_COOKIE_PREFIX):]
        cookie = compat_cookies.SimpleCookie(raw)
        auth_cookie = cookie['Authorization']
        # TODO handle cookie expiry
        self.extractor._set_cookie('nhl.com', 'Authorization', auth_cookie.value)

    def login(self, username, password, auth_provider='nhl'):
        """Log in and remember the resulting Authorization cookie in self.auth.

        username, password -- account credentials
        auth_provider      -- 'nhl' or 'rogers'

        Raises ExtractorError for an unknown provider or when the login
        response did not leave an Authorization cookie behind.
        """
        if auth_provider not in self._LOGIN_FLOWS:
            raise ExtractorError('Unknown authentication provider: %s. Valid values are nhl, rogers' % auth_provider)
        url, credentials_key, referrer = self._LOGIN_FLOWS[auth_provider]

        access_token = self._get_oauth_access_token()
        credentials = {
            credentials_key: {
                'email': username,
                'password': password,
            }
        }
        login_request = sanitized_Request(
            url,
            data=json.dumps(credentials, sort_keys=True).encode('utf-8'),
            headers={
                'Referer': referrer,
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'Authorization': access_token,
                'Content-Type': 'application/json'
            })
        self.extractor._download_webpage(
            login_request, None, 'Logging in', 'Unable to log in')

        auth_cookie = self.extractor._get_cookies('http://nhl.com').get('Authorization')
        if auth_cookie is None:
            raise ExtractorError('Unable to log in: no Authorization cookie was set')
        # TODO this doesn't extract the cookie expiry from the cookie correctly
        # Serialize without the "Set-Cookie:" header so that the stored value
        # can be parsed back by SimpleCookie in __init__.
        self.auth = auth_cookie.OutputString()

    def _get_oauth_access_token(self):
        """Request a client-credentials OAuth token and return it as a string."""
        authorization = base64.b64encode(
            'web_nhl-v1.0.0:2d1d846ea3b194a18ef40ac9fbce97e3'.encode('utf-8')
        ).decode('ascii')
        oauth_request = sanitized_Request(
            'https://user.svc.nhl.com/oauth/token?grant_type=client_credentials',
            # An empty body turns this into a POST; request bodies are bytes
            data=b'',
            headers={
                'Referer': 'https://www.nhl.com/login',
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'Authorization': 'Basic %s' % authorization,
                'Content-Type': 'application/json',
            })
        oauth_response = self.extractor._download_json(
            oauth_request,
            None,  # video_id
            'Requesting OAuth access token',
            'Unable to get OAuth access token')
        return oauth_response['access_token']

    def get_game_data(self, video_id, game_id):
        """Download and return the schedule JSON for the game *game_id*.

        video_id is only passed on to the extractor's download helper.
        """
        game_data_url = 'https://statsapi.web.nhl.com/api/v1/schedule?gamePk=%s&expand=schedule.game.content.media.milestones&expand=schedule.game.content.media.epg&expand=schedule.venue' % game_id
        return self.extractor._download_json(
            game_data_url,
            video_id,
            'Downloading game data',
            'Unable to download game data')

    def get_stream_data(self, video_id):
        """Download and return the stream JSON for the content *video_id*."""
        stream_data_url = 'https://mf.svc.nhl.com/ws/media/mf/v2.4/stream?contentId=%s&playbackScenario=HTTP_CLOUD_WIRED_WEB&sessionKey=%s&auth=response&format=json&platform=WEB_MEDIAPLAYER&_=%s000'
        session_key = 'abcdefghijklmnop'
        # Seconds since the epoch; the template appends "000" to the value
        timestamp = int(time.time())
        url = stream_data_url % (video_id, session_key, timestamp)
        return self.extractor._download_json(url, video_id, 'Downloading stream data', 'Unable to download stream data')
class NHLTVIE(InfoExtractor):
IE_NAME = 'nhl.com:nhltv'
_VALID_URL = r'https?://(?:www\.)?nhl.com/tv/(?P<gameId>\d+)(/[^/]+)*(/(?P<id>\d+))?'
_OAUTH_URL = 'https://user.svc.nhl.com/oauth/token?grant_type=client_credentials'
_NHL_LOGIN_URL = 'https://gateway.web.nhl.com/ws/subscription/flow/nhlPurchase.login'
_ROGERS_LOGIN_URL = 'https://activation-rogers.svc.nhl.com/ws/subscription/flow/rogers.login-check'
_NETRC_MACHINE = 'nhltv'
_NHLTV_CACHE = 'nhltv'
_TESTS = [{
# This is a free video that can be accessed by anyone with an NHL TV login
# This is a free video that anyone with an NHL TV login can access
'url': 'https://www.nhl.com/tv/2016020321/221-1003765/46561403',
'md5': '34d9518c495ebdad947b9723b5a7c9a9',
'info_dict': {
@ -386,128 +471,31 @@ class NHLTVIE(InfoExtractor):
'only_matching': True,
}]
def _real_initialize(self):
    """Set up ``self.api``, logging in only when no cached auth is stored."""
    cached_auth = self._downloader.cache.load(self._NHLTV_CACHE, 'auth')
    self.api = NHLApi(self, cached_auth)
    if cached_auth is not None:
        # NHLApi was handed the cached value; no fresh login is needed
        return
    self._login()
def _login(self):
    """Sign in with the configured credentials.

    Fetches an OAuth access token, then posts the credentials to the login
    endpoint of the selected authentication provider ('nhl', the default, or
    'rogers'). Calls raise_login_required() when no username is available and
    raises ExtractorError for any other provider name.
    """
    # TODO cache login to avoid 'Sign-on restriction: Too many usage attempts'
    username, password = self._get_login_info()
    if username is None:
        self.raise_login_required()

    client_credentials = 'web_nhl-v1.0.0:2d1d846ea3b194a18ef40ac9fbce97e3'
    authorization = base64.b64encode(client_credentials.encode('utf-8')).decode('ascii')
    token_headers = {
        'Referer': 'https://www.nhl.com/login',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Authorization': 'Basic %s' % authorization,
        'Content-Type': 'application/json',
    }
    token_request = sanitized_Request(self._OAUTH_URL, data="", headers=token_headers)
    token_response = self._download_json(
        token_request,
        None,  # video_id
        'Requesting OAuth access token',
        'Unable to get OAuth access token')
    access_token = token_response['access_token']

    provider = self._get_auth_provider()
    if provider is None or provider == 'nhl':
        build_request = self._create_nhl_login_request
    elif provider == 'rogers':
        build_request = self._create_rogers_login_request
    else:
        raise ExtractorError('Unknown authentication provider: %s. Valid values are nhl, rogers' % provider)
    login_request = build_request(username, password, access_token)

    # sets up the cookies we need to download
    self._download_webpage(
        login_request, None, 'Logging in', 'Unable to log in')
def _create_nhl_login_request(self, username, password, access_token):
    """Return the login request for the 'nhl' authentication provider.

    The credentials are sent as a JSON body and *access_token* is passed in
    the Authorization header.
    """
    body = json.dumps(
        {'nhlCredentials': {'email': username, 'password': password}},
        sort_keys=True)
    request_headers = {
        'Referer': 'https://www.nhl.com/login/nhl',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Authorization': access_token,
        'Content-Type': 'application/json'
    }
    return sanitized_Request(
        self._NHL_LOGIN_URL,
        data=body.encode('utf-8'),
        headers=request_headers)
def _create_rogers_login_request(self, username, password, access_token):
    """Return the login request for the 'rogers' authentication provider.

    The credentials are sent as a JSON body and *access_token* is passed in
    the Authorization header.
    """
    body = json.dumps(
        {'rogerCredentials': {'email': username, 'password': password}},
        sort_keys=True)
    request_headers = {
        'Referer': 'https://www.nhl.com/login/rogers',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Authorization': access_token,
        'Content-Type': 'application/json'
    }
    return sanitized_Request(
        self._ROGERS_LOGIN_URL,
        data=body.encode('utf-8'),
        headers=request_headers)
def _real_initialize(self):
self._login()
def extract_stream_info(self, video_id):
    """Download the stream data for *video_id*.

    Returns a ``(media_auth, m3u8_url)`` tuple. Raises ExtractorError with the
    message supplied in the response when its status code is not 1.
    """
    url_template = 'https://mf.svc.nhl.com/ws/media/mf/v2.4/stream?contentId=%s&playbackScenario=HTTP_CLOUD_WIRED_WEB&sessionKey=%s&auth=response&format=json&platform=WEB_MEDIAPLAYER&_=%s000'
    now = int(time.time())
    session_key = "abcdefghijklmnop"
    stream_data = self._download_json(
        url_template % (video_id, session_key, now),
        video_id,
        'Downloading stream data',
        'Unable to download stream data')

    if stream_data.get('status_code') != 1:
        # e.g. Media not found, Too many sign ons, etc.
        raise ExtractorError(stream_data.get('status_message'), expected=True)

    session_attribute = stream_data.get('session_info').get('sessionAttributes')[0]
    media_auth = session_attribute.get('attributeValue')

    event = stream_data.get('user_verified_event')[0]
    content = event.get('user_verified_content')[0]
    media_item = content.get('user_verified_media_item')[0]
    m3u8_url = media_item.get('url')
    return (media_auth, m3u8_url)
def extract_game_info(self, video_id, game_id):
    """Download the schedule entry for *game_id* and describe the chosen feed.

    When *video_id* is None the first item of an "NHLTV" group is selected;
    otherwise the item whose playback id equals *video_id* is used.

    Returns a ``(video_id, title, timestamp)`` tuple.
    """
    schedule_url = 'https://statsapi.web.nhl.com/api/v1/schedule?gamePk=%s&expand=schedule.game.content.media.milestones&expand=schedule.game.content.media.epg&expand=schedule.venue' % game_id
    schedule = self._download_json(
        schedule_url, video_id, 'Downloading game data', 'Unable to download game data')

    day = schedule.get('dates')[0]
    date = day.get('date')  # yyyy-mm-dd
    game = day.get('games')[0]
    teams = game.get('teams')
    away = teams.get('away').get('team').get('name')
    home = teams.get('home').get('team').get('name')

    feed_type = "UNKNOWN"
    media_node = game.get("content").get("media")
    nhltv_groups = (
        group for group in media_node.get("epg", [])
        if group.get("title") == "NHLTV")
    for group in nhltv_groups:
        for item in group.get('items', []):
            playback_id = item.get('mediaPlaybackId')
            if video_id is None or playback_id == video_id:
                feed_type = item.get('mediaFeedType')
                video_id = playback_id

    timestamp = parse_iso8601(media_node.get('milestones').get('streamStart'))
    title = "%s: %s @ %s (%s feed)" % (date, away, home, feed_type)
    return (video_id, title, timestamp)
def get_60fps_playlist(self, url):
    """Return *url* rewritten so that it adds a 60 fps broadcast."""
    wired_web = re.compile('_wired_web')
    return wired_web.sub('_wired60', url)
auth_provider = self._get_auth_provider() or 'nhl'
self.report_login()
self.api.login(username, password, auth_provider)
self._downloader.cache.store(self._NHLTV_CACHE, 'auth', self.api.auth)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, game_id = mobj.group('id'), mobj.group('gameId')
# At this point we may have a game_id without a video_id. The next call
# ensures that we get a video_id for the desired game.
video_id, title, timestamp = self.extract_game_info(video_id, game_id)
media_auth, m3u8_url = self.extract_stream_info(video_id)
m3u8_url, media_auth = self.extract_playlist_url_and_auth(video_id)
m3u8_url = self.get_60fps_playlist(m3u8_url)
# media auth cookie is required for the downloader
# media auth cookie authenticates the specific download url
self._set_cookie('nhl.com', 'mediaAuth_v2', media_auth)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'ts', m3u8_id='hls')
self._check_formats(formats, video_id)
@ -519,3 +507,48 @@ class NHLTVIE(InfoExtractor):
'timestamp': timestamp,
'formats': formats,
}
def extract_game_info(self, video_id, game_id):
    """Call the NHL API to get the video id, title and start timestamp of a game.

    video_id -- playback id of the wanted feed, or None to take the first
                video feed listed for the game
    game_id  -- id of the game, passed on to the API

    Returns a ``(video_id, title, timestamp)`` tuple. ``timestamp`` is None
    when the game data carries no ``streamStart`` milestone.
    """
    game_data = self.api.get_game_data(video_id, game_id)
    game_date = game_data['dates'][0]
    date = game_date['date']  # yyyy-mm-dd
    game = game_date['games'][0]
    teams = game['teams']
    away = teams['away']['team']['name']
    home = teams['home']['team']['name']

    feed_type = "UNKNOWN"
    media = game['content']['media']
    for epg_item in media['epg']:
        # ignore audio feeds and highlights
        if epg_item.get('title') != 'NHLTV':
            continue
        for item in epg_item.get('items', []):
            # get the specified video feed based on the video id, or the
            # first video if no video id is specified
            if item.get('mediaPlaybackId') == video_id or video_id is None:
                video_id = item.get('mediaPlaybackId')
                feed_type = item.get('mediaFeedType')  # HOME / AWAY
    title = "%s: %s @ %s (%s feed)" % (date, away, home, feed_type)

    # The timestamp is optional: bind it up front so that the return below
    # does not fail when the stream start milestone is absent.
    timestamp = None
    stream_start = (media.get('milestones') or {}).get('streamStart')
    if stream_start:
        timestamp = parse_iso8601(stream_start)
    return (video_id, title, timestamp)
def extract_playlist_url_and_auth(self, video_id):
    """Call the NHL API to get the playlist url of a video and its authorization key.

    Returns a ``(m3u8_url, media_auth)`` tuple. Raises ExtractorError with the
    message supplied in the response when its status code is not 1.
    """
    stream_data = self.api.get_stream_data(video_id)
    if stream_data['status_code'] != 1:
        # e.g. Media not found, Too many sign ons, etc.
        raise ExtractorError(stream_data['status_message'], expected=True)

    event = stream_data['user_verified_event'][0]
    content = event['user_verified_content'][0]
    media_item = content['user_verified_media_item'][0]
    m3u8_url = media_item['url']

    session_attribute = stream_data['session_info']['sessionAttributes'][0]
    media_auth = session_attribute['attributeValue']
    return (m3u8_url, media_auth)
def get_60fps_playlist(self, url):
    """Return *url* rewritten so that it adds a 720p 60fps broadcast."""
    wired_web = re.compile('_wired_web')
    return wired_web.sub('_wired60', url)