[nhl] Cache authentication between runs

This should avoid the sign-on restriction error messages that appear when a user attempts to log in too many times. Refactored API calls into an NHLApi class and reduced repetition. Made timestamp an optional field. Made parsing of required fields use [] instead of get() (e.g. everything that produces the title / url / id).
2025-02-03 17:17:51 +08:00 · 2017-01-26 05:19:47 -06:00 · 2017-01-26 05:19:47 -06:00 · 34266aa604
commit 34266aa604
parent d044355fe0
1 changed files with 148 additions and 115 deletions
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@ -8,6 +8,7 @@ import time

 from .common import InfoExtractor
 from ..compat import (
+    compat_cookies,
    compat_urlparse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
@ -355,15 +356,99 @@ class NHLIE(InfoExtractor):
        }


+class NHLApi:
+    """Client for the NHL web services used by the NHL.tv extractor.
+
+    Every request, cookie operation and screen message is delegated to the
+    extractor passed to the constructor. After a successful login() the
+    serialized Authorization cookie is available as self.auth so that the
+    caller can cache it and hand it back to a later instance.
+    """
+
+    def __init__(self, extractor, auth=None):
+        """Create the client and restore a cached Authorization cookie.
+
+        extractor -- object used for downloads, cookies and screen output
+        auth -- cookie string previously exposed as self.auth, or None
+
+        Raises ExtractorError if auth is given but holds no Authorization
+        cookie.
+        """
+        self.extractor = extractor
+        self.auth = auth
+        if auth:
+            extractor.to_screen("Using cached credentials. Use the --rm-cache-dir option to remove.")
+            cookie_string = auth
+            # login() stores Morsel.output(), which starts with a
+            # 'Set-Cookie:' header; strict cookie parsers stop at that
+            # header, so drop it before parsing
+            if cookie_string.startswith('Set-Cookie:'):
+                cookie_string = cookie_string[len('Set-Cookie:'):].strip()
+            # SimpleCookie parses only the native str type: text has to be
+            # encoded on Python 2 and must be passed unchanged on Python 3
+            if not isinstance(cookie_string, str):
+                cookie_string = cookie_string.encode('utf8', 'replace')
+            cookie = compat_cookies.SimpleCookie(cookie_string)
+            if 'Authorization' not in cookie:
+                raise ExtractorError(
+                    'Cached credentials are unreadable. Use the --rm-cache-dir option to remove them.',
+                    expected=True)
+            auth_cookie = cookie['Authorization']
+            # TODO handle cookie expiry
+            extractor._set_cookie('nhl.com', 'Authorization', auth_cookie.value)
+
+    def login(self, username, password, auth_provider='nhl'):
+        """Log in and remember the resulting Authorization cookie in self.auth.
+
+        auth_provider -- 'nhl' or 'rogers'; selects the login endpoint, the
+            credentials key and the referrer
+
+        Raises ExtractorError for an unknown provider, or when the login
+        request leaves no Authorization cookie behind.
+        """
+        if auth_provider not in ['nhl', 'rogers']:
+            raise ExtractorError('Unknown authentication provider: %s. Valid values are nhl, rogers' % auth_provider)
+
+        access_token = self._get_oauth_access_token()
+
+        if auth_provider == 'nhl':
+            url = 'https://gateway.web.nhl.com/ws/subscription/flow/nhlPurchase.login'
+            credentials_key = 'nhlCredentials'
+            referrer = 'https://www.nhl.com/login/nhl'
+        else:
+            # only 'rogers' is left after the validation above
+            url = 'https://activation-rogers.svc.nhl.com/ws/subscription/flow/rogers.login-check'
+            credentials_key = 'rogerCredentials'
+            referrer = 'https://www.nhl.com/login/rogers'
+        credentials = {
+            credentials_key: {
+                'email': username,
+                'password': password,
+            }
+        }
+
+        login_request = sanitized_Request(
+            url,
+            data=json.dumps(credentials, sort_keys=True).encode('utf-8'),
+            headers={
+                'Referer': referrer,
+                'Accept': 'application/json, text/javascript, */*; q=0.01',
+                'Authorization': access_token,
+                'Content-Type': 'application/json'
+            })
+        # the response body is not used; the request is made for the cookies
+        # it sets
+        self.extractor._download_webpage(
+            login_request, None, 'Logging in', 'Unable to log in')
+        auth_cookie = self.extractor._get_cookies('http://nhl.com').get('Authorization')
+        if auth_cookie is None:
+            # without this check a rejected login surfaces as an
+            # AttributeError on None
+            raise ExtractorError(
+                'Unable to log in: no Authorization cookie was received',
+                expected=True)
+        # TODO this doesn't extract the cookie expiry from the cookie correctly
+        self.auth = auth_cookie.output()
+
+    def _get_oauth_access_token(self):
+        """Request an OAuth access token with the web client credentials.
+
+        Returns the 'access_token' value of the JSON response.
+        """
+        authorization = base64.b64encode(
+            'web_nhl-v1.0.0:2d1d846ea3b194a18ef40ac9fbce97e3'.encode('utf-8')
+        ).decode('ascii')
+        oauth_request = sanitized_Request(
+            'https://user.svc.nhl.com/oauth/token?grant_type=client_credentials',
+            # empty body still makes this a POST; it has to be bytes because
+            # urllib rejects str POST data on Python 3
+            data=b'',
+            headers={
+                'Referer': 'https://www.nhl.com/login',
+                'Accept': 'application/json, text/javascript, */*; q=0.01',
+                'Authorization': 'Basic %s' % authorization,
+                'Content-Type': 'application/json',
+            })
+        oauth_response = self.extractor._download_json(
+            oauth_request,
+            None,  # video_id
+            'Requesting OAuth access token',
+            'Unable to get OAuth access token')
+        return oauth_response['access_token']
+
+    def get_game_data(self, video_id, game_id):
+        """Download and return the schedule JSON for game_id.
+
+        video_id is only passed on to the extractor's download call.
+        """
+        game_data_url = 'https://statsapi.web.nhl.com/api/v1/schedule?gamePk=%s&expand=schedule.game.content.media.milestones&expand=schedule.game.content.media.epg&expand=schedule.venue' % game_id
+        return self.extractor._download_json(
+            game_data_url,
+            video_id,
+            'Downloading game data',
+            'Unable to download game data')
+
+    def get_stream_data(self, video_id):
+        """Download and return the stream JSON for the feed video_id."""
+        stream_data_url = 'https://mf.svc.nhl.com/ws/media/mf/v2.4/stream?contentId=%s&playbackScenario=HTTP_CLOUD_WIRED_WEB&sessionKey=%s&auth=response&format=json&platform=WEB_MEDIAPLAYER&_=%s000'
+        session_key = 'abcdefghijklmnop'
+        # whole seconds; the '000' suffix in the template pads the value to
+        # millisecond width
+        timestamp = int(time.time())
+        url = stream_data_url % (video_id, session_key, timestamp)
+        return self.extractor._download_json(url, video_id, 'Downloading stream data', 'Unable to download stream data')
+
+
 class NHLTVIE(InfoExtractor):
    IE_NAME = 'nhl.com:nhltv'
    _VALID_URL = r'https?://(?:www\.)?nhl.com/tv/(?P<gameId>\d+)(/[^/]+)*(/(?P<id>\d+))?'
-    _OAUTH_URL = 'https://user.svc.nhl.com/oauth/token?grant_type=client_credentials'
-    _NHL_LOGIN_URL = 'https://gateway.web.nhl.com/ws/subscription/flow/nhlPurchase.login'
-    _ROGERS_LOGIN_URL = 'https://activation-rogers.svc.nhl.com/ws/subscription/flow/rogers.login-check'
    _NETRC_MACHINE = 'nhltv'
+    _NHLTV_CACHE = 'nhltv'
    _TESTS = [{
-        # This is a free video that can be accessed by anyone with an NHL TV login
+        # This is a free video that anyone with an NHL TV login can access
        'url': 'https://www.nhl.com/tv/2016020321/221-1003765/46561403',
        'md5': '34d9518c495ebdad947b9723b5a7c9a9',
        'info_dict': {
@ -386,128 +471,31 @@ class NHLTVIE(InfoExtractor):
        'only_matching': True,
    }]

+    def _real_initialize(self):
+        """Set up self.api, logging in only when no credentials are cached"""
+        cached_auth = self._downloader.cache.load(self._NHLTV_CACHE, 'auth')
+        self.api = NHLApi(self, cached_auth)
+        if cached_auth is not None:
+            # the api object was handed the cached credentials above
+            return
+        self._login()
+
    def _login(self):
-        # TODO cache login to avoid 'Sign-on restriction: Too many usage attempts'
        (username, password) = self._get_login_info()
        if username is None:
            self.raise_login_required()
-
-        authorization = base64.b64encode(
-            'web_nhl-v1.0.0:2d1d846ea3b194a18ef40ac9fbce97e3'.encode('utf-8')).decode('ascii')
-        oauth_request = sanitized_Request(
-            self._OAUTH_URL,
-            data="",
-            headers={
-                'Referer': 'https://www.nhl.com/login',
-                'Accept': 'application/json, text/javascript, */*; q=0.01',
-                'Authorization': 'Basic %s' % authorization,
-                'Content-Type': 'application/json',
-            })
-        oauth_response = self._download_json(
-            oauth_request,
-            None,  # video_id
-            'Requesting OAuth access token',
-            'Unable to get OAuth access token')
-        access_token = oauth_response['access_token']
-
-        auth_provider = self._get_auth_provider()
-        if auth_provider == 'rogers':
-            login_request = self._create_rogers_login_request(username, password, access_token)
-        elif auth_provider == 'nhl' or auth_provider is None:
-            login_request = self._create_nhl_login_request(username, password, access_token)
-        else:
-            raise ExtractorError('Unknown authentication provider: %s. Valid values are nhl, rogers' % auth_provider)
-
-        # sets up the cookies we need to download
-        self._download_webpage(
-            login_request, None, 'Logging in', 'Unable to log in')
-
-    def _create_nhl_login_request(self, username, password, access_token):
-        login_data = {
-            'nhlCredentials': {
-                'email': username,
-                'password': password,
-            }
-        }
-        return sanitized_Request(
-            self._NHL_LOGIN_URL,
-            data=json.dumps(login_data, sort_keys=True).encode('utf-8'),
-            headers={
-                'Referer': 'https://www.nhl.com/login/nhl',
-                'Accept': 'application/json, text/javascript, */*; q=0.01',
-                'Authorization': access_token,
-                'Content-Type': 'application/json'
-            })
-
-    def _create_rogers_login_request(self, username, password, access_token):
-        login_data = {
-            'rogerCredentials': {
-                'email': username,
-                'password': password,
-            }
-        }
-        return sanitized_Request(
-            self._ROGERS_LOGIN_URL,
-            data=json.dumps(login_data, sort_keys=True).encode('utf-8'),
-            headers={
-                'Referer': 'https://www.nhl.com/login/rogers',
-                'Accept': 'application/json, text/javascript, */*; q=0.01',
-                'Authorization': access_token,
-                'Content-Type': 'application/json'
-            })
-
-    def _real_initialize(self):
-        self._login()
-
-    def extract_stream_info(self, video_id):
-        timestamp = int(time.time())
-        session_key = "abcdefghijklmnop"
-        stream_data_url = 'https://mf.svc.nhl.com/ws/media/mf/v2.4/stream?contentId=%s&playbackScenario=HTTP_CLOUD_WIRED_WEB&sessionKey=%s&auth=response&format=json&platform=WEB_MEDIAPLAYER&_=%s000' % (video_id, session_key, timestamp)
-        stream_data = self._download_json(stream_data_url, video_id, 'Downloading stream data', 'Unable to download stream data')
-        status_code = stream_data.get('status_code')
-        if status_code != 1:
-            # e.g. Media not found, Too many sign ons, etc.
-            status_message = stream_data.get('status_message')
-            raise ExtractorError(status_message, expected=True)
-        media_auth = stream_data.get('session_info').get('sessionAttributes')[0].get('attributeValue')
-        m3u8_url = stream_data.get('user_verified_event')[0].get('user_verified_content')[0].get('user_verified_media_item')[0].get('url')
-        return (media_auth, m3u8_url)
-
-    def extract_game_info(self, video_id, game_id):
-        game_data_url = 'https://statsapi.web.nhl.com/api/v1/schedule?gamePk=%s&expand=schedule.game.content.media.milestones&expand=schedule.game.content.media.epg&expand=schedule.venue' % game_id
-        game_data = self._download_json(game_data_url, video_id, 'Downloading game data', 'Unable to download game data')
-        game_date = game_data.get('dates')[0]
-        date = game_date.get('date')  # yyyy-mm-dd
-        game = game_date.get('games')[0]
-        teams = game.get('teams')
-        away = teams.get('away').get('team').get('name')
-        home = teams.get('home').get('team').get('name')
-        feed_type = "UNKNOWN"
-        media_node = game.get("content").get("media")
-        for epg_item in media_node.get("epg", []):
-            if epg_item.get("title") != "NHLTV":
-                continue
-            for item in epg_item.get('items', []):
-                if item.get('mediaPlaybackId') == video_id or video_id is None:
-                    feed_type = item.get('mediaFeedType')
-                    video_id = item.get('mediaPlaybackId')
-        timestamp = parse_iso8601(media_node.get('milestones').get('streamStart'))
-        title = "%s: %s @ %s (%s feed)" % (date, away, home, feed_type)
-        return (video_id, title, timestamp)
-
-    def get_60fps_playlist(self, url):
-        """Returns a modified url that adds a 60 fps broadcast"""
-        return re.sub('_wired_web', '_wired60', url)
+        auth_provider = self._get_auth_provider() or 'nhl'
+        self.report_login()
+        self.api.login(username, password, auth_provider)
+        self._downloader.cache.store(self._NHLTV_CACHE, 'auth', self.api.auth)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id, game_id = mobj.group('id'), mobj.group('gameId')
-
+        # At this point we may have a game_id without a video_id. The next call
+        # ensures that we get a video_id for the desired game.
        video_id, title, timestamp = self.extract_game_info(video_id, game_id)
-        media_auth, m3u8_url = self.extract_stream_info(video_id)
+        m3u8_url, media_auth = self.extract_playlist_url_and_auth(video_id)
        m3u8_url = self.get_60fps_playlist(m3u8_url)

-        # media auth cookie is required for the downloader
+        # media auth cookie authenticates the specific download url
        self._set_cookie('nhl.com', 'mediaAuth_v2', media_auth)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'ts', m3u8_id='hls')
        self._check_formats(formats, video_id)
@ -519,3 +507,48 @@ class NHLTVIE(InfoExtractor):
            'timestamp': timestamp,
            'formats': formats,
        }
+
+    def extract_game_info(self, video_id, game_id):
+        """Calls the nhl api to get the video_id, title and optional timestamp
+
+        video_id may be None; it is then taken from the first item of an
+        NHLTV epg entry of the game.
+
+        Returns a (video_id, title, timestamp) tuple. timestamp is the
+        parsed streamStart milestone, or None when the game data has none.
+        """
+        game_data = self.api.get_game_data(video_id, game_id)
+        game_date = game_data['dates'][0]
+        date = game_date['date']  # yyyy-mm-dd
+        game = game_date['games'][0]
+        teams = game['teams']
+        away = teams['away']['team']['name']
+        home = teams['home']['team']['name']
+        feed_type = "UNKNOWN"
+        media = game['content']['media']
+        for epg_item in media['epg']:
+            # ignore audio feeds and highlights
+            if epg_item.get('title') != 'NHLTV':
+                continue
+            for item in epg_item.get('items', []):
+                # get the specified video feed based on the video id, or the
+                # first video if no video id is specified
+                if item.get('mediaPlaybackId') == video_id or video_id is None:
+                    video_id = item.get('mediaPlaybackId')
+                    feed_type = item.get('mediaFeedType')  # HOME / AWAY
+        title = "%s: %s @ %s (%s feed)" % (date, away, home, feed_type)
+
+        # timestamp is optional: start from None so that a game without a
+        # streamStart milestone does not leave the name unbound at the return
+        timestamp = None
+        stream_start = (media.get('milestones') or {}).get('streamStart')
+        if stream_start:
+            timestamp = parse_iso8601(stream_start)
+
+        return (video_id, title, timestamp)
+
+    def extract_playlist_url_and_auth(self, video_id):
+        """Asks the nhl api for the stream of video_id
+
+        Returns a (playlist url, media authorization key) tuple. Raises
+        ExtractorError with the api's status message when the status code
+        is anything other than 1.
+        """
+        response = self.api.get_stream_data(video_id)
+        if response['status_code'] != 1:
+            # e.g. Media not found, Too many sign ons, etc.
+            raise ExtractorError(response['status_message'], expected=True)
+        media_item = response['user_verified_event'][0]['user_verified_content'][0]['user_verified_media_item'][0]
+        playlist_url = media_item['url']
+        session_attribute = response['session_info']['sessionAttributes'][0]
+        return (playlist_url, session_attribute['attributeValue'])
+
+    def get_60fps_playlist(self, url):
+        """Rewrites the playlist url so that it adds a 720p 60fps broadcast"""
+        wired_web = re.compile('_wired_web')
+        return wired_web.sub('_wired60', url)