From b54bb7742bb9538f14c4e160f53f90c96c7c2567 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 5 Mar 2017 01:39:12 -0500 Subject: [PATCH 1/3] [nbc] Support SNL videos that need ?snl=0 --- youtube_dl/extractor/nbc.py | 38 ++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index d2a44d05d..bf1c61081 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -5,7 +5,10 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE from .adobepass import AdobePassIE -from ..compat import compat_urllib_parse_urlparse +from ..compat import ( + compat_urllib_parse_urlparse, + compat_parse_qs, +) from ..utils import ( find_xpath_attr, lowercase_escape, @@ -84,7 +87,20 @@ class NBCIE(AdobePassIE): 'skip_download': True, }, 'skip': 'Only works from US', - } + }, + { + # Some SNL videos need special ?snl=0 handling + 'url': 'http://www.nbc.com/saturday-night-live/video/snl-host-octavia-spencer-finds-studio-8h/3477499', + 'info_dict': { + 'id': '3477499', + 'ext': 'mp4', + 'title': 'SNL Host Octavia Spencer Finds Studio 8H', + 'upload_date': '20170301', + 'description': 'Octavia Spencer hosts Saturday Night Live on March 4, 2017, with musical guest Father John Misty.', + 'uploader': 'NBCU-COM', + 'timestamp': 1488375900, + }, + }, ] def _real_extract(self, url): @@ -95,12 +111,28 @@ class NBCIE(AdobePassIE): 'ie_key': 'ThePlatform', 'id': video_id, } + parsed_url = compat_urllib_parse_urlparse(url) + + # http://www.nbc.com/generetic/generated/generetic-responsive.js?v2.31.26 + # does the following in browers: if the page is Saturday Night + # Live (snl), check for the query parameter ?snl=0; If absent, + # it load the page with ?snl=1 and then with ?snl=0. + # Emulate that, but shortcircuit straight to ?snl=0. + urlmeta = self._html_search_meta( + ['al:ios:url','al:android:url', 'twitter:app:url:googleplay'], + webpage) + if urlmeta and urlmeta.startswith('nbcsnl://') and \ + compat_parse_qs(parsed_url.query).get('snl') != 0: + url = update_url_query(url, { 'snl': 0}) + parsed_url = compat_urllib_parse_urlparse(url) + webpage = self._download_webpage(url, video_id) + video_data = None preload = self._search_regex( r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None) if preload: preload_data = self._parse_json(preload, video_id) - path = compat_urllib_parse_urlparse(url).path.rstrip('/') + path = parsed_url.path.rstrip('/') entity_id = preload_data.get('xref', {}).get(path) video_data = preload_data.get('entities', {}).get(entity_id) if video_data: From 5607dac41cfd013c60c5fa6abb9aad0b688a29e8 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 26 Mar 2017 12:07:50 -0400 Subject: [PATCH 2/3] [nbc] flake8 for PEP 8 --- youtube_dl/extractor/nbc.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index bf1c61081..a08603610 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -119,14 +119,14 @@ class NBCIE(AdobePassIE): # it load the page with ?snl=1 and then with ?snl=0. # Emulate that, but shortcircuit straight to ?snl=0. urlmeta = self._html_search_meta( - ['al:ios:url','al:android:url', 'twitter:app:url:googleplay'], + ['al:ios:url', 'al:android:url', 'twitter:app:url:googleplay'], webpage) - if urlmeta and urlmeta.startswith('nbcsnl://') and \ - compat_parse_qs(parsed_url.query).get('snl') != 0: - url = update_url_query(url, { 'snl': 0}) - parsed_url = compat_urllib_parse_urlparse(url) - webpage = self._download_webpage(url, video_id) - + if (urlmeta and urlmeta.startswith('nbcsnl://') and + compat_parse_qs(parsed_url.query).get('snl') != 0): + url = update_url_query(url, {'snl': 0}) + parsed_url = compat_urllib_parse_urlparse(url) + webpage = self._download_webpage(url, video_id) + video_data = None preload = self._search_regex( r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None) From 4a79ef39530fa2144eddf9957aaefeef0522e7f3 Mon Sep 17 00:00:00 2001 From: John Hawkinson Date: Sun, 26 Mar 2017 12:08:54 -0400 Subject: [PATCH 3/3] [nbc] grammar in comment --- youtube_dl/extractor/nbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index a08603610..445b82d5f 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -116,8 +116,8 @@ class NBCIE(AdobePassIE): # http://www.nbc.com/generetic/generated/generetic-responsive.js?v2.31.26 # does the following in browers: if the page is Saturday Night # Live (snl), check for the query parameter ?snl=0; If absent, - # it load the page with ?snl=1 and then with ?snl=0. - # Emulate that, but shortcircuit straight to ?snl=0. + # it loads the page with ?snl=1 and then with ?snl=0. + # Emulate that, but shortcut straight to ?snl=0. urlmeta = self._html_search_meta( ['al:ios:url', 'al:android:url', 'twitter:app:url:googleplay'], webpage)