From a0a5cc854cd8fce4cb568a183ace88f1df7a77ee Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sat, 14 Oct 2017 22:09:44 -0700 Subject: [PATCH 01/16] Initial commit --- youtube_dl/extractor/cammodels.py | 48 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/cammodels.py diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py new file mode 100644 index 000000000..efc372025 --- /dev/null +++ b/youtube_dl/extractor/cammodels.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals +from .common import InfoExtractor + +class CamModelsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P\w+)' + _MANIFEST_URL_ROOT_REGEX = r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))' + _USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36' + # _TEST = { + # 'url': 'https://www.cammodels.com/cam/AutumnKnight', + # 'params': { + # 'skip_download': True, + # }, + # 'skip': _ROOM_OFFLINE, + # 'info_dict': { + # 'id': 'AutumnKnight', + # 'ext': 'flv' + # } + # } + + def _real_extract(self, url): + video_id = self._match_id(url) + headers = { + 'User-Agent': self._USER_AGENT + } + webpage = self._download_webpage(url_or_request=url, video_id=video_id, headers=headers) + manifest_url_root = self._html_search_regex(self._MANIFEST_URL_ROOT_REGEX, webpage, 'manifest') + manifest_url = manifest_url_root + video_id + '.json' + manifest = self._download_json(manifest_url, video_id=video_id, headers=headers) + rtmp_formats = manifest['formats']['mp4-rtmp']['encodings'] + formats = [] + for format in rtmp_formats: + formats.append({ + 'ext': 'flv', + 'url': format.get('location'), + 'width': format.get('videoWidth'), + 'height': format.get('videoHeight'), + 'vbr': format.get('videoKbps'), + 'abr': format.get('audioKbps'), + 'format_id': str(format.get('videoWidth')) + }) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': self._live_title(video_id), + 'age_limit': self._rta_search(webpage), + 'ext': 'flv', + 'formats': formats + } \ No newline at end of file diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ecb33bc9e..ceff5dd24 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -144,6 +144,7 @@ from .camdemy import ( CamdemyIE, CamdemyFolderIE ) +from .cammodels import CamModelsIE from .camwithher import CamWithHerIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE From 8e46a0a46195a3332aeeaa0d6133b11ea6739dca Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sat, 14 Oct 2017 22:16:06 -0700 Subject: [PATCH 02/16] Fix Flake8 style violations --- youtube_dl/extractor/cammodels.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index efc372025..d852ca086 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor + class CamModelsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P\w+)' _MANIFEST_URL_ROOT_REGEX = r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))' @@ -45,4 +46,4 @@ class CamModelsIE(InfoExtractor): 'age_limit': self._rta_search(webpage), 'ext': 'flv', 'formats': formats - } \ No newline at end of file + } From 6e9f40c86dcf04992e8dd8c5a3dc1f83a51ee215 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 13:58:37 -0700 Subject: [PATCH 03/16] Remove useless test, add fallback logic to manifest parser, and provide useful errors --- youtube_dl/extractor/cammodels.py | 102 ++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index d852ca086..f215c3141 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -1,49 +1,81 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .common import ExtractorError +import json class CamModelsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P\w+)' - _MANIFEST_URL_ROOT_REGEX = r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))' - _USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36' - # _TEST = { - # 'url': 'https://www.cammodels.com/cam/AutumnKnight', - # 'params': { - # 'skip_download': True, - # }, - # 'skip': _ROOM_OFFLINE, - # 'info_dict': { - # 'id': 'AutumnKnight', - # 'ext': 'flv' - # } - # } + _MANIFEST_URL = r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))' + _MANIFEST_URL_CONSOLE_ERROR = 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.' + _OFFLINE = r'(?PI\'m offline, but let\'s stay connected!)' + _OFFLINE_CONSOLE_ERROR = 'This user is currently offline, so nothing can be downloaded.' + _PRIVATE = r'(?PI’m in a private show right now)' + _PRIVATE_CONSOLE_ERROR = 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.' + _MANIFEST_CONSOLE_ERROR = 'Link to stream info was found, but we couldn\'t access it. This stream may require login.' + _RTMP_URL_FALLBACK = r'(?Prtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))' + _RTMP_URL_FALLBACK_CONSOLE_ERROR = 'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.' + _HEADERS = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36' + } def _real_extract(self, url): video_id = self._match_id(url) - headers = { - 'User-Agent': self._USER_AGENT - } - webpage = self._download_webpage(url_or_request=url, video_id=video_id, headers=headers) - manifest_url_root = self._html_search_regex(self._MANIFEST_URL_ROOT_REGEX, webpage, 'manifest') - manifest_url = manifest_url_root + video_id + '.json' - manifest = self._download_json(manifest_url, video_id=video_id, headers=headers) - rtmp_formats = manifest['formats']['mp4-rtmp']['encodings'] - formats = [] - for format in rtmp_formats: - formats.append({ - 'ext': 'flv', - 'url': format.get('location'), - 'width': format.get('videoWidth'), - 'height': format.get('videoHeight'), - 'vbr': format.get('videoKbps'), - 'abr': format.get('audioKbps'), - 'format_id': str(format.get('videoWidth')) - }) - self._sort_formats(formats) + webpage = self._download_webpage(url_or_request=url, video_id=video_id, headers=self._HEADERS) + manifest_url = self._get_manifest_url_from_webpage(video_id=video_id, webpage=webpage) + manifest = self._get_manifest_from_manifest_url(manifest_url=manifest_url, video_id=video_id, webpage=webpage) + formats = self._get_formats_from_manifest(manifest=manifest, video_id=video_id) return { 'id': video_id, 'title': self._live_title(video_id), - 'age_limit': self._rta_search(webpage), - 'ext': 'flv', 'formats': formats } + + def _get_manifest_url_from_webpage(self, video_id, webpage): + manifest_url_root = self._html_search_regex(pattern=self._MANIFEST_URL, string=webpage, name='manifest', fatal=False) + if not manifest_url_root: + offline = self._html_search_regex(pattern=self._OFFLINE, string=webpage, name='offline indicator', fatal=False) + if offline: + raise ExtractorError(msg=self._OFFLINE_CONSOLE_ERROR, expected=True, video_id=video_id) + private = self._html_search_regex(pattern=self._PRIVATE, string=webpage, name='private show indicator', fatal=False) + if private: + raise ExtractorError(msg=self._PRIVATE_CONSOLE_ERROR, expected=True, video_id=video_id) + raise ExtractorError(msg=self._MANIFEST_URL_CONSOLE_ERROR, expected=False, video_id=video_id) + manifest_url = manifest_url_root + video_id + '.json' + return manifest_url + + def _get_manifest_from_manifest_url(self, manifest_url, video_id, webpage): + manifest = self._download_json(url_or_request=manifest_url, video_id=video_id, headers=self._HEADERS, fatal=False) + if not manifest: + raise ExtractorError(msg=self._MANIFEST_CONSOLE_ERROR, expected=False, video_id=video_id) + return manifest + + def _get_formats_from_manifest(self, manifest, video_id): + try: + rtmp_formats = manifest['formats']['mp4-rtmp']['encodings'] + formats = [] + for format in rtmp_formats: + formats.append({ + 'ext': 'flv', + 'url': format.get('location'), + 'width': format.get('videoWidth'), + 'height': format.get('videoHeight'), + 'vbr': format.get('videoKbps'), + 'abr': format.get('audioKbps'), + 'format_id': str(format.get('videoWidth')) + }) + # If they change the JSON format, then fallback to parsing out RTMP links via regex. + except: + manifest_json = json.dumps(manifest) + manifest_links = self._search_regex(pattern=self._RTMP_URL_FALLBACK, string=manifest_json, name='RTMP URLs', fatal=False) + if not manifest_links: + raise ExtractorError(msg=self._RTMP_URL_FALLBACK_CONSOLE_ERROR, expected=False, video_id=video_id) + formats = [] + for manifest_link in manifest_links: + formats.append({ + 'ext': 'flv', + 'url': manifest_link, + 'format_id': manifest_link.split(sep='/')[-1] + }) + self._sort_formats(formats) + return formats From d538ff2e583188bc7fb0e672a230d0494b73216e Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 14:26:05 -0700 Subject: [PATCH 04/16] Fix some fallback logic for RTMP link parsing --- youtube_dl/extractor/cammodels.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index f215c3141..931701af9 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from .common import ExtractorError import json +import re class CamModelsIE(InfoExtractor): @@ -67,15 +68,16 @@ class CamModelsIE(InfoExtractor): # If they change the JSON format, then fallback to parsing out RTMP links via regex. except: manifest_json = json.dumps(manifest) - manifest_links = self._search_regex(pattern=self._RTMP_URL_FALLBACK, string=manifest_json, name='RTMP URLs', fatal=False) + manifest_links = re.finditer(pattern=self._RTMP_URL_FALLBACK, string=manifest_json) if not manifest_links: raise ExtractorError(msg=self._RTMP_URL_FALLBACK_CONSOLE_ERROR, expected=False, video_id=video_id) formats = [] for manifest_link in manifest_links: + url = manifest_link.group('id') formats.append({ 'ext': 'flv', - 'url': manifest_link, - 'format_id': manifest_link.split(sep='/')[-1] + 'url': url, + 'format_id': url.split(sep='/')[-1] }) self._sort_formats(formats) return formats From 1174b97fb87c08b87893847c9a27a9f91816be69 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 15:38:29 -0700 Subject: [PATCH 05/16] Inlined strings that were only used once --- youtube_dl/extractor/cammodels.py | 56 +++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 931701af9..03370f708 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -7,15 +7,6 @@ import re class CamModelsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P\w+)' - _MANIFEST_URL = r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))' - _MANIFEST_URL_CONSOLE_ERROR = 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.' - _OFFLINE = r'(?PI\'m offline, but let\'s stay connected!)' - _OFFLINE_CONSOLE_ERROR = 'This user is currently offline, so nothing can be downloaded.' - _PRIVATE = r'(?PI’m in a private show right now)' - _PRIVATE_CONSOLE_ERROR = 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.' - _MANIFEST_CONSOLE_ERROR = 'Link to stream info was found, but we couldn\'t access it. This stream may require login.' - _RTMP_URL_FALLBACK = r'(?Prtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))' - _RTMP_URL_FALLBACK_CONSOLE_ERROR = 'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.' _HEADERS = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36' } @@ -33,22 +24,46 @@ class CamModelsIE(InfoExtractor): } def _get_manifest_url_from_webpage(self, video_id, webpage): - manifest_url_root = self._html_search_regex(pattern=self._MANIFEST_URL, string=webpage, name='manifest', fatal=False) + manifest_url_root = self._html_search_regex( + pattern=r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))', + string=webpage, + name='manifest', + fatal=False) if not manifest_url_root: - offline = self._html_search_regex(pattern=self._OFFLINE, string=webpage, name='offline indicator', fatal=False) + offline = self._html_search_regex( + pattern=r'(?PI\'m offline, but let\'s stay connected!)', + string=webpage, + name='offline indicator', + fatal=False) if offline: - raise ExtractorError(msg=self._OFFLINE_CONSOLE_ERROR, expected=True, video_id=video_id) - private = self._html_search_regex(pattern=self._PRIVATE, string=webpage, name='private show indicator', fatal=False) + raise ExtractorError( + msg='This user is currently offline, so nothing can be downloaded.', + expected=True, + video_id=video_id) + private = self._html_search_regex( + pattern=r'(?PI’m in a private show right now)', + string=webpage, + name='private show indicator', + fatal=False) if private: - raise ExtractorError(msg=self._PRIVATE_CONSOLE_ERROR, expected=True, video_id=video_id) - raise ExtractorError(msg=self._MANIFEST_URL_CONSOLE_ERROR, expected=False, video_id=video_id) + raise ExtractorError( + msg='This user is doing a private show, which requires payment. This extractor currently does not support private streams.', + expected=True, + video_id=video_id) + raise ExtractorError( + msg='Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.', + expected=False, + video_id=video_id) manifest_url = manifest_url_root + video_id + '.json' return manifest_url def _get_manifest_from_manifest_url(self, manifest_url, video_id, webpage): manifest = self._download_json(url_or_request=manifest_url, video_id=video_id, headers=self._HEADERS, fatal=False) if not manifest: - raise ExtractorError(msg=self._MANIFEST_CONSOLE_ERROR, expected=False, video_id=video_id) + raise ExtractorError( + msg='Link to stream info was found, but we couldn\'t access it. This stream may require login.', + expected=False, + video_id=video_id) return manifest def _get_formats_from_manifest(self, manifest, video_id): @@ -68,9 +83,14 @@ class CamModelsIE(InfoExtractor): # If they change the JSON format, then fallback to parsing out RTMP links via regex. except: manifest_json = json.dumps(manifest) - manifest_links = re.finditer(pattern=self._RTMP_URL_FALLBACK, string=manifest_json) + manifest_links = re.finditer( + pattern=r'(?Prtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))', + string=manifest_json) if not manifest_links: - raise ExtractorError(msg=self._RTMP_URL_FALLBACK_CONSOLE_ERROR, expected=False, video_id=video_id) + raise ExtractorError( + msg='Link to stream info was found, but we couldn\'t read the response. This is probably a bug.', + expected=False, + video_id=video_id) formats = [] for manifest_link in manifest_links: url = manifest_link.group('id') From 26ab8d70cd8847b2a8be7e3e4a323979b5ae0667 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 15:40:34 -0700 Subject: [PATCH 06/16] Remove misleading part of error message regarding need for login --- youtube_dl/extractor/cammodels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 03370f708..94fba700a 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -61,7 +61,7 @@ class CamModelsIE(InfoExtractor): manifest = self._download_json(url_or_request=manifest_url, video_id=video_id, headers=self._HEADERS, fatal=False) if not manifest: raise ExtractorError( - msg='Link to stream info was found, but we couldn\'t access it. This stream may require login.', + msg='Link to stream URLs was found, but we couldn\'t access it.', expected=False, video_id=video_id) return manifest From 765de0f39c951582119f3ef98efa8a8a08141502 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 15:53:51 -0700 Subject: [PATCH 07/16] Remove unneeded positional arguments. Add comment explaining why overriding User-Agent is needed. --- youtube_dl/extractor/cammodels.py | 59 +++++++++++++++++++------------ 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 94fba700a..5521ad4ff 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -9,59 +9,74 @@ class CamModelsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P\w+)' _HEADERS = { 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36' + # Needed because server doesn't return links to video URLs if a browser-like User-Agent is not used } def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url_or_request=url, video_id=video_id, headers=self._HEADERS) - manifest_url = self._get_manifest_url_from_webpage(video_id=video_id, webpage=webpage) - manifest = self._get_manifest_from_manifest_url(manifest_url=manifest_url, video_id=video_id, webpage=webpage) - formats = self._get_formats_from_manifest(manifest=manifest, video_id=video_id) + webpage = self._download_webpage( + url, + video_id, + headers=self._HEADERS) + manifest_url = self._get_manifest_url_from_webpage( + webpage, + video_id) + manifest = self._get_manifest_from_manifest_url( + manifest_url, + video_id, + webpage) + formats = self._get_formats_from_manifest( + manifest, + video_id) return { 'id': video_id, 'title': self._live_title(video_id), 'formats': formats } - def _get_manifest_url_from_webpage(self, video_id, webpage): + def _get_manifest_url_from_webpage(self, webpage, video_id): manifest_url_root = self._html_search_regex( - pattern=r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))', - string=webpage, - name='manifest', + r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))', + webpage, + 'manifest', fatal=False) if not manifest_url_root: offline = self._html_search_regex( - pattern=r'(?PI\'m offline, but let\'s stay connected!)', - string=webpage, - name='offline indicator', + r'(?PI\'m offline, but let\'s stay connected!)', + webpage, + 'offline indicator', fatal=False) if offline: raise ExtractorError( - msg='This user is currently offline, so nothing can be downloaded.', + 'This user is currently offline, so nothing can be downloaded.', expected=True, video_id=video_id) private = self._html_search_regex( - pattern=r'(?PI’m in a private show right now)', - string=webpage, - name='private show indicator', + r'(?PI’m in a private show right now)', + webpage, + 'private show indicator', fatal=False) if private: raise ExtractorError( - msg='This user is doing a private show, which requires payment. This extractor currently does not support private streams.', + 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.', expected=True, video_id=video_id) raise ExtractorError( - msg='Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.', + 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.', expected=False, video_id=video_id) manifest_url = manifest_url_root + video_id + '.json' return manifest_url def _get_manifest_from_manifest_url(self, manifest_url, video_id, webpage): - manifest = self._download_json(url_or_request=manifest_url, video_id=video_id, headers=self._HEADERS, fatal=False) + manifest = self._download_json( + manifest_url, + video_id, + headers=self._HEADERS, + fatal=False) if not manifest: raise ExtractorError( - msg='Link to stream URLs was found, but we couldn\'t access it.', + 'Link to stream URLs was found, but we couldn\'t access it.', expected=False, video_id=video_id) return manifest @@ -84,11 +99,11 @@ class CamModelsIE(InfoExtractor): except: manifest_json = json.dumps(manifest) manifest_links = re.finditer( - pattern=r'(?Prtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))', - string=manifest_json) + r'(?Prtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))', + manifest_json) if not manifest_links: raise ExtractorError( - msg='Link to stream info was found, but we couldn\'t read the response. This is probably a bug.', + 'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.', expected=False, video_id=video_id) formats = [] From cd56d2832589bae46c6dba36fa79e0244f950177 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 15:58:12 -0700 Subject: [PATCH 08/16] Inline methods only used once --- youtube_dl/extractor/cammodels.py | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 5521ad4ff..8e00d8edc 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -18,23 +18,6 @@ class CamModelsIE(InfoExtractor): url, video_id, headers=self._HEADERS) - manifest_url = self._get_manifest_url_from_webpage( - webpage, - video_id) - manifest = self._get_manifest_from_manifest_url( - manifest_url, - video_id, - webpage) - formats = self._get_formats_from_manifest( - manifest, - video_id) - return { - 'id': video_id, - 'title': self._live_title(video_id), - 'formats': formats - } - - def _get_manifest_url_from_webpage(self, webpage, video_id): manifest_url_root = self._html_search_regex( r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))', webpage, @@ -66,9 +49,6 @@ class CamModelsIE(InfoExtractor): expected=False, video_id=video_id) manifest_url = manifest_url_root + video_id + '.json' - return manifest_url - - def _get_manifest_from_manifest_url(self, manifest_url, video_id, webpage): manifest = self._download_json( manifest_url, video_id, @@ -79,9 +59,6 @@ class CamModelsIE(InfoExtractor): 'Link to stream URLs was found, but we couldn\'t access it.', expected=False, video_id=video_id) - return manifest - - def _get_formats_from_manifest(self, manifest, video_id): try: rtmp_formats = manifest['formats']['mp4-rtmp']['encodings'] formats = [] @@ -115,4 +92,8 @@ class CamModelsIE(InfoExtractor): 'format_id': url.split(sep='/')[-1] }) self._sort_formats(formats) - return formats + return { + 'id': video_id, + 'title': self._live_title(video_id), + 'formats': formats + } From af9fa317f1eae6c2a49c1a6f410fe32031bf6b9d Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 16:12:57 -0700 Subject: [PATCH 09/16] Make regex searches for offline/private error strings not throw warnings --- youtube_dl/extractor/cammodels.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 8e00d8edc..d2c013c12 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -28,7 +28,8 @@ class CamModelsIE(InfoExtractor): r'(?PI\'m offline, but let\'s stay connected!)', webpage, 'offline indicator', - fatal=False) + None, + False) if offline: raise ExtractorError( 'This user is currently offline, so nothing can be downloaded.', @@ -38,7 +39,8 @@ class CamModelsIE(InfoExtractor): r'(?PI’m in a private show right now)', webpage, 'private show indicator', - fatal=False) + None, + False) if private: raise ExtractorError( 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.', From f9d000e26507987c06dd1204eb4c51f5811f2a40 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 16:22:00 -0700 Subject: [PATCH 10/16] Make download of JSON list of video URLs fatal. Use note and errNote instead of directly raising ExtractorErrors --- youtube_dl/extractor/cammodels.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index d2c013c12..5381ed674 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -54,13 +54,9 @@ class CamModelsIE(InfoExtractor): manifest = self._download_json( manifest_url, video_id, - headers=self._HEADERS, - fatal=False) - if not manifest: - raise ExtractorError( - 'Link to stream URLs was found, but we couldn\'t access it.', - expected=False, - video_id=video_id) + 'Downloading links to streams.', + 'Link to stream URLs was found, but we couldn\'t access it.', + headers=self._HEADERS) try: rtmp_formats = manifest['formats']['mp4-rtmp']['encodings'] formats = [] From 437dd510d356bffb381a955c0887c4b9e63ae400 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 16:30:10 -0700 Subject: [PATCH 11/16] Only catch KeyError when accessing JSON dict --- youtube_dl/extractor/cammodels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 5381ed674..28e20033b 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -71,7 +71,7 @@ class CamModelsIE(InfoExtractor): 'format_id': str(format.get('videoWidth')) }) # If they change the JSON format, then fallback to parsing out RTMP links via regex. - except: + except KeyError: manifest_json = json.dumps(manifest) manifest_links = re.finditer( r'(?Prtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))', From 5ce4b7599a1f3b3d16c5220a1c8095236ff2d54f Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 16:58:55 -0700 Subject: [PATCH 12/16] Get all formats, not just RTMP ones, so that users don't need to install rtmpdump. --- youtube_dl/extractor/cammodels.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 28e20033b..86bb302a2 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -22,7 +22,8 @@ class CamModelsIE(InfoExtractor): r'manifestUrlRoot=(?Phttps?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))', webpage, 'manifest', - fatal=False) + None, + False) if not manifest_url_root: offline = self._html_search_regex( r'(?PI\'m offline, but let\'s stay connected!)', @@ -58,18 +59,23 @@ class CamModelsIE(InfoExtractor): 'Link to stream URLs was found, but we couldn\'t access it.', headers=self._HEADERS) try: - rtmp_formats = manifest['formats']['mp4-rtmp']['encodings'] formats = [] - for format in rtmp_formats: - formats.append({ - 'ext': 'flv', - 'url': format.get('location'), - 'width': format.get('videoWidth'), - 'height': format.get('videoHeight'), - 'vbr': format.get('videoKbps'), - 'abr': format.get('audioKbps'), - 'format_id': str(format.get('videoWidth')) - }) + all_formats = manifest['formats'] + for fmtName in all_formats: + fmt = all_formats[fmtName] + encodings = fmt.get('encodings') + if not encodings: + continue + for encoding in encodings: + formats.append({ + 'ext': 'mp4', + 'url': encoding.get('location'), + 'width': encoding.get('videoWidth'), + 'height': encoding.get('videoHeight'), + 'vbr': encoding.get('videoKbps'), + 'abr': encoding.get('audioKbps'), + 'format_id': fmtName + str(encoding.get('videoWidth')) + }) # If they change the JSON format, then fallback to parsing out RTMP links via regex. except KeyError: manifest_json = json.dumps(manifest) From 8e4f2196488aabdad8d64f63674201bef7662ff5 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 17:10:35 -0700 Subject: [PATCH 13/16] Fix accidentally casting URL using int_or_none --- youtube_dl/extractor/cammodels.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 86bb302a2..963afeb36 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -3,6 +3,8 @@ from .common import InfoExtractor from .common import ExtractorError import json import re +from ..utils import int_or_none + class CamModelsIE(InfoExtractor): @@ -70,10 +72,10 @@ class CamModelsIE(InfoExtractor): formats.append({ 'ext': 'mp4', 'url': encoding.get('location'), - 'width': encoding.get('videoWidth'), - 'height': encoding.get('videoHeight'), - 'vbr': encoding.get('videoKbps'), - 'abr': encoding.get('audioKbps'), + 'width': int_or_none(encoding.get('videoWidth')), + 'height': int_or_none(encoding.get('videoHeight')), + 'vbr': int_or_none(encoding.get('videoKbps')), + 'abr': int_or_none(encoding.get('audioKbps')), 'format_id': fmtName + str(encoding.get('videoWidth')) }) # If they change the JSON format, then fallback to parsing out RTMP links via regex. From f70b37ef4f4ab191b203c5ff290aea155e6b01d0 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 17:53:14 -0700 Subject: [PATCH 14/16] Combine ExtractorError calls --- youtube_dl/extractor/cammodels.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index 963afeb36..f4365fc60 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -6,7 +6,6 @@ import re from ..utils import int_or_none - class CamModelsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P\w+)' _HEADERS = { @@ -33,26 +32,20 @@ class CamModelsIE(InfoExtractor): 'offline indicator', None, False) - if offline: - raise ExtractorError( - 'This user is currently offline, so nothing can be downloaded.', - expected=True, - video_id=video_id) private = self._html_search_regex( r'(?PI’m in a private show right now)', webpage, 'private show indicator', None, False) - if private: - raise ExtractorError( - 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.', - expected=True, - video_id=video_id) + err = 'This user is currently offline, so nothing can be downloaded.' if offline \ + else 'This user is doing a private show, which requires payment. This extractor currently does not support private streams.' if private \ + else 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.' raise ExtractorError( - 'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.', - expected=False, - video_id=video_id) + err, + expected=True if offline or private else False, + video_id=video_id + ) manifest_url = manifest_url_root + video_id + '.json' manifest = self._download_json( manifest_url, From 95dc812c9cdeb9d1079f9c48da900c55c12ee309 Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 17:56:35 -0700 Subject: [PATCH 15/16] Make mandatory field use unsafe access, so it triggers fallback if not found --- youtube_dl/extractor/cammodels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index f4365fc60..af5b89399 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -64,7 +64,7 @@ class CamModelsIE(InfoExtractor): for encoding in encodings: formats.append({ 'ext': 'mp4', - 'url': encoding.get('location'), + 'url': encoding['location'], 'width': int_or_none(encoding.get('videoWidth')), 'height': int_or_none(encoding.get('videoHeight')), 'vbr': int_or_none(encoding.get('videoKbps')), From 5d7edefcb7058bd233186b206579e1c4228a31ab Mon Sep 17 00:00:00 2001 From: mars67857 Date: Sun, 15 Oct 2017 22:17:49 -0700 Subject: [PATCH 16/16] WSS:// URLs aren't supported, so limit to RTMP and HLS-over-HTTP protocols. --- youtube_dl/extractor/cammodels.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py index af5b89399..1711d7096 100644 --- a/youtube_dl/extractor/cammodels.py +++ b/youtube_dl/extractor/cammodels.py @@ -55,13 +55,8 @@ class CamModelsIE(InfoExtractor): headers=self._HEADERS) try: formats = [] - all_formats = manifest['formats'] - for fmtName in all_formats: - fmt = all_formats[fmtName] - encodings = fmt.get('encodings') - if not encodings: - continue - for encoding in encodings: + for fmtName in ['mp4-rtmp', 'mp4-hls']: + for encoding in manifest['formats'][fmtName]['encodings']: formats.append({ 'ext': 'mp4', 'url': encoding['location'], @@ -86,7 +81,7 @@ class CamModelsIE(InfoExtractor): for manifest_link in manifest_links: url = manifest_link.group('id') formats.append({ - 'ext': 'flv', + 'ext': 'mp4', 'url': url, 'format_id': url.split(sep='/')[-1] })