From b037c71c1d1ba35dc29ba86ebbba9a9bdd008ec0 Mon Sep 17 00:00:00 2001 From: wankerer Date: Tue, 24 May 2016 10:18:36 -0700 Subject: [PATCH 1/5] [eporner] fix for the new URL layout Recently eporner slightly changed the URL layout, the ID that used to be digits only are now digits and letters, so youtube-dl falls back to the generic extractor that doesn't work. Fix the matching regex to allow letters in ID. [v2: added a test case] --- youtube_dl/extractor/eporner.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index e006921ec..581276694 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -11,8 +11,8 @@ from ..utils import ( class EpornerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P\d+)/(?P[\w-]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P\w+)/(?P[\w-]+)' + _TESTS = [{ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'md5': '39d486f046212d8e1b911c52ab4691f8', 'info_dict': { @@ -23,8 +23,22 @@ class EpornerIE(InfoExtractor): 'duration': 1838, 'view_count': int, 'age_limit': 18, - } - } + }, + }, + # New (May 2016) URL layout + { + 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', + 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', + 'info_dict': { + 'id': '3YRUtzMcWn0', + 'display_id': 'Star-Wars-XXX-Parody', + 'ext': 'mp4', + 'title': 'Star Wars XXX Parody', + 'duration': 361.0, + 'view_count': int, + 'age_limit': 18, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From 88b7596d78f0ff6cfd75adeb5cc4094aaa0dd867 Mon Sep 17 00:00:00 2001 From: Kagami Hiiragi Date: Tue, 24 May 2016 12:13:05 +0300 Subject: [PATCH 2/5] [vlive] Address site update Changes: * Fix video params extraction * Don't make status request since status info now available on the page * Remove unneeded code * Fix test --- youtube_dl/extractor/vlive.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index a672ea9c5..147f52d45 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -1,8 +1,7 @@ # coding: utf-8 -from __future__ import division, unicode_literals +from __future__ import unicode_literals import re -import time from .common import InfoExtractor from ..utils import ( @@ -23,7 +22,7 @@ class VLiveIE(InfoExtractor): 'info_dict': { 'id': '1326', 'ext': 'mp4', - 'title': "[V] Girl's Day's Broadcast", + 'title': "[V LIVE] Girl's Day's Broadcast", 'creator': "Girl's Day", 'view_count': int, }, @@ -35,24 +34,11 @@ class VLiveIE(InfoExtractor): webpage = self._download_webpage( 'http://www.vlive.tv/video/%s' % video_id, video_id) - # UTC+x - UTC+9 (KST) - tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone - tz_offset = -tz // 60 - 9 * 60 - self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset) - - status_params = self._download_json( - 'http://www.vlive.tv/video/status?videoSeq=%s' % video_id, - video_id, 'Downloading JSON status', - headers={'Referer': url.encode('utf-8')}) - status = status_params.get('status') - air_start = status_params.get('onAirStartAt', '') - is_live = status_params.get('isLive') - video_params = self._search_regex( - r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)', + r'\bvlive\.video\.init\(([^)]+)\)', webpage, 'video params') - live_params, long_video_id, key = re.split( - r'"\s*,\s*"', video_params)[1:4] + status, _, _, live_params, long_video_id, key = re.split( + r'"\s*,\s*"', video_params)[2:8] if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR': live_params = self._parse_json('"%s"' % live_params, video_id) @@ -61,8 +47,6 @@ class VLiveIE(InfoExtractor): elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO': if long_video_id and key: return self._replay(video_id, webpage, long_video_id, key) - elif is_live: - status = 'LIVE_END' else: status = 'COMING_SOON' @@ -70,7 +54,7 @@ class VLiveIE(InfoExtractor): raise ExtractorError('Uploading for replay. Please wait...', expected=True) elif status == 'COMING_SOON': - raise ExtractorError('Coming soon! %s' % air_start, expected=True) + raise ExtractorError('Coming soon!', expected=True) elif status == 'CANCELED': raise ExtractorError('We are sorry, ' 'but the live broadcast has been canceled.', From edfb05f5f62c438a0a7975216995179ee6741bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 25 May 2016 20:51:17 +0600 Subject: [PATCH 3/5] [eporner] Make test only_matching --- youtube_dl/extractor/eporner.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py index 581276694..ac5d0fe24 100644 --- a/youtube_dl/extractor/eporner.py +++ b/youtube_dl/extractor/eporner.py @@ -24,20 +24,10 @@ class EpornerIE(InfoExtractor): 'view_count': int, 'age_limit': 18, }, - }, - # New (May 2016) URL layout - { + }, { + # New (May 2016) URL layout 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', - 'md5': '3469eeaa93b6967a34cdbdbb9d064b33', - 'info_dict': { - 'id': '3YRUtzMcWn0', - 'display_id': 'Star-Wars-XXX-Parody', - 'ext': 'mp4', - 'title': 'Star Wars XXX Parody', - 'duration': 361.0, - 'view_count': int, - 'age_limit': 18, - }, + 'only_matching': True, }] def _real_extract(self, url): From fc93b584d462b20fa67ccdcff82cc4e6bc372b33 Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 26 May 2016 18:21:35 +0200 Subject: [PATCH 4/5] [GodTV] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/godtv.py | 31 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 youtube_dl/extractor/godtv.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 05561149a..2ad9ac5bc 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -289,6 +289,7 @@ from .globo import ( GloboArticleIE, ) from .godtube import GodTubeIE +from .godtv import GodTVIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE from .googledrive import GoogleDriveIE diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py new file mode 100644 index 000000000..f5dc757ed --- /dev/null +++ b/youtube_dl/extractor/godtv.py @@ -0,0 +1,31 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class GodTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[a-z0-9-]+)+/(?P)' + _TEST = { + 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', + 'info_dict': { + 'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3', + 'ext': 'mp4', + 'title': 'Randy Needham', + 'duration': 3615.08, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + ooyala_id = self._search_regex(r'"content_id"\s*:\s*"([\w-]{32})"', webpage, display_id) + + return OoyalaIE._build_url_result(ooyala_id) From 2a4c631fee6d013edebae8ae609d8d5a2b3925fe Mon Sep 17 00:00:00 2001 From: TRox1972 Date: Thu, 26 May 2016 20:23:32 +0200 Subject: [PATCH 5/5] [GodTV] Fix extraction --- youtube_dl/extractor/godtv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/godtv.py b/youtube_dl/extractor/godtv.py index f5dc757ed..a57b17b03 100644 --- a/youtube_dl/extractor/godtv.py +++ b/youtube_dl/extractor/godtv.py @@ -7,7 +7,7 @@ from .ooyala import OoyalaIE class GodTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[a-z0-9-]+)+/(?P)' + _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[a-z0-9-]+)+/(?P[a-z0-9-]+)' _TEST = { 'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham', 'info_dict': {