diff --git a/AtomicParsley.exe b/AtomicParsley.exe deleted file mode 100644 index 8c95603ef..000000000 Binary files a/AtomicParsley.exe and /dev/null differ diff --git a/ffmpeg.exe b/ffmpeg.exe deleted file mode 100644 index f93a964ce..000000000 Binary files a/ffmpeg.exe and /dev/null differ diff --git a/ffprobe.exe b/ffprobe.exe deleted file mode 100644 index c88090900..000000000 Binary files a/ffprobe.exe and /dev/null differ diff --git a/venv.bat b/venv.bat deleted file mode 100644 index 1b8d61c75..000000000 --- a/venv.bat +++ /dev/null @@ -1,3 +0,0 @@ - - -@call ".\venv\Scripts\activate" diff --git a/youtube_dl/extractor/pornhubpremium.py b/youtube_dl/extractor/pornhubpremium.py deleted file mode 100644 index 22b7e19a6..000000000 --- a/youtube_dl/extractor/pornhubpremium.py +++ /dev/null @@ -1,451 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import functools -import itertools -import operator -import re - -from .common import InfoExtractor -from ..compat import ( - compat_HTTPError, - compat_str, - compat_urllib_request, -) -from .openload import PhantomJSwrapper -from ..utils import ( - determine_ext, - ExtractorError, - int_or_none, - orderedSet, - remove_quotes, - str_to_int, - url_or_none, -) - - -class PorHubPremiumBaseIE(InfoExtractor): - def _download_webpage_handle(self, *args, **kwargs): - def dl(*args, **kwargs): - return super(PorHubPremiumBaseIE, self)._download_webpage_handle(*args, **kwargs) - - webpage, urlh = dl(*args, **kwargs) - - if any(re.search(p, webpage) for p in ( - r']+\bonload=["\']go\(\)', - r'document\.cookie\s*=\s*["\']RNKEY=', - r'document\.location\.reload\(true\)')): - url_or_request = args[0] - url = (url_or_request.get_full_url() - if isinstance(url_or_request, compat_urllib_request.Request) - else url_or_request) - phantom = PhantomJSwrapper(self, required_version='2.0') - phantom.get(url, html=webpage) - webpage, urlh = dl(*args, **kwargs) - - return webpage, urlh - - -class PorHubPremiumIE(PorHubPremiumBaseIE): - IE_DESC = 'PorHubPremium and Thumbzilla' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| - (?:www\.)?thumbzilla\.com/video/ - ) - (?P[\da-z]+) - ''' - _TESTS = [{ - 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', - 'md5': '1e19b41231a02eba417839222ac9d58e', - 'info_dict': { - 'id': '648719015', - 'ext': 'mp4', - 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', - 'uploader': 'Babes', - 'upload_date': '20130628', - 'duration': 361, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'age_limit': 18, - 'tags': list, - 'categories': list, - }, - }, { - # non-ASCII title - 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', - 'info_dict': { - 'id': '1331683002', - 'ext': 'mp4', - 'title': '重庆婷婷女王足交', - 'uploader': 'Unknown', - 'upload_date': '20150213', - 'duration': 1753, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'age_limit': 18, - 'tags': list, - 'categories': list, - }, - 'params': { - 'skip_download': True, - }, - }, { - # subtitles - 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7', - 'info_dict': { - 'id': 'ph5af5fef7c2aa7', - 'ext': 'mp4', - 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor', - 'uploader': 'BFFs', - 'duration': 622, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - 'comment_count': int, - 'age_limit': 18, - 'tags': list, - 'categories': list, - 'subtitles': { - 'en': [{ - "ext": 'srt' - }] - }, - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', - 'only_matching': True, - }, { - # removed at the request of cam4.com - 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', - 'only_matching': True, - }, { - # removed at the request of the copyright owner - 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', - 'only_matching': True, - }, { - # removed by uploader - 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', - 'only_matching': True, - }, { - # private video - 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7', - 'only_matching': True, - }, { - 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', - 'only_matching': True, - }, { - 'url': 'http://www.pornhub.com/video/show?viewkey=648719015', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', - 'only_matching': True, - }] - - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+?src=["\'](?P(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)', - webpage) - - def _extract_count(self, pattern, webpage, name): - return str_to_int(self._search_regex( - pattern, webpage, '%s count' % name, fatal=False)) - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') or 'pornhub.com' - video_id = mobj.group('id') - - self._set_cookie(host, 'age_verified', '1') - - def dl_webpage(platform): - self._set_cookie(host, 'platform', platform) - return self._download_webpage( - 'http://www.%s/view_video.php?viewkey=%s' % (host, video_id), - video_id, 'Downloading %s webpage' % platform) - - webpage = dl_webpage('pc') - - error_msg = self._html_search_regex( - r'(?s)]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P.+?)', - webpage, 'error message', default=None, group='error') - if error_msg: - error_msg = re.sub(r'\s+', ' ', error_msg) - raise ExtractorError( - 'PorHubPremium said: %s' % error_msg, - expected=True, video_id=video_id) - - # video_title from flashvars contains whitespace instead of non-ASCII (see - # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying - # on that anymore. - title = self._html_search_meta( - 'twitter:title', webpage, default=None) or self._search_regex( - (r']+class=["\']title["\'][^>]*>(?P[^<]+)', - r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', - r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), - webpage, 'title', group='title') - - video_urls = [] - video_urls_set = set() - subtitles = {} - - flashvars = self._parse_json( - self._search_regex( - r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), - video_id) - if flashvars: - subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) - if subtitle_url: - subtitles.setdefault('en', []).append({ - 'url': subtitle_url, - 'ext': 'srt', - }) - thumbnail = flashvars.get('image_url') - duration = int_or_none(flashvars.get('video_duration')) - media_definitions = flashvars.get('mediaDefinitions') - if isinstance(media_definitions, list): - for definition in media_definitions: - if not isinstance(definition, dict): - continue - video_url = definition.get('videoUrl') - if not video_url or not isinstance(video_url, compat_str): - continue - if video_url in video_urls_set: - continue - video_urls_set.add(video_url) - video_urls.append( - (video_url, int_or_none(definition.get('quality')))) - else: - thumbnail, duration = [None] * 2 - - if not video_urls: - tv_webpage = dl_webpage('tv') - - assignments = self._search_regex( - r'(var.+?mediastring.+?)</script>', tv_webpage, - 'encoded url').split(';') - - js_vars = {} - - def parse_js_value(inp): - inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) - if '+' in inp: - inps = inp.split('+') - return functools.reduce( - operator.concat, map(parse_js_value, inps)) - inp = inp.strip() - if inp in js_vars: - return js_vars[inp] - return remove_quotes(inp) - - for assn in assignments: - assn = assn.strip() - if not assn: - continue - assn = re.sub(r'var\s+', '', assn) - vname, value = assn.split('=', 1) - js_vars[vname] = parse_js_value(value) - - video_url = js_vars['mediastring'] - if video_url not in video_urls_set: - video_urls.append((video_url, None)) - video_urls_set.add(video_url) - - for mobj in re.finditer( - r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', - webpage): - video_url = mobj.group('url') - if video_url not in video_urls_set: - video_urls.append((video_url, None)) - video_urls_set.add(video_url) - - upload_date = None - formats = [] - for video_url, height in video_urls: - if not upload_date: - upload_date = self._search_regex( - r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) - if upload_date: - upload_date = upload_date.replace('/', '') - if determine_ext(video_url) == 'mpd': - formats.extend(self._extract_mpd_formats( - video_url, video_id, mpd_id='dash', fatal=False)) - continue - tbr = None - mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) - if mobj: - if not height: - height = int(mobj.group('height')) - tbr = int(mobj.group('tbr')) - formats.append({ - 'url': video_url, - 'format_id': '%dp' % height if height else None, - 'height': height, - 'tbr': tbr, - }) - self._sort_formats(formats) - - video_uploader = self._html_search_regex( - r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', - webpage, 'uploader', fatal=False) - - view_count = self._extract_count( - r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') - like_count = self._extract_count( - r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') - dislike_count = self._extract_count( - r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike') - comment_count = self._extract_count( - r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - - def extract_list(meta_key): - div = self._search_regex( - r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' - % meta_key, webpage, meta_key, default=None) - if div: - return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) - - return { - 'id': video_id, - 'uploader': video_uploader, - 'upload_date': upload_date, - 'title': title, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - 'dislike_count': dislike_count, - 'comment_count': comment_count, - 'formats': formats, - 'age_limit': 18, - 'tags': extract_list('tags'), - 'categories': extract_list('categories'), - 'subtitles': subtitles, - } - - -class PorHubPremiumPlaylistBaseIE(PorHubPremiumBaseIE): - def _extract_entries(self, webpage, host): - # Only process container div with main playlist content skipping - # drop-down menu that uses similar pattern for videos (see - # https://github.com/ytdl-org/youtube-dl/issues/11594). - container = self._search_regex( - r'(?s)(<div[^>]+class=["\']container.+)', webpage, - 'container', default=webpage) - - return [ - self.url_result( - 'http://www.%s/%s' % (host, video_url), - PorHubPremiumIE.ie_key(), video_title=title) - for video_url, title in orderedSet(re.findall( - r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', - container)) - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - playlist_id = mobj.group('id') - - webpage = self._download_webpage(url, playlist_id) - - entries = self._extract_entries(webpage, host) - - playlist = self._parse_json( - self._search_regex( - r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage, - 'playlist', default='{}'), - playlist_id, fatal=False) - title = playlist.get('title') or self._search_regex( - r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False) - - return self.playlist_result( - entries, playlist_id, title, playlist.get('description')) - - -class PorHubPremiumPlaylistIE(PorHubPremiumPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/4667351', - 'info_dict': { - 'id': '4667351', - 'title': 'Nataly Hot', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://de.pornhub.com/playlist/4667351', - 'only_matching': True, - }] - - -class PorHubPremiumUserVideosIE(PorHubPremiumPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos' - _TESTS = [{ - 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', - 'info_dict': { - 'id': 'zoe_ph', - }, - 'playlist_mincount': 171, - }, { - 'url': 'http://www.pornhub.com/users/rushandlia/videos', - 'only_matching': True, - }, { - # default sorting as Top Rated Videos - 'url': 'https://www.pornhub.com/channels/povd/videos', - 'info_dict': { - 'id': 'povd', - }, - 'playlist_mincount': 293, - }, { - # Top Rated Videos - 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra', - 'only_matching': True, - }, { - # Most Recent Videos - 'url': 'https://www.pornhub.com/channels/povd/videos?o=da', - 'only_matching': True, - }, { - # Most Viewed Videos - 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi', - 'only_matching': True, - }, { - 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload', - 'only_matching': True, - }, { - 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - user_id = mobj.group('id') - - entries = [] - for page_num in itertools.count(1): - try: - webpage = self._download_webpage( - url, user_id, 'Downloading page %d' % page_num, - query={'page': page_num}) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - break - raise - page_entries = self._extract_entries(webpage, host) - if not page_entries: - break - entries.extend(page_entries) - - return self.playlist_result(entries, user_id) diff --git a/youtube_dl/extractor/yourextractor.py b/youtube_dl/extractor/yourextractor.py deleted file mode 100644 index 881a4bab2..000000000 --- a/youtube_dl/extractor/yourextractor.py +++ /dev/null @@ -1,38 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class YourExtractorIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' - _TEST = { - 'url': 'https://yourextractor.com/watch/42', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', - 'info_dict': { - 'id': '42', - 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) - } - } - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # TODO more code goes here, for example ... - title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title') - - return { - 'id': video_id, - 'title': title, - 'description': self._og_search_description(webpage), - 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) - } \ No newline at end of file