From e69f9f5d68aed32cc27ca188b0f51925d949c365 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 12 Jun 2016 16:45:07 +0700 Subject: [PATCH 01/19] [downloader/external] Decode error string before writing to stderr --- youtube_dl/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 3ff1f9ed4..fae245024 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -85,7 +85,7 @@ class ExternalFD(FileDownloader): cmd, stderr=subprocess.PIPE) _, stderr = p.communicate() if p.returncode != 0: - self.to_stderr(stderr) + self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode From bccdac68749e7a39a47dd0e1ad0ec9c177657de6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Jun 2016 01:11:04 +0700 Subject: [PATCH 02/19] [xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335) --- youtube_dl/extractor/xfileshare.py | 31 ++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index ee4d04c20..fe0ab6300 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -5,8 +5,10 @@ import re from .common import InfoExtractor from ..utils import ( + decode_packed_codes, ExtractorError, int_or_none, + NO_DEFAULT, sanitized_Request, urlencode_postdata, ) @@ -23,6 +25,7 @@ class XFileShareIE(InfoExtractor): ('thevideobee.to', 'TheVideoBee'), ('vidto.me', 'Vidto'), ('streamin.to', 'Streamin.To'), + ('xvidstage.com', 'XVIDSTAGE'), ) IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) @@ -78,6 +81,13 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Big Buck Bunny trailer', }, + }, { + 'url': 'http://xvidstage.com/e0qcnl03co6z', + 'info_dict': { + 'id': 'e0qcnl03co6z', + 'ext': 'mp4', + 'title': 
'Chucky Prank 2015.mp4', + }, }] def _real_extract(self, url): @@ -113,10 +123,23 @@ class XFileShareIE(InfoExtractor): r'>Watch (.+) ', r'

([^<]+)

'], webpage, 'title', default=None) or self._og_search_title(webpage)).strip() - video_url = self._search_regex( - [r'file\s*:\s*["\'](http[^"\']+)["\'],', - r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'], - webpage, 'file url') + + def extract_video_url(default=NO_DEFAULT): + return self._search_regex( + (r'file\s*:\s*(["\'])(?Phttp.+?)\1,', + r'file_link\s*=\s*(["\'])(?Phttp.+?)\1', + r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?Phttp.+?)\2\)', + r']+src=(["\'])(?Phttp.+?)\1'), + webpage, 'file url', default=default, group='url') + + video_url = extract_video_url(default=None) + + if not video_url: + webpage = decode_packed_codes(self._search_regex( + r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))", + webpage, 'packed code')) + video_url = extract_video_url() + thumbnail = self._search_regex( r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None) From cf2bf840bac1742cb422549a5491a30f70d1abb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Jun 2016 01:11:14 +0700 Subject: [PATCH 03/19] [xfileshare] Fix test --- youtube_dl/extractor/xfileshare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index fe0ab6300..0f8ccf430 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -39,7 +39,7 @@ class XFileShareIE(InfoExtractor): 'md5': '5ae4a3580620380619678ee4875893ba', 'info_dict': { 'id': '06y9juieqpmi', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ', 'thumbnail': 're:http://.*\.jpg', }, From 33b72ce64e8705a71f8ab0e6a322e5f9f3b99276 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Jun 2016 01:19:54 +0700 Subject: [PATCH 04/19] [xfileshare] Improve removed videos detection --- youtube_dl/extractor/xfileshare.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 
deletions(-) diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py index 0f8ccf430..995aada0d 100644 --- a/youtube_dl/extractor/xfileshare.py +++ b/youtube_dl/extractor/xfileshare.py @@ -32,7 +32,10 @@ class XFileShareIE(InfoExtractor): _VALID_URL = (r'https?://(?P(?:www\.)?(?:%s))/(?:embed-)?(?P[0-9a-zA-Z]+)' % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0])) - _FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<' + _FILE_NOT_FOUND_REGEXES = ( + r'>(?:404 - )?File Not Found<', + r'>The file was removed by administrator<', + ) _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -88,6 +91,10 @@ class XFileShareIE(InfoExtractor): 'ext': 'mp4', 'title': 'Chucky Prank 2015.mp4', }, + }, { + # removed by administrator + 'url': 'http://xvidstage.com/amfy7atlkx25', + 'only_matching': True, }] def _real_extract(self, url): @@ -97,7 +104,7 @@ class XFileShareIE(InfoExtractor): url = 'http://%s/%s' % (mobj.group('host'), video_id) webpage = self._download_webpage(url, video_id) - if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: + if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): raise ExtractorError('Video %s does not exist' % video_id, expected=True) fields = self._hidden_inputs(webpage) From b50e02c1e4c9ea70e88ab115b17cfa109b0c9617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Jun 2016 07:05:32 +0700 Subject: [PATCH 05/19] [README.md] Update links to options available for YoutubeDL --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 43e5114ea..b5cbaced7 100644 --- a/README.md +++ b/README.md @@ -964,7 +964,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl: ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc']) ``` -Most likely, you'll want to use various options. 
For a list of what can be done, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L121-L269). For a start, if you want to intercept youtube-dl's output, set a `logger` object. +Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L128-L278). For a start, if you want to intercept youtube-dl's output, set a `logger` object. Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file: From b4663f12b1c872f4e731f1940831ec017bc86959 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Jun 2016 07:16:35 +0700 Subject: [PATCH 06/19] [README.md] Update links to info dict metafields --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b5cbaced7..5a9768161 100644 --- a/README.md +++ b/README.md @@ -935,8 +935,8 @@ After you have ensured this site is distributing it's content legally, you can f ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. -8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +8. 
Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. 9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: From 79cd8b3d8acee7845260d5bd60698155a0d81d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 13 Jun 2016 10:04:04 +0700 Subject: [PATCH 07/19] [README.md] Suggest checking extractor code under all Python versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a9768161..f1e59542d 100644 --- a/README.md +++ b/README.md @@ -937,7 +937,7 @@ After you have ensured this site is distributing it's content legally, you can f 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. 
This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. 7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. 8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. -9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). +9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). 
Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 10. When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py From 778f96944785f814a97964be1d6fb3bb78bc13f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 00:06:31 +0700 Subject: [PATCH 08/19] [twitch:clips] Add extractor (Closes #9767) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/twitch.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 36ddc1f73..d2db4d803 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -862,6 +862,7 @@ from .twitch import ( TwitchProfileIE, TwitchPastBroadcastsIE, TwitchStreamIE, + TwitchClipsIE, ) from .twitter import ( TwitterCardIE, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index d898f14c3..20919774d 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -16,6 +16,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, parse_duration, parse_iso8601, @@ -454,3 +455,45 @@ class TwitchStreamIE(TwitchBaseIE): 'formats': formats, 'is_live': True, } + + +class TwitchClipsIE(InfoExtractor): + IE_NAME = 'twitch:clips' + _VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P[^/?#&]+)' + + _TEST = { + 'url': 'https://clips.twitch.tv/ea/AggressiveCobraPoooound', + 'md5': '761769e1eafce0ffebfb4089cb3847cd', + 'info_dict': { + 'id': 'AggressiveCobraPoooound', + 'ext': 'mp4', + 'title': 'EA Play 2016 Live from the Novo Theatre', + 'thumbnail': 're:^https?://.*\.jpg', + 'creator': 'EA', + 'uploader': 
'stereotype_', + 'uploader_id': 'stereotype_', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + clip = self._parse_json( + self._search_regex( + r'(?s)clipInfo\s*=\s*({.+?});', webpage, 'clip info'), + video_id, transform_source=js_to_json) + + video_url = clip['clip_video_url'] + title = clip['channel_title'] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'creator': clip.get('broadcaster_display_name') or clip.get('broadcaster_login'), + 'uploader': clip.get('curator_login'), + 'uploader_id': clip.get('curator_display_name'), + } From 14d0f4e0f3e1b6a467b6302eb60644535aff4292 Mon Sep 17 00:00:00 2001 From: Dracony Date: Thu, 9 Jun 2016 13:31:22 +0200 Subject: [PATCH 09/19] Added extractor for rockstargames.com --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/rockstargames.py | 54 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 youtube_dl/extractor/rockstargames.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d2db4d803..8a6c54b97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -649,6 +649,7 @@ from .revision3 import ( from .rice import RICEIE from .ringtv import RingTVIE from .ro220 import Ro220IE +from .rockstargames import RockstarGamesIE from .rottentomatoes import RottenTomatoesIE from .roxwel import RoxwelIE from .rtbf import RTBFIE diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py new file mode 100644 index 000000000..427ab153a --- /dev/null +++ b/youtube_dl/extractor/rockstargames.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + qualities, + parse_iso8601 +) + + +class RockstarGamesIE(InfoExtractor): + _VALID_URL = 
r'https?://(?:www\.)?rockstargames\.com/videos/video/(?P[0-9]+)' + _TEST = { + 'url': 'https://www.rockstargames.com/videos/video/11544/', + 'md5': '03b5caa6e357a4bd50e3143fc03e5733', + 'info_dict': { + 'id': '11544', + 'ext': 'mp4', + 'title': 'Further Adventures in Finance and Felony Trailer', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'upload_date': '20160602', + 'timestamp': 1464876000 + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json?id=%s&locale=en_us' % video_id, + video_id + )['video'] + + formats = [] + + for video in json_data['files_processed']['video/mp4']: + if not video.get('src'): + continue + height = video.get('resolution', '').replace('p', '') + + formats.append({ + 'url': self._proto_relative_url(video['src']), + 'height': int(height) if height.isdigit() else -1, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': json_data['title'], + 'description': json_data.get('description'), + 'formats': formats, + 'thumbnail': self._proto_relative_url(json_data.get('screencap')), + 'timestamp': parse_iso8601(json_data.get('created')) + } From 16b6bd01d238c2c58e3ac7ba91c706261d5810e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 01:11:24 +0700 Subject: [PATCH 10/19] [rockstargames] Improve and add Youtube fallback (Closes #9737) --- youtube_dl/extractor/rockstargames.py | 55 +++++++++++++++++---------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py index 427ab153a..48128e219 100644 --- a/youtube_dl/extractor/rockstargames.py +++ b/youtube_dl/extractor/rockstargames.py @@ -3,52 +3,67 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - qualities, - parse_iso8601 + int_or_none, 
+ parse_iso8601, ) class RockstarGamesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos/video/(?P[0-9]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P\d+)' + _TESTS = [{ 'url': 'https://www.rockstargames.com/videos/video/11544/', 'md5': '03b5caa6e357a4bd50e3143fc03e5733', 'info_dict': { 'id': '11544', 'ext': 'mp4', 'title': 'Further Adventures in Finance and Felony Trailer', - 'thumbnail': 're:^https?://.*\.jpg$', 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'thumbnail': 're:^https?://.*\.jpg$', + 'timestamp': 1464876000, 'upload_date': '20160602', - 'timestamp': 1464876000 } - } + }, { + 'url': 'http://www.rockstargames.com/videos#/?video=48', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - json_data = self._download_json( - 'https://www.rockstargames.com/videoplayer/videos/get-video.json?id=%s&locale=en_us' % video_id, - video_id - )['video'] + + video = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json', + video_id, query={ + 'id': video_id, + 'locale': 'en_us', + })['video'] + + title = video['title'] formats = [] - - for video in json_data['files_processed']['video/mp4']: + for video in video['files_processed']['video/mp4']: if not video.get('src'): continue - height = video.get('resolution', '').replace('p', '') - + resolution = video.get('resolution') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', resolution or '', 'height', default=None)) formats.append({ 'url': self._proto_relative_url(video['src']), - 'height': int(height) if height.isdigit() else -1, + 'format_id': resolution, + 'height': height, }) + + if not formats: + youtube_id = video.get('youtube_id') + if youtube_id: + return self.url_result(youtube_id, 'Youtube') + self._sort_formats(formats) return { 'id': video_id, - 'title': json_data['title'], - 'description': json_data.get('description'), + 
'title': title, + 'description': video.get('description'), + 'thumbnail': self._proto_relative_url(video.get('screencap')), + 'timestamp': parse_iso8601(video.get('created')), 'formats': formats, - 'thumbnail': self._proto_relative_url(json_data.get('screencap')), - 'timestamp': parse_iso8601(json_data.get('created')) } From fea55ef4a95d226668bd63742c4731832de93a79 Mon Sep 17 00:00:00 2001 From: venth Date: Sun, 8 May 2016 22:26:08 +0200 Subject: [PATCH 11/19] [wrzuta.pl:playlist] Added playlist extraction from wrzuta.pl --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wrzuta.py | 74 ++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8a6c54b97..5a93fec6a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -981,6 +981,7 @@ from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE from .wrzuta import WrzutaIE +from .wrzuta import WrzutaPlaylistIE from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index c42764921..830649214 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -80,3 +80,77 @@ class WrzutaIE(InfoExtractor): 'description': self._og_search_description(webpage), 'age_limit': embedpage.get('minimalAge', 0), } + + +_ENTRY_PATTERN = r'' +_PLAYLIST_SIZE_PATTERN = r'
[0-9]+/([0-9]+)
' + + +class WrzutaPlaylistIE(InfoExtractor): + """ + this class covers extraction of wrzuta playlist entries + the extraction process bases on following steps: + * collect information of playlist size + * download all entries provided on + the playlist webpage (the playlist is split + on two pages: first directly reached from webpage + second: downloaded on demand by ajax call and rendered + using the ajax call response) + * in case size of extracted entries not reached total number of entries + use the ajax call to collect the remaining entries + """ + + IE_NAME = 'wrzuta.pl:playlist' + + _VALID_URL = r'https?://(?P[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \ + '(?P[0-9a-zA-Z]+)/.*' + + _TESTS = [{ + 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', + 'playlist_mincount': 14, + 'info_dict': { + 'id': '7XfO4vE84iR', + 'title': 'Moja muza', + }, + }, { + 'url': 'http://heroesf70.wrzuta.pl/playlista/6Nj3wQHx756/lipiec_-_lato_2015_muzyka_swiata', + 'playlist_mincount': 144, + 'info_dict': { + 'id': '6Nj3wQHx756', + 'title': 'Lipiec - Lato 2015 Muzyka Świata', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + uploader = mobj.group('uploader') + + entries = [] + + webpage = self._download_webpage(url, playlist_id) + + playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') + playlist_size = int(playlist_size) if playlist_size else 0 + + playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) + + if playlist_size: + entries = list(map( + lambda entry_url: self.url_result(entry_url), + re.findall(_ENTRY_PATTERN, webpage) + )) + + if playlist_size > len(entries): + playlist_content = self._download_json( + 'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( + uploader_id=uploader, + playlist_id=playlist_id, + ), + playlist_id, + 'Downloading playlist content as JSON metadata', + 'Unable to download 
playlist content as JSON metadata', + ) + entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] + + return self.playlist_result(entries, playlist_id, playlist_title) From 1759672eede27be0a3d473c4b2925a0b10dce547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 02:13:54 +0700 Subject: [PATCH 12/19] [wrzuta:playlist] Improve and simplify (Closes #9341) --- youtube_dl/extractor/extractors.py | 6 ++-- youtube_dl/extractor/wrzuta.py | 49 ++++++++++++++---------------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5a93fec6a..5fce9f47a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -980,8 +980,10 @@ from .weiqitv import WeiqiTVIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE -from .wrzuta import WrzutaIE -from .wrzuta import WrzutaPlaylistIE +from .wrzuta import ( + WrzutaIE, + WrzutaPlaylistIE, +) from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index 830649214..b811f57fb 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, qualities, + remove_start, ) @@ -82,10 +83,6 @@ class WrzutaIE(InfoExtractor): } -_ENTRY_PATTERN = r'
' -_PLAYLIST_SIZE_PATTERN = r'
[0-9]+/([0-9]+)
' - - class WrzutaPlaylistIE(InfoExtractor): """ this class covers extraction of wrzuta playlist entries @@ -101,10 +98,7 @@ class WrzutaPlaylistIE(InfoExtractor): """ IE_NAME = 'wrzuta.pl:playlist' - - _VALID_URL = r'https?://(?P[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/' \ - '(?P[0-9a-zA-Z]+)/.*' - + _VALID_URL = r'https?://(?P[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P[0-9a-zA-Z]+)' _TESTS = [{ 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza', 'playlist_mincount': 14, @@ -119,6 +113,9 @@ class WrzutaPlaylistIE(InfoExtractor): 'id': '6Nj3wQHx756', 'title': 'Lipiec - Lato 2015 Muzyka Świata', }, + }, { + 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR', + 'only_matching': True, }] def _real_extract(self, url): @@ -126,31 +123,31 @@ class WrzutaPlaylistIE(InfoExtractor): playlist_id = mobj.group('id') uploader = mobj.group('uploader') - entries = [] - webpage = self._download_webpage(url, playlist_id) - playlist_size = self._html_search_regex(_PLAYLIST_SIZE_PATTERN, webpage, 'Size of the playlist') - playlist_size = int(playlist_size) if playlist_size else 0 + playlist_size = int_or_none(self._html_search_regex( + (r']+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)', + r']+class=["\']all-counter["\'][^>]*>(.+?)'), + webpage, 'playlist size', default=None)) - playlist_title = self._og_search_title(webpage).replace('Playlista: ', '', 1) + playlist_title = remove_start( + self._og_search_title(webpage), 'Playlista: ') + entries = [] if playlist_size: - entries = list(map( - lambda entry_url: self.url_result(entry_url), - re.findall(_ENTRY_PATTERN, webpage) - )) - + entries = [ + self.url_result(entry_url) + for _, entry_url in re.findall( + r']+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page', + webpage)] if playlist_size > len(entries): playlist_content = self._download_json( - 'http://{uploader_id}.wrzuta.pl/xhr/get_playlist_offset/{playlist_id}'.format( - uploader_id=uploader, - playlist_id=playlist_id, - ), + 
'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id), playlist_id, - 'Downloading playlist content as JSON metadata', - 'Unable to download playlist content as JSON metadata', - ) - entries += [self.url_result(entry['filelink']) for entry in playlist_content['files']] + 'Downloading playlist JSON', + 'Unable to download playlist JSON') + entries.extend([ + self.url_result(entry['filelink']) + for entry in playlist_content.get('files', []) if entry.get('filelink')]) return self.playlist_result(entries, playlist_id, playlist_title) From bc2a871f3eb5f2fce7fc1097787e829106d11f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 02:15:09 +0700 Subject: [PATCH 13/19] Credit @dracony for rockstargames (#9737) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 3272fc6ea..4f77de3c7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -173,3 +173,4 @@ Kevin Deldycke inondle Tomáš Čech Déstin Reed +Roman Tsiupa From a4ea28eee6c89756ac5bddfd0c6ef11dd490a191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 02:15:47 +0700 Subject: [PATCH 14/19] Credit @venth for wrzuta:playlist (#9341) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 4f77de3c7..cdf655c39 100644 --- a/AUTHORS +++ b/AUTHORS @@ -174,3 +174,4 @@ inondle Tomáš Čech Déstin Reed Roman Tsiupa +Artur Krysiak From d01fb21d4c58650a3ccd2a6fe2877cc9a53dd942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 02:19:42 +0700 Subject: [PATCH 15/19] release 2016.06.14 --- .github/ISSUE_TEMPLATE.md | 6 +++--- CONTRIBUTING.md | 6 +++--- docs/supportedsites.md | 5 ++++- youtube_dl/version.py | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 243f2de5d..4c52c5933 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you 
are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.12*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.12** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.06.12 +[debug] youtube-dl version 2016.06.14 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c83b8655a..a59fac9b2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -142,9 +142,9 @@ After you have ensured this site is distributing it's content legally, you can f ``` 5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. 
This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L68-L226). Add tests and code for as many as you want. -8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/58525c94d547be1c8167d16c298bdd75506db328/youtube_dl/extractor/common.py#L138-L226) apart from aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerate** to situations when sources for these fields can potentially be unavailable (even if they always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. -9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). +7. 
Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want. +8. Keep in mind that the only mandatory fields in info dict for successful extraction process are `id`, `title` and either `url` or `formats`, i.e. these are the critical data the extraction does not make any sense without. This means that [any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L148-L252) apart from the aforementioned mandatory ones should be treated **as optional** and extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. For example, if you have some intermediate dict `meta` that is a source of metadata and it has a key `summary` that you want to extract and put into resulting info dict as `description`, you should be ready that this key may be missing from the `meta` dict, i.e. you should extract it as `meta.get('summary')` and not `meta['summary']`. Similarly, you should pass `fatal=False` when extracting data from a webpage with `_search_regex/_html_search_regex`. +9. Check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](http://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. 10. 
When the tests pass, [add](http://git-scm.com/docs/git-add) the new files and [commit](http://git-scm.com/docs/git-commit) them and [push](http://git-scm.com/docs/git-push) the result, like this: $ git add youtube_dl/extractor/extractors.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e8c0a5d24..152552dee 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -535,6 +535,7 @@ - **revision3:embed** - **RICE** - **RingTV** + - **RockstarGames** - **RottenTomatoes** - **Roxwel** - **RTBF** @@ -699,6 +700,7 @@ - **TVPlay**: TV3Play and related services - **Tweakers** - **twitch:chapter** + - **twitch:clips** - **twitch:past_broadcasts** - **twitch:profile** - **twitch:stream** @@ -793,10 +795,11 @@ - **WNL** - **WorldStarHipHop** - **wrzuta.pl** + - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5e9c14398..e441a5dc4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.06.12' +__version__ = '2016.06.14' From ff4af6ec5903dd922c7bb6ec632b11830c44d04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 02:49:33 +0700 Subject: [PATCH 16/19] [lynda] Remove superfluous _NETRC_MACHINE --- youtube_dl/extractor/lynda.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 5b458d9bc..2d5040032 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -95,7 +95,6 @@ class 
LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P\d+)' - _NETRC_MACHINE = 'lynda' _TIMECODE_REGEX = r'\[(?P\d+:\d+:\d+[\.,]\d+)\]' From 4cef70db6c3c3dfd4f45fdc5a85f98bef3ec67a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 03:16:56 +0700 Subject: [PATCH 17/19] [devscripts/release.sh] Add flag for gpg-sign commits --- devscripts/release.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 87e8eda50..f8d466ba8 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -15,6 +15,7 @@ set -e skip_tests=true +gpg_sign_commits="" buildserver='localhost:8142' while true @@ -24,6 +25,10 @@ case "$1" in skip_tests=false shift ;; + --gpg-sign-commits|-S) + gpg_sign_commits="-S" + shift + ;; --buildserver) buildserver="$2" shift 2 @@ -69,7 +74,7 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py -git commit -m "release $version" +git commit $gpg_sign_commits -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." git tag -s -m "Release $version" "$version" @@ -116,7 +121,7 @@ git clone --branch gh-pages --single-branch . 
build/gh-pages "$ROOT/devscripts/gh-pages/update-copyright.py" "$ROOT/devscripts/gh-pages/update-sites.py" git add *.html *.html.in update - git commit -m "release $version" + git commit $gpg_sign_commits -m "release $version" git push "$ROOT" gh-pages git push "$ORIGIN_URL" gh-pages ) From 6c3760292c9d20f891395111bea97f401270d86b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 14 Jun 2016 04:57:59 +0700 Subject: [PATCH 18/19] [pornhub] Improve title extraction (Closes #9777) --- youtube_dl/extractor/pornhub.py | 39 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 407ea08d4..6d57e1d35 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -39,7 +40,25 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, - } + }, + }, { + # non-ASCII title + 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', + 'info_dict': { + 'id': '1331683002', + 'ext': 'mp4', + 'title': '重庆婷婷女王足交', + 'uploader': 'cj397186295', + 'duration': 1753, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', 'only_matching': True, @@ -76,19 +95,25 @@ class PornHubIE(InfoExtractor): 'PornHub said: %s' % error_msg, expected=True, video_id=video_id) + # video_title from flashvars contains whitespace instead of non-ASCII (see + # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying + # on that anymore. 
+ title = self._html_search_meta( + 'twitter:title', webpage, default=None) or self._search_regex( + (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', + r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', + r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), + webpage, 'title', group='title') + flashvars = self._parse_json( self._search_regex( r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), video_id) if flashvars: - video_title = flashvars.get('video_title') thumbnail = flashvars.get('image_url') duration = int_or_none(flashvars.get('video_duration')) else: - video_title, thumbnail, duration = [None] * 3 - - if not video_title: - video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') + thumbnail, duration = [None] * 2 video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', @@ -137,7 +162,7 @@ class PornHubIE(InfoExtractor): return { 'id': video_id, 'uploader': video_uploader, - 'title': video_title, + 'title': title, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, From 8bc4dbb1af1573f26685b0e609af319dcf34a48e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 14 Jun 2016 11:14:59 +0800 Subject: [PATCH 19/19] [wrzuta.pl] Detect error and update _TESTS --- youtube_dl/extractor/wrzuta.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index b811f57fb..bdd7097ba 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, qualities, remove_start, @@ -27,16 +28,17 @@ class WrzutaIE(InfoExtractor): 'uploader_id': 'laboratoriumdextera', 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', }, + 'skip': 'Redirected to wrzuta.pl', }, { - 'url': 
'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty', - 'md5': 'bc78077859bea7bcfe4295d7d7fc9025', + 'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus', + 'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d', 'info_dict': { - 'id': '063jOPX5ue2', - 'ext': 'ogg', - 'title': 'Liber & Natalia Szroeder - Teraz Ty', - 'duration': 203, - 'uploader_id': 'jolka85', - 'description': 'md5:2d2b6340f9188c8c4cd891580e481096', + 'id': '01xBFabGXu6', + 'ext': 'mp3', + 'title': 'James Horner - Into The Na\'vi World [Bonus]', + 'description': 'md5:30a70718b2cd9df3120fce4445b0263b', + 'duration': 95, + 'uploader_id': 'vexling', }, }] @@ -46,7 +48,10 @@ class WrzutaIE(InfoExtractor): typ = mobj.group('typ') uploader = mobj.group('uploader') - webpage = self._download_webpage(url, video_id) + webpage, urlh = self._download_webpage_handle(url, video_id) + + if urlh.geturl() == 'http://www.wrzuta.pl/': + raise ExtractorError('Video removed', expected=True) quality = qualities(['SD', 'MQ', 'HQ', 'HD'])